From fd25e883e2807a151f673b87c152a59701a0df80 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel
Date: Sun, 24 Oct 2021 14:19:19 +0200
Subject: Cycles: remove prefix from source code file names

Remove prefix of filenames that is the same as the folder name. This
used to help when #includes were using individual files, but now they
are always relative to the cycles root directory and so the prefixes
are redundant.

For patches and branches, git merge and rebase should be able to detect
the renames and move over code to the right file.
---
 intern/cycles/app/cycles_server.cpp | 14 +-
 intern/cycles/app/cycles_standalone.cpp | 26 +-
 intern/cycles/app/cycles_xml.cpp | 16 +-
 intern/cycles/app/oiio_output_driver.h | 10 +-
 intern/cycles/blender/CMakeLists.txt | 62 +-
 intern/cycles/blender/blender_camera.cpp | 965 -----------
 intern/cycles/blender/blender_curves.cpp | 915 -----------
 intern/cycles/blender/blender_device.cpp | 120 --
 intern/cycles/blender/blender_device.h | 39 -
 intern/cycles/blender/blender_display_driver.cpp | 771 ---------
 intern/cycles/blender/blender_display_driver.h | 213 ---
 intern/cycles/blender/blender_geometry.cpp | 241 ---
 intern/cycles/blender/blender_id_map.h | 295 ----
 intern/cycles/blender/blender_image.cpp | 220 ---
 intern/cycles/blender/blender_image.h | 61 -
 intern/cycles/blender/blender_light.cpp | 205 ---
 intern/cycles/blender/blender_logging.cpp | 33 -
 intern/cycles/blender/blender_mesh.cpp | 1302 ---------------
 intern/cycles/blender/blender_object.cpp | 769 ---------
 intern/cycles/blender/blender_object_cull.cpp | 142 --
 intern/cycles/blender/blender_object_cull.h | 48 -
 intern/cycles/blender/blender_output_driver.cpp | 126 --
 intern/cycles/blender/blender_output_driver.h | 40 -
 intern/cycles/blender/blender_particles.cpp | 94 --
 intern/cycles/blender/blender_python.cpp | 1063 ------------
 intern/cycles/blender/blender_session.cpp | 1003 ------------
 intern/cycles/blender/blender_session.h | 164 --
 intern/cycles/blender/blender_shader.cpp | 1589 ------------------
 intern/cycles/blender/blender_sync.cpp | 949 -----------
 intern/cycles/blender/blender_sync.h | 276 ----
 intern/cycles/blender/blender_texture.cpp | 57 -
 intern/cycles/blender/blender_texture.h | 32 -
 intern/cycles/blender/blender_util.h | 720 --------
 intern/cycles/blender/blender_viewport.cpp | 106 --
 intern/cycles/blender/blender_viewport.h | 63 -
 intern/cycles/blender/blender_volume.cpp | 322 ----
 intern/cycles/blender/camera.cpp | 965 +++++++++++
 intern/cycles/blender/curves.cpp | 915 +++++++++++
 intern/cycles/blender/device.cpp | 120 ++
 intern/cycles/blender/device.h | 39 +
 intern/cycles/blender/display_driver.cpp | 771 +++++++++
 intern/cycles/blender/display_driver.h | 213 +++
 intern/cycles/blender/geometry.cpp | 241 +++
 intern/cycles/blender/id_map.h | 295 ++++
 intern/cycles/blender/image.cpp | 220 +++
 intern/cycles/blender/image.h | 61 +
 intern/cycles/blender/light.cpp | 205 +++
 intern/cycles/blender/logging.cpp | 33 +
 intern/cycles/blender/mesh.cpp | 1302 +++++++++++++++
 intern/cycles/blender/object.cpp | 769 +++++++++
 intern/cycles/blender/object_cull.cpp | 142 ++
 intern/cycles/blender/object_cull.h | 48 +
 intern/cycles/blender/output_driver.cpp | 126 ++
 intern/cycles/blender/output_driver.h | 40 +
 intern/cycles/blender/particles.cpp | 94 ++
 intern/cycles/blender/python.cpp | 1063 ++++++++++++
 intern/cycles/blender/session.cpp | 1003 ++++++++++++
 intern/cycles/blender/session.h | 166 ++
 intern/cycles/blender/shader.cpp | 1589 ++++++++++++++++++
 intern/cycles/blender/sync.cpp | 949 +++++++++++
intern/cycles/blender/sync.h | 276 ++++ intern/cycles/blender/texture.cpp | 57 + intern/cycles/blender/texture.h | 32 + intern/cycles/blender/util.h | 720 ++++++++ intern/cycles/blender/viewport.cpp | 107 ++ intern/cycles/blender/viewport.h | 63 + intern/cycles/blender/volume.cpp | 322 ++++ intern/cycles/bvh/CMakeLists.txt | 38 +- intern/cycles/bvh/binning.cpp | 293 ++++ intern/cycles/bvh/binning.h | 115 ++ intern/cycles/bvh/build.cpp | 1144 +++++++++++++ intern/cycles/bvh/build.h | 142 ++ intern/cycles/bvh/bvh.cpp | 10 +- intern/cycles/bvh/bvh.h | 8 +- intern/cycles/bvh/bvh2.cpp | 10 +- intern/cycles/bvh/bvh2.h | 6 +- intern/cycles/bvh/bvh_binning.cpp | 293 ---- intern/cycles/bvh/bvh_binning.h | 115 -- intern/cycles/bvh/bvh_build.cpp | 1144 ------------- intern/cycles/bvh/bvh_build.h | 142 -- intern/cycles/bvh/bvh_embree.cpp | 728 --------- intern/cycles/bvh/bvh_embree.h | 68 - intern/cycles/bvh/bvh_multi.cpp | 37 - intern/cycles/bvh/bvh_multi.h | 39 - intern/cycles/bvh/bvh_node.cpp | 224 --- intern/cycles/bvh/bvh_node.h | 255 --- intern/cycles/bvh/bvh_optix.cpp | 47 - intern/cycles/bvh/bvh_optix.h | 49 - intern/cycles/bvh/bvh_params.h | 335 ---- intern/cycles/bvh/bvh_sort.cpp | 187 --- intern/cycles/bvh/bvh_sort.h | 38 - intern/cycles/bvh/bvh_split.cpp | 518 ------ intern/cycles/bvh/bvh_split.h | 240 --- intern/cycles/bvh/bvh_unaligned.cpp | 165 -- intern/cycles/bvh/bvh_unaligned.h | 73 - intern/cycles/bvh/embree.cpp | 728 +++++++++ intern/cycles/bvh/embree.h | 68 + intern/cycles/bvh/multi.cpp | 37 + intern/cycles/bvh/multi.h | 39 + intern/cycles/bvh/node.cpp | 224 +++ intern/cycles/bvh/node.h | 255 +++ intern/cycles/bvh/optix.cpp | 47 + intern/cycles/bvh/optix.h | 50 + intern/cycles/bvh/params.h | 335 ++++ intern/cycles/bvh/sort.cpp | 187 +++ intern/cycles/bvh/sort.h | 38 + intern/cycles/bvh/split.cpp | 518 ++++++ intern/cycles/bvh/split.h | 240 +++ intern/cycles/bvh/unaligned.cpp | 165 ++ intern/cycles/bvh/unaligned.h | 73 + intern/cycles/device/CMakeLists.txt | 20 +- intern/cycles/device/cpu/device.cpp | 2 +- intern/cycles/device/cpu/device.h | 4 +- intern/cycles/device/cpu/device_impl.cpp | 32 +- intern/cycles/device/cpu/device_impl.h | 8 +- intern/cycles/device/cpu/kernel.h | 2 +- intern/cycles/device/cpu/kernel_function.h | 4 +- intern/cycles/device/cpu/kernel_thread_globals.cpp | 6 +- intern/cycles/device/cuda/device.cpp | 6 +- intern/cycles/device/cuda/device.h | 4 +- intern/cycles/device/cuda/device_impl.cpp | 22 +- intern/cycles/device/cuda/device_impl.h | 2 +- intern/cycles/device/cuda/graphics_interop.h | 2 +- intern/cycles/device/cuda/kernel.h | 2 +- intern/cycles/device/cuda/queue.h | 6 +- intern/cycles/device/denoise.cpp | 88 + intern/cycles/device/denoise.h | 110 ++ intern/cycles/device/device.cpp | 20 +- intern/cycles/device/device.h | 30 +- intern/cycles/device/device_denoise.cpp | 88 - intern/cycles/device/device_denoise.h | 110 -- intern/cycles/device/device_graphics_interop.cpp | 21 - intern/cycles/device/device_graphics_interop.h | 42 - intern/cycles/device/device_kernel.cpp | 165 -- intern/cycles/device/device_kernel.h | 33 - intern/cycles/device/device_memory.cpp | 285 ---- intern/cycles/device/device_memory.h | 650 -------- intern/cycles/device/device_queue.cpp | 95 -- intern/cycles/device/device_queue.h | 115 -- intern/cycles/device/dummy/device.cpp | 2 +- intern/cycles/device/dummy/device.h | 4 +- intern/cycles/device/graphics_interop.cpp | 21 + intern/cycles/device/graphics_interop.h | 42 + intern/cycles/device/hip/device.cpp | 6 +- 
intern/cycles/device/hip/device.h | 4 +- intern/cycles/device/hip/device_impl.cpp | 24 +- intern/cycles/device/hip/device_impl.h | 4 +- intern/cycles/device/hip/graphics_interop.h | 2 +- intern/cycles/device/hip/kernel.h | 2 +- intern/cycles/device/hip/queue.h | 6 +- intern/cycles/device/kernel.cpp | 165 ++ intern/cycles/device/kernel.h | 33 + intern/cycles/device/memory.cpp | 285 ++++ intern/cycles/device/memory.h | 650 ++++++++ intern/cycles/device/multi/device.cpp | 14 +- intern/cycles/device/multi/device.h | 4 +- intern/cycles/device/optix/device.cpp | 3 +- intern/cycles/device/optix/device.h | 4 +- intern/cycles/device/optix/device_impl.cpp | 16 +- intern/cycles/device/optix/device_impl.h | 2 +- intern/cycles/device/optix/queue.cpp | 2 +- intern/cycles/device/queue.cpp | 95 ++ intern/cycles/device/queue.h | 115 ++ intern/cycles/graph/node.cpp | 8 +- intern/cycles/graph/node.h | 6 +- intern/cycles/graph/node_enum.h | 4 +- intern/cycles/graph/node_type.cpp | 4 +- intern/cycles/graph/node_type.h | 10 +- intern/cycles/graph/node_xml.cpp | 6 +- intern/cycles/graph/node_xml.h | 6 +- intern/cycles/integrator/adaptive_sampling.cpp | 2 +- intern/cycles/integrator/denoiser.cpp | 4 +- intern/cycles/integrator/denoiser.h | 6 +- intern/cycles/integrator/denoiser_device.cpp | 10 +- intern/cycles/integrator/denoiser_device.h | 2 +- intern/cycles/integrator/denoiser_oidn.cpp | 8 +- intern/cycles/integrator/denoiser_oidn.h | 4 +- intern/cycles/integrator/denoiser_optix.cpp | 2 +- intern/cycles/integrator/pass_accessor.cpp | 4 +- intern/cycles/integrator/pass_accessor.h | 6 +- intern/cycles/integrator/pass_accessor_cpu.cpp | 8 +- intern/cycles/integrator/pass_accessor_gpu.cpp | 4 +- intern/cycles/integrator/pass_accessor_gpu.h | 2 +- intern/cycles/integrator/path_trace.cpp | 10 +- intern/cycles/integrator/path_trace.h | 8 +- intern/cycles/integrator/path_trace_display.cpp | 2 +- intern/cycles/integrator/path_trace_display.h | 8 +- intern/cycles/integrator/path_trace_work.cpp | 2 +- intern/cycles/integrator/path_trace_work.h | 4 +- intern/cycles/integrator/path_trace_work_cpu.cpp | 8 +- intern/cycles/integrator/path_trace_work_cpu.h | 6 +- intern/cycles/integrator/path_trace_work_gpu.cpp | 18 +- intern/cycles/integrator/path_trace_work_gpu.h | 10 +- intern/cycles/integrator/render_scheduler.cpp | 6 +- intern/cycles/integrator/render_scheduler.h | 2 +- intern/cycles/integrator/shader_eval.cpp | 8 +- intern/cycles/integrator/shader_eval.h | 6 +- intern/cycles/integrator/tile.cpp | 4 +- intern/cycles/integrator/tile.h | 2 +- intern/cycles/integrator/work_balancer.cpp | 4 +- intern/cycles/integrator/work_balancer.h | 2 +- intern/cycles/integrator/work_tile_scheduler.cpp | 6 +- intern/cycles/integrator/work_tile_scheduler.h | 2 +- intern/cycles/kernel/CMakeLists.txt | 344 ++-- intern/cycles/kernel/bake/bake.h | 4 +- intern/cycles/kernel/bvh/bvh.h | 38 +- intern/cycles/kernel/bvh/bvh_embree.h | 156 -- intern/cycles/kernel/bvh/bvh_local.h | 211 --- intern/cycles/kernel/bvh/bvh_nodes.h | 153 -- intern/cycles/kernel/bvh/bvh_shadow_all.h | 339 ---- intern/cycles/kernel/bvh/bvh_traversal.h | 241 --- intern/cycles/kernel/bvh/bvh_types.h | 44 - intern/cycles/kernel/bvh/bvh_util.h | 226 --- intern/cycles/kernel/bvh/bvh_volume.h | 234 --- intern/cycles/kernel/bvh/bvh_volume_all.h | 303 ---- intern/cycles/kernel/bvh/embree.h | 156 ++ intern/cycles/kernel/bvh/local.h | 211 +++ intern/cycles/kernel/bvh/nodes.h | 153 ++ intern/cycles/kernel/bvh/shadow_all.h | 339 ++++ intern/cycles/kernel/bvh/traversal.h | 241 +++ 
intern/cycles/kernel/bvh/types.h | 44 + intern/cycles/kernel/bvh/util.h | 226 +++ intern/cycles/kernel/bvh/volume.h | 234 +++ intern/cycles/kernel/bvh/volume_all.h | 303 ++++ intern/cycles/kernel/camera/camera.h | 8 +- intern/cycles/kernel/camera/camera_projection.h | 258 --- intern/cycles/kernel/camera/projection.h | 258 +++ .../cycles/kernel/closure/bsdf_ashikhmin_velvet.h | 2 +- intern/cycles/kernel/closure/bsdf_diffuse.h | 2 +- intern/cycles/kernel/closure/bsdf_diffuse_ramp.h | 2 +- .../cycles/kernel/closure/bsdf_hair_principled.h | 2 +- intern/cycles/kernel/closure/bsdf_microfacet.h | 7 +- .../cycles/kernel/closure/bsdf_microfacet_multi.h | 4 +- .../kernel/closure/bsdf_principled_diffuse.h | 2 +- intern/cycles/kernel/device/cpu/compat.h | 10 +- intern/cycles/kernel/device/cpu/globals.h | 6 +- intern/cycles/kernel/device/cpu/kernel.cpp | 2 +- intern/cycles/kernel/device/cpu/kernel.h | 4 +- intern/cycles/kernel/device/cpu/kernel_arch_impl.h | 40 +- intern/cycles/kernel/device/cpu/kernel_avx.cpp | 2 +- intern/cycles/kernel/device/cpu/kernel_avx2.cpp | 2 +- intern/cycles/kernel/device/cpu/kernel_sse2.cpp | 2 +- intern/cycles/kernel/device/cpu/kernel_sse3.cpp | 2 +- intern/cycles/kernel/device/cpu/kernel_sse41.cpp | 2 +- intern/cycles/kernel/device/cuda/compat.h | 4 +- intern/cycles/kernel/device/cuda/globals.h | 8 +- intern/cycles/kernel/device/gpu/kernel.h | 34 +- .../kernel/device/gpu/parallel_active_index.h | 2 +- .../cycles/kernel/device/gpu/parallel_prefix_sum.h | 2 +- .../kernel/device/gpu/parallel_sorted_index.h | 2 +- intern/cycles/kernel/device/hip/compat.h | 4 +- intern/cycles/kernel/device/hip/globals.h | 8 +- intern/cycles/kernel/device/optix/compat.h | 4 +- intern/cycles/kernel/device/optix/globals.h | 8 +- intern/cycles/kernel/device/optix/kernel.cu | 16 +- .../kernel/device/optix/kernel_shader_raytrace.cu | 3 +- intern/cycles/kernel/film/accumulate.h | 559 +++++++ intern/cycles/kernel/film/adaptive_sampling.h | 160 ++ intern/cycles/kernel/film/film_accumulate.h | 559 ------- intern/cycles/kernel/film/film_adaptive_sampling.h | 160 -- intern/cycles/kernel/film/film_id_passes.h | 106 -- intern/cycles/kernel/film/film_passes.h | 342 ---- intern/cycles/kernel/film/film_read.h | 532 ------ intern/cycles/kernel/film/film_write_passes.h | 88 - intern/cycles/kernel/film/id_passes.h | 106 ++ intern/cycles/kernel/film/passes.h | 342 ++++ intern/cycles/kernel/film/read.h | 532 ++++++ intern/cycles/kernel/film/write_passes.h | 88 + intern/cycles/kernel/geom/attribute.h | 116 ++ intern/cycles/kernel/geom/curve.h | 328 ++++ intern/cycles/kernel/geom/curve_intersect.h | 771 +++++++++ intern/cycles/kernel/geom/geom.h | 30 +- intern/cycles/kernel/geom/geom_attribute.h | 116 -- intern/cycles/kernel/geom/geom_curve.h | 328 ---- intern/cycles/kernel/geom/geom_curve_intersect.h | 771 --------- intern/cycles/kernel/geom/geom_motion_curve.h | 155 -- intern/cycles/kernel/geom/geom_motion_triangle.h | 155 -- .../kernel/geom/geom_motion_triangle_intersect.h | 303 ---- .../kernel/geom/geom_motion_triangle_shader.h | 117 -- intern/cycles/kernel/geom/geom_object.h | 600 ------- intern/cycles/kernel/geom/geom_patch.h | 470 ------ intern/cycles/kernel/geom/geom_primitive.h | 351 ---- intern/cycles/kernel/geom/geom_shader_data.h | 447 ----- intern/cycles/kernel/geom/geom_subd_triangle.h | 687 -------- intern/cycles/kernel/geom/geom_triangle.h | 370 ----- .../cycles/kernel/geom/geom_triangle_intersect.h | 312 ---- intern/cycles/kernel/geom/geom_volume.h | 97 -- intern/cycles/kernel/geom/motion_curve.h | 
155 ++ intern/cycles/kernel/geom/motion_triangle.h | 155 ++ .../cycles/kernel/geom/motion_triangle_intersect.h | 303 ++++ intern/cycles/kernel/geom/motion_triangle_shader.h | 117 ++ intern/cycles/kernel/geom/object.h | 600 +++++++ intern/cycles/kernel/geom/patch.h | 470 ++++++ intern/cycles/kernel/geom/primitive.h | 351 ++++ intern/cycles/kernel/geom/shader_data.h | 447 +++++ intern/cycles/kernel/geom/subd_triangle.h | 687 ++++++++ intern/cycles/kernel/geom/triangle.h | 370 +++++ intern/cycles/kernel/geom/triangle_intersect.h | 312 ++++ intern/cycles/kernel/geom/volume.h | 97 ++ intern/cycles/kernel/integrator/init_from_bake.h | 202 +++ intern/cycles/kernel/integrator/init_from_camera.h | 124 ++ .../kernel/integrator/integrator_init_from_bake.h | 202 --- .../integrator/integrator_init_from_camera.h | 124 -- .../integrator/integrator_intersect_closest.h | 244 --- .../integrator/integrator_intersect_shadow.h | 190 --- .../integrator/integrator_intersect_subsurface.h | 36 - .../integrator/integrator_intersect_volume_stack.h | 206 --- .../kernel/integrator/integrator_megakernel.h | 113 -- .../kernel/integrator/integrator_path_state.h | 376 ----- .../integrator/integrator_shade_background.h | 219 --- .../kernel/integrator/integrator_shade_light.h | 128 -- .../kernel/integrator/integrator_shade_shadow.h | 189 --- .../kernel/integrator/integrator_shade_surface.h | 557 ------- .../kernel/integrator/integrator_shade_volume.h | 1049 ------------ .../kernel/integrator/integrator_shader_eval.h | 869 ---------- .../kernel/integrator/integrator_shadow_catcher.h | 120 -- .../integrator/integrator_shadow_state_template.h | 86 - intern/cycles/kernel/integrator/integrator_state.h | 195 --- .../kernel/integrator/integrator_state_flow.h | 148 -- .../kernel/integrator/integrator_state_template.h | 109 -- .../kernel/integrator/integrator_state_util.h | 440 ----- .../kernel/integrator/integrator_subsurface.h | 201 --- .../kernel/integrator/integrator_subsurface_disk.h | 196 --- .../integrator/integrator_subsurface_random_walk.h | 469 ------ .../kernel/integrator/integrator_volume_stack.h | 225 --- .../cycles/kernel/integrator/intersect_closest.h | 244 +++ intern/cycles/kernel/integrator/intersect_shadow.h | 190 +++ .../kernel/integrator/intersect_subsurface.h | 36 + .../kernel/integrator/intersect_volume_stack.h | 206 +++ intern/cycles/kernel/integrator/megakernel.h | 113 ++ intern/cycles/kernel/integrator/path_state.h | 376 +++++ intern/cycles/kernel/integrator/shade_background.h | 219 +++ intern/cycles/kernel/integrator/shade_light.h | 128 ++ intern/cycles/kernel/integrator/shade_shadow.h | 189 +++ intern/cycles/kernel/integrator/shade_surface.h | 557 +++++++ intern/cycles/kernel/integrator/shade_volume.h | 1049 ++++++++++++ intern/cycles/kernel/integrator/shader_eval.h | 869 ++++++++++ intern/cycles/kernel/integrator/shadow_catcher.h | 120 ++ .../kernel/integrator/shadow_state_template.h | 86 + intern/cycles/kernel/integrator/state.h | 195 +++ intern/cycles/kernel/integrator/state_flow.h | 148 ++ intern/cycles/kernel/integrator/state_template.h | 109 ++ intern/cycles/kernel/integrator/state_util.h | 440 +++++ intern/cycles/kernel/integrator/subsurface.h | 201 +++ intern/cycles/kernel/integrator/subsurface_disk.h | 196 +++ .../kernel/integrator/subsurface_random_walk.h | 469 ++++++ intern/cycles/kernel/integrator/volume_stack.h | 225 +++ intern/cycles/kernel/kernel_textures.h | 89 - intern/cycles/kernel/kernel_types.h | 1608 ------------------ intern/cycles/kernel/light/background.h | 453 +++++ 
intern/cycles/kernel/light/common.h | 227 +++ intern/cycles/kernel/light/light.h | 4 +- intern/cycles/kernel/light/light_background.h | 453 ----- intern/cycles/kernel/light/light_common.h | 227 --- intern/cycles/kernel/light/light_sample.h | 271 --- intern/cycles/kernel/light/sample.h | 271 +++ intern/cycles/kernel/osl/CMakeLists.txt | 16 +- intern/cycles/kernel/osl/background.cpp | 2 +- intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp | 4 +- intern/cycles/kernel/osl/bsdf_phong_ramp.cpp | 4 +- intern/cycles/kernel/osl/bssrdf.cpp | 122 ++ intern/cycles/kernel/osl/closures.cpp | 1006 ++++++++++++ intern/cycles/kernel/osl/closures.h | 164 ++ intern/cycles/kernel/osl/emissive.cpp | 4 +- intern/cycles/kernel/osl/globals.h | 109 ++ intern/cycles/kernel/osl/osl_bssrdf.cpp | 122 -- intern/cycles/kernel/osl/osl_closures.cpp | 1006 ------------ intern/cycles/kernel/osl/osl_closures.h | 164 -- intern/cycles/kernel/osl/osl_globals.h | 109 -- intern/cycles/kernel/osl/osl_services.cpp | 1724 -------------------- intern/cycles/kernel/osl/osl_services.h | 330 ---- intern/cycles/kernel/osl/osl_shader.cpp | 428 ----- intern/cycles/kernel/osl/osl_shader.h | 82 - intern/cycles/kernel/osl/services.cpp | 1724 ++++++++++++++++++++ intern/cycles/kernel/osl/services.h | 330 ++++ intern/cycles/kernel/osl/shader.cpp | 428 +++++ intern/cycles/kernel/osl/shader.h | 82 + intern/cycles/kernel/sample/jitter.h | 169 ++ intern/cycles/kernel/sample/lcg.h | 51 + intern/cycles/kernel/sample/mapping.h | 177 ++ intern/cycles/kernel/sample/mis.h | 64 + intern/cycles/kernel/sample/pattern.h | 185 +++ intern/cycles/kernel/sample/sample_jitter.h | 169 -- intern/cycles/kernel/sample/sample_mapping.h | 177 -- intern/cycles/kernel/sample/sample_pattern.h | 185 --- intern/cycles/kernel/svm/ao.h | 141 ++ intern/cycles/kernel/svm/aov.h | 70 + intern/cycles/kernel/svm/attribute.h | 346 ++++ intern/cycles/kernel/svm/bevel.h | 327 ++++ intern/cycles/kernel/svm/blackbody.h | 55 + intern/cycles/kernel/svm/brick.h | 141 ++ intern/cycles/kernel/svm/brightness.h | 39 + intern/cycles/kernel/svm/bump.h | 61 + intern/cycles/kernel/svm/camera.h | 47 + intern/cycles/kernel/svm/checker.h | 61 + intern/cycles/kernel/svm/clamp.h | 49 + intern/cycles/kernel/svm/closure.h | 1260 ++++++++++++++ intern/cycles/kernel/svm/color_util.h | 323 ++++ intern/cycles/kernel/svm/convert.h | 78 + intern/cycles/kernel/svm/displace.h | 180 ++ intern/cycles/kernel/svm/fractal_noise.h | 139 ++ intern/cycles/kernel/svm/fresnel.h | 84 + intern/cycles/kernel/svm/gamma.h | 36 + intern/cycles/kernel/svm/geometry.h | 261 +++ intern/cycles/kernel/svm/gradient.h | 84 + intern/cycles/kernel/svm/hsv.h | 61 + intern/cycles/kernel/svm/ies.h | 122 ++ intern/cycles/kernel/svm/image.h | 253 +++ intern/cycles/kernel/svm/invert.h | 43 + intern/cycles/kernel/svm/light_path.h | 148 ++ intern/cycles/kernel/svm/magic.h | 115 ++ intern/cycles/kernel/svm/map_range.h | 91 ++ intern/cycles/kernel/svm/mapping.h | 86 + intern/cycles/kernel/svm/mapping_util.h | 41 + intern/cycles/kernel/svm/math.h | 77 + intern/cycles/kernel/svm/math_util.h | 285 ++++ intern/cycles/kernel/svm/mix.h | 43 + intern/cycles/kernel/svm/musgrave.h | 854 ++++++++++ intern/cycles/kernel/svm/noise.h | 744 +++++++++ intern/cycles/kernel/svm/noisetex.h | 222 +++ intern/cycles/kernel/svm/normal.h | 47 + intern/cycles/kernel/svm/ramp.h | 165 ++ intern/cycles/kernel/svm/ramp_util.h | 87 + intern/cycles/kernel/svm/sepcomb_hsv.h | 69 + intern/cycles/kernel/svm/sepcomb_vector.h | 53 + intern/cycles/kernel/svm/sky.h | 335 ++++ 
intern/cycles/kernel/svm/svm.h | 106 +- intern/cycles/kernel/svm/svm_ao.h | 139 -- intern/cycles/kernel/svm/svm_aov.h | 68 - intern/cycles/kernel/svm/svm_attribute.h | 344 ---- intern/cycles/kernel/svm/svm_bevel.h | 325 ---- intern/cycles/kernel/svm/svm_blackbody.h | 51 - intern/cycles/kernel/svm/svm_brick.h | 139 -- intern/cycles/kernel/svm/svm_brightness.h | 35 - intern/cycles/kernel/svm/svm_bump.h | 59 - intern/cycles/kernel/svm/svm_camera.h | 45 - intern/cycles/kernel/svm/svm_checker.h | 59 - intern/cycles/kernel/svm/svm_clamp.h | 47 - intern/cycles/kernel/svm/svm_closure.h | 1258 -------------- intern/cycles/kernel/svm/svm_color_util.h | 321 ---- intern/cycles/kernel/svm/svm_convert.h | 76 - intern/cycles/kernel/svm/svm_displace.h | 178 -- intern/cycles/kernel/svm/svm_fractal_noise.h | 135 -- intern/cycles/kernel/svm/svm_fresnel.h | 82 - intern/cycles/kernel/svm/svm_gamma.h | 34 - intern/cycles/kernel/svm/svm_geometry.h | 259 --- intern/cycles/kernel/svm/svm_gradient.h | 82 - intern/cycles/kernel/svm/svm_hsv.h | 64 - intern/cycles/kernel/svm/svm_ies.h | 120 -- intern/cycles/kernel/svm/svm_image.h | 251 --- intern/cycles/kernel/svm/svm_invert.h | 41 - intern/cycles/kernel/svm/svm_light_path.h | 146 -- intern/cycles/kernel/svm/svm_magic.h | 113 -- intern/cycles/kernel/svm/svm_map_range.h | 89 - intern/cycles/kernel/svm/svm_mapping.h | 82 - intern/cycles/kernel/svm/svm_mapping_util.h | 39 - intern/cycles/kernel/svm/svm_math.h | 75 - intern/cycles/kernel/svm/svm_math_util.h | 283 ---- intern/cycles/kernel/svm/svm_mix.h | 41 - intern/cycles/kernel/svm/svm_musgrave.h | 850 ---------- intern/cycles/kernel/svm/svm_noise.h | 742 --------- intern/cycles/kernel/svm/svm_noisetex.h | 218 --- intern/cycles/kernel/svm/svm_normal.h | 45 - intern/cycles/kernel/svm/svm_ramp.h | 168 -- intern/cycles/kernel/svm/svm_ramp_util.h | 90 - intern/cycles/kernel/svm/svm_sepcomb_hsv.h | 67 - intern/cycles/kernel/svm/svm_sepcomb_vector.h | 51 - intern/cycles/kernel/svm/svm_sky.h | 333 ---- intern/cycles/kernel/svm/svm_tex_coord.h | 424 ----- intern/cycles/kernel/svm/svm_types.h | 604 ------- intern/cycles/kernel/svm/svm_value.h | 45 - intern/cycles/kernel/svm/svm_vector_rotate.h | 83 - intern/cycles/kernel/svm/svm_vector_transform.h | 107 -- intern/cycles/kernel/svm/svm_vertex_color.h | 80 - intern/cycles/kernel/svm/svm_voronoi.h | 1162 ------------- intern/cycles/kernel/svm/svm_voxel.h | 53 - intern/cycles/kernel/svm/svm_wave.h | 131 -- intern/cycles/kernel/svm/svm_wavelength.h | 101 -- intern/cycles/kernel/svm/svm_white_noise.h | 80 - intern/cycles/kernel/svm/svm_wireframe.h | 125 -- intern/cycles/kernel/svm/tex_coord.h | 426 +++++ intern/cycles/kernel/svm/types.h | 601 +++++++ intern/cycles/kernel/svm/value.h | 47 + intern/cycles/kernel/svm/vector_rotate.h | 85 + intern/cycles/kernel/svm/vector_transform.h | 109 ++ intern/cycles/kernel/svm/vertex_color.h | 82 + intern/cycles/kernel/svm/voronoi.h | 1164 +++++++++++++ intern/cycles/kernel/svm/voxel.h | 55 + intern/cycles/kernel/svm/wave.h | 133 ++ intern/cycles/kernel/svm/wavelength.h | 103 ++ intern/cycles/kernel/svm/white_noise.h | 82 + intern/cycles/kernel/svm/wireframe.h | 127 ++ intern/cycles/kernel/textures.h | 89 + intern/cycles/kernel/types.h | 1608 ++++++++++++++++++ intern/cycles/kernel/util/color.h | 35 + intern/cycles/kernel/util/differential.h | 166 ++ intern/cycles/kernel/util/lookup_table.h | 56 + intern/cycles/kernel/util/profiling.h | 40 + intern/cycles/kernel/util/util_color.h | 35 - intern/cycles/kernel/util/util_differential.h | 166 -- 
intern/cycles/kernel/util/util_lookup_table.h | 56 - intern/cycles/kernel/util/util_profiling.h | 40 - intern/cycles/scene/alembic.cpp | 10 +- intern/cycles/scene/alembic.h | 6 +- intern/cycles/scene/alembic_read.cpp | 6 +- intern/cycles/scene/alembic_read.h | 2 +- intern/cycles/scene/attribute.cpp | 6 +- intern/cycles/scene/attribute.h | 12 +- intern/cycles/scene/background.cpp | 8 +- intern/cycles/scene/background.h | 2 +- intern/cycles/scene/bake.cpp | 2 +- intern/cycles/scene/bake.h | 4 +- intern/cycles/scene/camera.cpp | 14 +- intern/cycles/scene/camera.h | 12 +- intern/cycles/scene/colorspace.cpp | 14 +- intern/cycles/scene/colorspace.h | 4 +- intern/cycles/scene/constant_fold.cpp | 4 +- intern/cycles/scene/constant_fold.h | 4 +- intern/cycles/scene/curves.cpp | 8 +- intern/cycles/scene/curves.h | 4 +- intern/cycles/scene/film.cpp | 10 +- intern/cycles/scene/film.h | 6 +- intern/cycles/scene/geometry.cpp | 14 +- intern/cycles/scene/geometry.h | 12 +- intern/cycles/scene/hair.cpp | 2 +- intern/cycles/scene/image.cpp | 18 +- intern/cycles/scene/image.h | 12 +- intern/cycles/scene/image_oiio.cpp | 6 +- intern/cycles/scene/image_sky.cpp | 8 +- intern/cycles/scene/image_vdb.cpp | 4 +- intern/cycles/scene/integrator.cpp | 12 +- intern/cycles/scene/integrator.h | 4 +- intern/cycles/scene/jitter.h | 2 +- intern/cycles/scene/light.cpp | 12 +- intern/cycles/scene/light.h | 10 +- intern/cycles/scene/mesh.cpp | 14 +- intern/cycles/scene/mesh.h | 18 +- intern/cycles/scene/mesh_displace.cpp | 8 +- intern/cycles/scene/mesh_subdivision.cpp | 12 +- intern/cycles/scene/object.cpp | 20 +- intern/cycles/scene/object.h | 14 +- intern/cycles/scene/osl.cpp | 22 +- intern/cycles/scene/osl.h | 8 +- intern/cycles/scene/particles.cpp | 12 +- intern/cycles/scene/particles.h | 4 +- intern/cycles/scene/pass.cpp | 4 +- intern/cycles/scene/pass.h | 6 +- intern/cycles/scene/procedural.cpp | 7 +- intern/cycles/scene/scene.cpp | 8 +- intern/cycles/scene/scene.h | 20 +- intern/cycles/scene/shader.cpp | 8 +- intern/cycles/scene/shader.h | 14 +- intern/cycles/scene/shader_graph.cpp | 10 +- intern/cycles/scene/shader_graph.h | 16 +- intern/cycles/scene/shader_nodes.cpp | 18 +- intern/cycles/scene/shader_nodes.h | 4 +- intern/cycles/scene/sobol.cpp | 4 +- intern/cycles/scene/sobol.h | 2 +- intern/cycles/scene/stats.cpp | 6 +- intern/cycles/scene/stats.h | 6 +- intern/cycles/scene/svm.cpp | 8 +- intern/cycles/scene/svm.h | 8 +- intern/cycles/scene/tables.cpp | 4 +- intern/cycles/scene/tables.h | 4 +- intern/cycles/scene/volume.cpp | 12 +- intern/cycles/session/buffers.cpp | 10 +- intern/cycles/session/buffers.h | 12 +- intern/cycles/session/display_driver.h | 4 +- intern/cycles/session/merge.cpp | 10 +- intern/cycles/session/merge.h | 4 +- intern/cycles/session/output_driver.h | 6 +- intern/cycles/session/session.cpp | 12 +- intern/cycles/session/session.h | 10 +- intern/cycles/session/tile.cpp | 14 +- intern/cycles/session/tile.h | 6 +- intern/cycles/subd/CMakeLists.txt | 18 +- intern/cycles/subd/dice.cpp | 283 ++++ intern/cycles/subd/dice.h | 103 ++ intern/cycles/subd/patch.cpp | 121 ++ intern/cycles/subd/patch.h | 63 + intern/cycles/subd/patch_table.cpp | 295 ++++ intern/cycles/subd/patch_table.h | 64 + intern/cycles/subd/split.cpp | 748 +++++++++ intern/cycles/subd/split.h | 75 + intern/cycles/subd/subd_dice.cpp | 283 ---- intern/cycles/subd/subd_dice.h | 103 -- intern/cycles/subd/subd_patch.cpp | 121 -- intern/cycles/subd/subd_patch.h | 63 - intern/cycles/subd/subd_patch_table.cpp | 295 ---- 
intern/cycles/subd/subd_patch_table.h | 64 - intern/cycles/subd/subd_split.cpp | 748 --------- intern/cycles/subd/subd_split.h | 75 - intern/cycles/subd/subd_subpatch.h | 219 --- intern/cycles/subd/subpatch.h | 219 +++ intern/cycles/test/CMakeLists.txt | 6 - .../test/integrator_adaptive_sampling_test.cpp | 2 +- intern/cycles/test/integrator_tile_test.cpp | 2 +- intern/cycles/test/render_graph_finalize_test.cpp | 10 +- intern/cycles/test/util_aligned_malloc_test.cpp | 2 +- intern/cycles/test/util_avxf_test.h | 4 +- intern/cycles/test/util_math_test.cpp | 2 +- intern/cycles/test/util_path_test.cpp | 2 +- intern/cycles/test/util_string_test.cpp | 2 +- intern/cycles/test/util_task_test.cpp | 2 +- intern/cycles/test/util_time_test.cpp | 2 +- intern/cycles/test/util_transform_test.cpp | 4 +- intern/cycles/util/CMakeLists.txt | 240 +-- intern/cycles/util/algorithm.h | 33 + intern/cycles/util/aligned_malloc.cpp | 76 + intern/cycles/util/aligned_malloc.h | 50 + intern/cycles/util/args.h | 31 + intern/cycles/util/array.h | 318 ++++ intern/cycles/util/atomic.h | 68 + intern/cycles/util/avxb.h | 243 +++ intern/cycles/util/avxf.h | 392 +++++ intern/cycles/util/avxi.h | 745 +++++++++ intern/cycles/util/boundbox.h | 282 ++++ intern/cycles/util/color.h | 296 ++++ intern/cycles/util/debug.cpp | 124 ++ intern/cycles/util/debug.h | 167 ++ intern/cycles/util/defines.h | 146 ++ intern/cycles/util/deque.h | 28 + intern/cycles/util/disjoint_set.h | 75 + intern/cycles/util/foreach.h | 24 + intern/cycles/util/function.h | 39 + intern/cycles/util/guarded_allocator.cpp | 48 + intern/cycles/util/guarded_allocator.h | 185 +++ intern/cycles/util/half.h | 169 ++ intern/cycles/util/hash.h | 389 +++++ intern/cycles/util/ies.cpp | 411 +++++ intern/cycles/util/ies.h | 59 + intern/cycles/util/image.h | 98 ++ intern/cycles/util/image_impl.h | 175 ++ intern/cycles/util/list.h | 28 + intern/cycles/util/log.cpp | 96 ++ intern/cycles/util/log.h | 101 ++ intern/cycles/util/map.h | 39 + intern/cycles/util/math.h | 870 ++++++++++ intern/cycles/util/math_cdf.cpp | 70 + intern/cycles/util/math_cdf.h | 75 + intern/cycles/util/math_fast.h | 652 ++++++++ intern/cycles/util/math_float2.h | 269 +++ intern/cycles/util/math_float3.h | 530 ++++++ intern/cycles/util/math_float4.h | 536 ++++++ intern/cycles/util/math_int2.h | 73 + intern/cycles/util/math_int3.h | 110 ++ intern/cycles/util/math_int4.h | 156 ++ intern/cycles/util/math_intersect.h | 249 +++ intern/cycles/util/math_matrix.h | 454 ++++++ intern/cycles/util/md5.cpp | 387 +++++ intern/cycles/util/md5.h | 61 + intern/cycles/util/murmurhash.cpp | 126 ++ intern/cycles/util/murmurhash.h | 29 + intern/cycles/util/opengl.h | 25 + intern/cycles/util/openimagedenoise.h | 44 + intern/cycles/util/openvdb.h | 68 + intern/cycles/util/optimization.h | 77 + intern/cycles/util/param.h | 40 + intern/cycles/util/path.cpp | 781 +++++++++ intern/cycles/util/path.h | 74 + intern/cycles/util/profiling.cpp | 174 ++ intern/cycles/util/profiling.h | 180 ++ intern/cycles/util/progress.h | 370 +++++ intern/cycles/util/projection.h | 217 +++ intern/cycles/util/queue.h | 28 + intern/cycles/util/rect.h | 75 + intern/cycles/util/semaphore.h | 61 + intern/cycles/util/set.h | 34 + intern/cycles/util/simd.cpp | 44 + intern/cycles/util/simd.h | 572 +++++++ intern/cycles/util/sseb.h | 358 ++++ intern/cycles/util/ssef.h | 1104 +++++++++++++ intern/cycles/util/ssei.h | 646 ++++++++ intern/cycles/util/stack_allocator.h | 165 ++ intern/cycles/util/static_assert.h | 36 + intern/cycles/util/stats.h | 54 + 
intern/cycles/util/string.cpp | 268 +++ intern/cycles/util/string.h | 81 + intern/cycles/util/system.cpp | 415 +++++ intern/cycles/util/system.h | 73 + intern/cycles/util/task.cpp | 251 +++ intern/cycles/util/task.h | 148 ++ intern/cycles/util/tbb.h | 55 + intern/cycles/util/texture.h | 99 ++ intern/cycles/util/thread.cpp | 72 + intern/cycles/util/thread.h | 90 + intern/cycles/util/time.cpp | 139 ++ intern/cycles/util/time.h | 91 ++ intern/cycles/util/transform.cpp | 345 ++++ intern/cycles/util/transform.h | 512 ++++++ intern/cycles/util/types.h | 138 ++ intern/cycles/util/types_float2.h | 40 + intern/cycles/util/types_float2_impl.h | 59 + intern/cycles/util/types_float3.h | 60 + intern/cycles/util/types_float3_impl.h | 103 ++ intern/cycles/util/types_float4.h | 63 + intern/cycles/util/types_float4_impl.h | 109 ++ intern/cycles/util/types_float8.h | 74 + intern/cycles/util/types_float8_impl.h | 112 ++ intern/cycles/util/types_int2.h | 39 + intern/cycles/util/types_int2_impl.h | 50 + intern/cycles/util/types_int3.h | 60 + intern/cycles/util/types_int3_impl.h | 104 ++ intern/cycles/util/types_int4.h | 66 + intern/cycles/util/types_int4_impl.h | 123 ++ intern/cycles/util/types_uchar2.h | 39 + intern/cycles/util/types_uchar2_impl.h | 50 + intern/cycles/util/types_uchar3.h | 39 + intern/cycles/util/types_uchar3_impl.h | 50 + intern/cycles/util/types_uchar4.h | 39 + intern/cycles/util/types_uchar4_impl.h | 50 + intern/cycles/util/types_uint2.h | 39 + intern/cycles/util/types_uint2_impl.h | 48 + intern/cycles/util/types_uint3.h | 39 + intern/cycles/util/types_uint3_impl.h | 48 + intern/cycles/util/types_uint4.h | 39 + intern/cycles/util/types_uint4_impl.h | 48 + intern/cycles/util/types_ushort4.h | 36 + intern/cycles/util/types_vector3.h | 39 + intern/cycles/util/types_vector3_impl.h | 43 + intern/cycles/util/unique_ptr.h | 29 + intern/cycles/util/util_algorithm.h | 33 - intern/cycles/util/util_aligned_malloc.cpp | 76 - intern/cycles/util/util_aligned_malloc.h | 50 - intern/cycles/util/util_args.h | 31 - intern/cycles/util/util_array.h | 318 ---- intern/cycles/util/util_atomic.h | 68 - intern/cycles/util/util_avxb.h | 243 --- intern/cycles/util/util_avxf.h | 392 ----- intern/cycles/util/util_avxi.h | 745 --------- intern/cycles/util/util_boundbox.h | 282 ---- intern/cycles/util/util_color.h | 296 ---- intern/cycles/util/util_debug.cpp | 124 -- intern/cycles/util/util_debug.h | 167 -- intern/cycles/util/util_defines.h | 146 -- intern/cycles/util/util_deque.h | 28 - intern/cycles/util/util_disjoint_set.h | 75 - intern/cycles/util/util_foreach.h | 24 - intern/cycles/util/util_function.h | 39 - intern/cycles/util/util_guarded_allocator.cpp | 48 - intern/cycles/util/util_guarded_allocator.h | 185 --- intern/cycles/util/util_half.h | 169 -- intern/cycles/util/util_hash.h | 389 ----- intern/cycles/util/util_ies.cpp | 411 ----- intern/cycles/util/util_ies.h | 59 - intern/cycles/util/util_image.h | 98 -- intern/cycles/util/util_image_impl.h | 175 -- intern/cycles/util/util_list.h | 28 - intern/cycles/util/util_logging.cpp | 96 -- intern/cycles/util/util_logging.h | 101 -- intern/cycles/util/util_map.h | 39 - intern/cycles/util/util_math.h | 870 ---------- intern/cycles/util/util_math_cdf.cpp | 70 - intern/cycles/util/util_math_cdf.h | 75 - intern/cycles/util/util_math_fast.h | 652 -------- intern/cycles/util/util_math_float2.h | 269 --- intern/cycles/util/util_math_float3.h | 530 ------ intern/cycles/util/util_math_float4.h | 536 ------ intern/cycles/util/util_math_int2.h | 73 - 
intern/cycles/util/util_math_int3.h | 110 -- intern/cycles/util/util_math_int4.h | 156 -- intern/cycles/util/util_math_intersect.h | 249 --- intern/cycles/util/util_math_matrix.h | 454 ------ intern/cycles/util/util_md5.cpp | 387 ----- intern/cycles/util/util_md5.h | 61 - intern/cycles/util/util_murmurhash.cpp | 126 -- intern/cycles/util/util_murmurhash.h | 29 - intern/cycles/util/util_opengl.h | 25 - intern/cycles/util/util_openimagedenoise.h | 44 - intern/cycles/util/util_openvdb.h | 68 - intern/cycles/util/util_optimization.h | 77 - intern/cycles/util/util_param.h | 40 - intern/cycles/util/util_path.cpp | 781 --------- intern/cycles/util/util_path.h | 74 - intern/cycles/util/util_profiling.cpp | 174 -- intern/cycles/util/util_profiling.h | 180 -- intern/cycles/util/util_progress.h | 370 ----- intern/cycles/util/util_projection.h | 217 --- intern/cycles/util/util_queue.h | 28 - intern/cycles/util/util_rect.h | 75 - intern/cycles/util/util_semaphore.h | 61 - intern/cycles/util/util_set.h | 34 - intern/cycles/util/util_simd.cpp | 44 - intern/cycles/util/util_simd.h | 572 ------- intern/cycles/util/util_sseb.h | 358 ---- intern/cycles/util/util_ssef.h | 1104 ------------- intern/cycles/util/util_ssei.h | 646 -------- intern/cycles/util/util_stack_allocator.h | 165 -- intern/cycles/util/util_static_assert.h | 36 - intern/cycles/util/util_stats.h | 54 - intern/cycles/util/util_string.cpp | 268 --- intern/cycles/util/util_string.h | 84 - intern/cycles/util/util_system.cpp | 415 ----- intern/cycles/util/util_system.h | 73 - intern/cycles/util/util_task.cpp | 251 --- intern/cycles/util/util_task.h | 148 -- intern/cycles/util/util_tbb.h | 55 - intern/cycles/util/util_texture.h | 99 -- intern/cycles/util/util_thread.cpp | 72 - intern/cycles/util/util_thread.h | 90 - intern/cycles/util/util_time.cpp | 139 -- intern/cycles/util/util_time.h | 91 -- intern/cycles/util/util_transform.cpp | 345 ---- intern/cycles/util/util_transform.h | 512 ------ intern/cycles/util/util_types.h | 138 -- intern/cycles/util/util_types_float2.h | 40 - intern/cycles/util/util_types_float2_impl.h | 59 - intern/cycles/util/util_types_float3.h | 60 - intern/cycles/util/util_types_float3_impl.h | 103 -- intern/cycles/util/util_types_float4.h | 63 - intern/cycles/util/util_types_float4_impl.h | 109 -- intern/cycles/util/util_types_float8.h | 74 - intern/cycles/util/util_types_float8_impl.h | 112 -- intern/cycles/util/util_types_int2.h | 39 - intern/cycles/util/util_types_int2_impl.h | 50 - intern/cycles/util/util_types_int3.h | 60 - intern/cycles/util/util_types_int3_impl.h | 104 -- intern/cycles/util/util_types_int4.h | 66 - intern/cycles/util/util_types_int4_impl.h | 123 -- intern/cycles/util/util_types_uchar2.h | 39 - intern/cycles/util/util_types_uchar2_impl.h | 50 - intern/cycles/util/util_types_uchar3.h | 39 - intern/cycles/util/util_types_uchar3_impl.h | 50 - intern/cycles/util/util_types_uchar4.h | 39 - intern/cycles/util/util_types_uchar4_impl.h | 50 - intern/cycles/util/util_types_uint2.h | 39 - intern/cycles/util/util_types_uint2_impl.h | 48 - intern/cycles/util/util_types_uint3.h | 39 - intern/cycles/util/util_types_uint3_impl.h | 48 - intern/cycles/util/util_types_uint4.h | 39 - intern/cycles/util/util_types_uint4_impl.h | 48 - intern/cycles/util/util_types_ushort4.h | 36 - intern/cycles/util/util_types_vector3.h | 39 - intern/cycles/util/util_types_vector3_impl.h | 43 - intern/cycles/util/util_unique_ptr.h | 29 - intern/cycles/util/util_vector.h | 59 - intern/cycles/util/util_version.h | 35 - 
intern/cycles/util/util_view.cpp | 282 ---- intern/cycles/util/util_view.h | 48 - intern/cycles/util/util_windows.cpp | 54 - intern/cycles/util/util_windows.h | 42 - intern/cycles/util/util_xml.h | 41 - intern/cycles/util/vector.h | 59 + intern/cycles/util/version.h | 35 + intern/cycles/util/view.cpp | 282 ++++ intern/cycles/util/view.h | 48 + intern/cycles/util/windows.cpp | 54 + intern/cycles/util/windows.h | 42 + intern/cycles/util/xml.h | 41 + 849 files changed, 76506 insertions(+), 76304 deletions(-) delete mode 100644 intern/cycles/blender/blender_camera.cpp delete mode 100644 intern/cycles/blender/blender_curves.cpp delete mode 100644 intern/cycles/blender/blender_device.cpp delete mode 100644 intern/cycles/blender/blender_device.h delete mode 100644 intern/cycles/blender/blender_display_driver.cpp delete mode 100644 intern/cycles/blender/blender_display_driver.h delete mode 100644 intern/cycles/blender/blender_geometry.cpp delete mode 100644 intern/cycles/blender/blender_id_map.h delete mode 100644 intern/cycles/blender/blender_image.cpp delete mode 100644 intern/cycles/blender/blender_image.h delete mode 100644 intern/cycles/blender/blender_light.cpp delete mode 100644 intern/cycles/blender/blender_logging.cpp delete mode 100644 intern/cycles/blender/blender_mesh.cpp delete mode 100644 intern/cycles/blender/blender_object.cpp delete mode 100644 intern/cycles/blender/blender_object_cull.cpp delete mode 100644 intern/cycles/blender/blender_object_cull.h delete mode 100644 intern/cycles/blender/blender_output_driver.cpp delete mode 100644 intern/cycles/blender/blender_output_driver.h delete mode 100644 intern/cycles/blender/blender_particles.cpp delete mode 100644 intern/cycles/blender/blender_python.cpp delete mode 100644 intern/cycles/blender/blender_session.cpp delete mode 100644 intern/cycles/blender/blender_session.h delete mode 100644 intern/cycles/blender/blender_shader.cpp delete mode 100644 intern/cycles/blender/blender_sync.cpp delete mode 100644 intern/cycles/blender/blender_sync.h delete mode 100644 intern/cycles/blender/blender_texture.cpp delete mode 100644 intern/cycles/blender/blender_texture.h delete mode 100644 intern/cycles/blender/blender_util.h delete mode 100644 intern/cycles/blender/blender_viewport.cpp delete mode 100644 intern/cycles/blender/blender_viewport.h delete mode 100644 intern/cycles/blender/blender_volume.cpp create mode 100644 intern/cycles/blender/camera.cpp create mode 100644 intern/cycles/blender/curves.cpp create mode 100644 intern/cycles/blender/device.cpp create mode 100644 intern/cycles/blender/device.h create mode 100644 intern/cycles/blender/display_driver.cpp create mode 100644 intern/cycles/blender/display_driver.h create mode 100644 intern/cycles/blender/geometry.cpp create mode 100644 intern/cycles/blender/id_map.h create mode 100644 intern/cycles/blender/image.cpp create mode 100644 intern/cycles/blender/image.h create mode 100644 intern/cycles/blender/light.cpp create mode 100644 intern/cycles/blender/logging.cpp create mode 100644 intern/cycles/blender/mesh.cpp create mode 100644 intern/cycles/blender/object.cpp create mode 100644 intern/cycles/blender/object_cull.cpp create mode 100644 intern/cycles/blender/object_cull.h create mode 100644 intern/cycles/blender/output_driver.cpp create mode 100644 intern/cycles/blender/output_driver.h create mode 100644 intern/cycles/blender/particles.cpp create mode 100644 intern/cycles/blender/python.cpp create mode 100644 intern/cycles/blender/session.cpp create mode 100644 
intern/cycles/blender/session.h create mode 100644 intern/cycles/blender/shader.cpp create mode 100644 intern/cycles/blender/sync.cpp create mode 100644 intern/cycles/blender/sync.h create mode 100644 intern/cycles/blender/texture.cpp create mode 100644 intern/cycles/blender/texture.h create mode 100644 intern/cycles/blender/util.h create mode 100644 intern/cycles/blender/viewport.cpp create mode 100644 intern/cycles/blender/viewport.h create mode 100644 intern/cycles/blender/volume.cpp create mode 100644 intern/cycles/bvh/binning.cpp create mode 100644 intern/cycles/bvh/binning.h create mode 100644 intern/cycles/bvh/build.cpp create mode 100644 intern/cycles/bvh/build.h delete mode 100644 intern/cycles/bvh/bvh_binning.cpp delete mode 100644 intern/cycles/bvh/bvh_binning.h delete mode 100644 intern/cycles/bvh/bvh_build.cpp delete mode 100644 intern/cycles/bvh/bvh_build.h delete mode 100644 intern/cycles/bvh/bvh_embree.cpp delete mode 100644 intern/cycles/bvh/bvh_embree.h delete mode 100644 intern/cycles/bvh/bvh_multi.cpp delete mode 100644 intern/cycles/bvh/bvh_multi.h delete mode 100644 intern/cycles/bvh/bvh_node.cpp delete mode 100644 intern/cycles/bvh/bvh_node.h delete mode 100644 intern/cycles/bvh/bvh_optix.cpp delete mode 100644 intern/cycles/bvh/bvh_optix.h delete mode 100644 intern/cycles/bvh/bvh_params.h delete mode 100644 intern/cycles/bvh/bvh_sort.cpp delete mode 100644 intern/cycles/bvh/bvh_sort.h delete mode 100644 intern/cycles/bvh/bvh_split.cpp delete mode 100644 intern/cycles/bvh/bvh_split.h delete mode 100644 intern/cycles/bvh/bvh_unaligned.cpp delete mode 100644 intern/cycles/bvh/bvh_unaligned.h create mode 100644 intern/cycles/bvh/embree.cpp create mode 100644 intern/cycles/bvh/embree.h create mode 100644 intern/cycles/bvh/multi.cpp create mode 100644 intern/cycles/bvh/multi.h create mode 100644 intern/cycles/bvh/node.cpp create mode 100644 intern/cycles/bvh/node.h create mode 100644 intern/cycles/bvh/optix.cpp create mode 100644 intern/cycles/bvh/optix.h create mode 100644 intern/cycles/bvh/params.h create mode 100644 intern/cycles/bvh/sort.cpp create mode 100644 intern/cycles/bvh/sort.h create mode 100644 intern/cycles/bvh/split.cpp create mode 100644 intern/cycles/bvh/split.h create mode 100644 intern/cycles/bvh/unaligned.cpp create mode 100644 intern/cycles/bvh/unaligned.h create mode 100644 intern/cycles/device/denoise.cpp create mode 100644 intern/cycles/device/denoise.h delete mode 100644 intern/cycles/device/device_denoise.cpp delete mode 100644 intern/cycles/device/device_denoise.h delete mode 100644 intern/cycles/device/device_graphics_interop.cpp delete mode 100644 intern/cycles/device/device_graphics_interop.h delete mode 100644 intern/cycles/device/device_kernel.cpp delete mode 100644 intern/cycles/device/device_kernel.h delete mode 100644 intern/cycles/device/device_memory.cpp delete mode 100644 intern/cycles/device/device_memory.h delete mode 100644 intern/cycles/device/device_queue.cpp delete mode 100644 intern/cycles/device/device_queue.h create mode 100644 intern/cycles/device/graphics_interop.cpp create mode 100644 intern/cycles/device/graphics_interop.h create mode 100644 intern/cycles/device/kernel.cpp create mode 100644 intern/cycles/device/kernel.h create mode 100644 intern/cycles/device/memory.cpp create mode 100644 intern/cycles/device/memory.h create mode 100644 intern/cycles/device/queue.cpp create mode 100644 intern/cycles/device/queue.h delete mode 100644 intern/cycles/kernel/bvh/bvh_embree.h delete mode 100644 
intern/cycles/kernel/bvh/bvh_local.h delete mode 100644 intern/cycles/kernel/bvh/bvh_nodes.h delete mode 100644 intern/cycles/kernel/bvh/bvh_shadow_all.h delete mode 100644 intern/cycles/kernel/bvh/bvh_traversal.h delete mode 100644 intern/cycles/kernel/bvh/bvh_types.h delete mode 100644 intern/cycles/kernel/bvh/bvh_util.h delete mode 100644 intern/cycles/kernel/bvh/bvh_volume.h delete mode 100644 intern/cycles/kernel/bvh/bvh_volume_all.h create mode 100644 intern/cycles/kernel/bvh/embree.h create mode 100644 intern/cycles/kernel/bvh/local.h create mode 100644 intern/cycles/kernel/bvh/nodes.h create mode 100644 intern/cycles/kernel/bvh/shadow_all.h create mode 100644 intern/cycles/kernel/bvh/traversal.h create mode 100644 intern/cycles/kernel/bvh/types.h create mode 100644 intern/cycles/kernel/bvh/util.h create mode 100644 intern/cycles/kernel/bvh/volume.h create mode 100644 intern/cycles/kernel/bvh/volume_all.h delete mode 100644 intern/cycles/kernel/camera/camera_projection.h create mode 100644 intern/cycles/kernel/camera/projection.h create mode 100644 intern/cycles/kernel/film/accumulate.h create mode 100644 intern/cycles/kernel/film/adaptive_sampling.h delete mode 100644 intern/cycles/kernel/film/film_accumulate.h delete mode 100644 intern/cycles/kernel/film/film_adaptive_sampling.h delete mode 100644 intern/cycles/kernel/film/film_id_passes.h delete mode 100644 intern/cycles/kernel/film/film_passes.h delete mode 100644 intern/cycles/kernel/film/film_read.h delete mode 100644 intern/cycles/kernel/film/film_write_passes.h create mode 100644 intern/cycles/kernel/film/id_passes.h create mode 100644 intern/cycles/kernel/film/passes.h create mode 100644 intern/cycles/kernel/film/read.h create mode 100644 intern/cycles/kernel/film/write_passes.h create mode 100644 intern/cycles/kernel/geom/attribute.h create mode 100644 intern/cycles/kernel/geom/curve.h create mode 100644 intern/cycles/kernel/geom/curve_intersect.h delete mode 100644 intern/cycles/kernel/geom/geom_attribute.h delete mode 100644 intern/cycles/kernel/geom/geom_curve.h delete mode 100644 intern/cycles/kernel/geom/geom_curve_intersect.h delete mode 100644 intern/cycles/kernel/geom/geom_motion_curve.h delete mode 100644 intern/cycles/kernel/geom/geom_motion_triangle.h delete mode 100644 intern/cycles/kernel/geom/geom_motion_triangle_intersect.h delete mode 100644 intern/cycles/kernel/geom/geom_motion_triangle_shader.h delete mode 100644 intern/cycles/kernel/geom/geom_object.h delete mode 100644 intern/cycles/kernel/geom/geom_patch.h delete mode 100644 intern/cycles/kernel/geom/geom_primitive.h delete mode 100644 intern/cycles/kernel/geom/geom_shader_data.h delete mode 100644 intern/cycles/kernel/geom/geom_subd_triangle.h delete mode 100644 intern/cycles/kernel/geom/geom_triangle.h delete mode 100644 intern/cycles/kernel/geom/geom_triangle_intersect.h delete mode 100644 intern/cycles/kernel/geom/geom_volume.h create mode 100644 intern/cycles/kernel/geom/motion_curve.h create mode 100644 intern/cycles/kernel/geom/motion_triangle.h create mode 100644 intern/cycles/kernel/geom/motion_triangle_intersect.h create mode 100644 intern/cycles/kernel/geom/motion_triangle_shader.h create mode 100644 intern/cycles/kernel/geom/object.h create mode 100644 intern/cycles/kernel/geom/patch.h create mode 100644 intern/cycles/kernel/geom/primitive.h create mode 100644 intern/cycles/kernel/geom/shader_data.h create mode 100644 intern/cycles/kernel/geom/subd_triangle.h create mode 100644 intern/cycles/kernel/geom/triangle.h create mode 100644 
intern/cycles/kernel/geom/triangle_intersect.h create mode 100644 intern/cycles/kernel/geom/volume.h create mode 100644 intern/cycles/kernel/integrator/init_from_bake.h create mode 100644 intern/cycles/kernel/integrator/init_from_camera.h delete mode 100644 intern/cycles/kernel/integrator/integrator_init_from_bake.h delete mode 100644 intern/cycles/kernel/integrator/integrator_init_from_camera.h delete mode 100644 intern/cycles/kernel/integrator/integrator_intersect_closest.h delete mode 100644 intern/cycles/kernel/integrator/integrator_intersect_shadow.h delete mode 100644 intern/cycles/kernel/integrator/integrator_intersect_subsurface.h delete mode 100644 intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h delete mode 100644 intern/cycles/kernel/integrator/integrator_megakernel.h delete mode 100644 intern/cycles/kernel/integrator/integrator_path_state.h delete mode 100644 intern/cycles/kernel/integrator/integrator_shade_background.h delete mode 100644 intern/cycles/kernel/integrator/integrator_shade_light.h delete mode 100644 intern/cycles/kernel/integrator/integrator_shade_shadow.h delete mode 100644 intern/cycles/kernel/integrator/integrator_shade_surface.h delete mode 100644 intern/cycles/kernel/integrator/integrator_shade_volume.h delete mode 100644 intern/cycles/kernel/integrator/integrator_shader_eval.h delete mode 100644 intern/cycles/kernel/integrator/integrator_shadow_catcher.h delete mode 100644 intern/cycles/kernel/integrator/integrator_shadow_state_template.h delete mode 100644 intern/cycles/kernel/integrator/integrator_state.h delete mode 100644 intern/cycles/kernel/integrator/integrator_state_flow.h delete mode 100644 intern/cycles/kernel/integrator/integrator_state_template.h delete mode 100644 intern/cycles/kernel/integrator/integrator_state_util.h delete mode 100644 intern/cycles/kernel/integrator/integrator_subsurface.h delete mode 100644 intern/cycles/kernel/integrator/integrator_subsurface_disk.h delete mode 100644 intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h delete mode 100644 intern/cycles/kernel/integrator/integrator_volume_stack.h create mode 100644 intern/cycles/kernel/integrator/intersect_closest.h create mode 100644 intern/cycles/kernel/integrator/intersect_shadow.h create mode 100644 intern/cycles/kernel/integrator/intersect_subsurface.h create mode 100644 intern/cycles/kernel/integrator/intersect_volume_stack.h create mode 100644 intern/cycles/kernel/integrator/megakernel.h create mode 100644 intern/cycles/kernel/integrator/path_state.h create mode 100644 intern/cycles/kernel/integrator/shade_background.h create mode 100644 intern/cycles/kernel/integrator/shade_light.h create mode 100644 intern/cycles/kernel/integrator/shade_shadow.h create mode 100644 intern/cycles/kernel/integrator/shade_surface.h create mode 100644 intern/cycles/kernel/integrator/shade_volume.h create mode 100644 intern/cycles/kernel/integrator/shader_eval.h create mode 100644 intern/cycles/kernel/integrator/shadow_catcher.h create mode 100644 intern/cycles/kernel/integrator/shadow_state_template.h create mode 100644 intern/cycles/kernel/integrator/state.h create mode 100644 intern/cycles/kernel/integrator/state_flow.h create mode 100644 intern/cycles/kernel/integrator/state_template.h create mode 100644 intern/cycles/kernel/integrator/state_util.h create mode 100644 intern/cycles/kernel/integrator/subsurface.h create mode 100644 intern/cycles/kernel/integrator/subsurface_disk.h create mode 100644 
intern/cycles/kernel/integrator/subsurface_random_walk.h create mode 100644 intern/cycles/kernel/integrator/volume_stack.h delete mode 100644 intern/cycles/kernel/kernel_textures.h delete mode 100644 intern/cycles/kernel/kernel_types.h create mode 100644 intern/cycles/kernel/light/background.h create mode 100644 intern/cycles/kernel/light/common.h delete mode 100644 intern/cycles/kernel/light/light_background.h delete mode 100644 intern/cycles/kernel/light/light_common.h delete mode 100644 intern/cycles/kernel/light/light_sample.h create mode 100644 intern/cycles/kernel/light/sample.h create mode 100644 intern/cycles/kernel/osl/bssrdf.cpp create mode 100644 intern/cycles/kernel/osl/closures.cpp create mode 100644 intern/cycles/kernel/osl/closures.h create mode 100644 intern/cycles/kernel/osl/globals.h delete mode 100644 intern/cycles/kernel/osl/osl_bssrdf.cpp delete mode 100644 intern/cycles/kernel/osl/osl_closures.cpp delete mode 100644 intern/cycles/kernel/osl/osl_closures.h delete mode 100644 intern/cycles/kernel/osl/osl_globals.h delete mode 100644 intern/cycles/kernel/osl/osl_services.cpp delete mode 100644 intern/cycles/kernel/osl/osl_services.h delete mode 100644 intern/cycles/kernel/osl/osl_shader.cpp delete mode 100644 intern/cycles/kernel/osl/osl_shader.h create mode 100644 intern/cycles/kernel/osl/services.cpp create mode 100644 intern/cycles/kernel/osl/services.h create mode 100644 intern/cycles/kernel/osl/shader.cpp create mode 100644 intern/cycles/kernel/osl/shader.h create mode 100644 intern/cycles/kernel/sample/jitter.h create mode 100644 intern/cycles/kernel/sample/lcg.h create mode 100644 intern/cycles/kernel/sample/mapping.h create mode 100644 intern/cycles/kernel/sample/mis.h create mode 100644 intern/cycles/kernel/sample/pattern.h delete mode 100644 intern/cycles/kernel/sample/sample_jitter.h delete mode 100644 intern/cycles/kernel/sample/sample_mapping.h delete mode 100644 intern/cycles/kernel/sample/sample_pattern.h create mode 100644 intern/cycles/kernel/svm/ao.h create mode 100644 intern/cycles/kernel/svm/aov.h create mode 100644 intern/cycles/kernel/svm/attribute.h create mode 100644 intern/cycles/kernel/svm/bevel.h create mode 100644 intern/cycles/kernel/svm/blackbody.h create mode 100644 intern/cycles/kernel/svm/brick.h create mode 100644 intern/cycles/kernel/svm/brightness.h create mode 100644 intern/cycles/kernel/svm/bump.h create mode 100644 intern/cycles/kernel/svm/camera.h create mode 100644 intern/cycles/kernel/svm/checker.h create mode 100644 intern/cycles/kernel/svm/clamp.h create mode 100644 intern/cycles/kernel/svm/closure.h create mode 100644 intern/cycles/kernel/svm/color_util.h create mode 100644 intern/cycles/kernel/svm/convert.h create mode 100644 intern/cycles/kernel/svm/displace.h create mode 100644 intern/cycles/kernel/svm/fractal_noise.h create mode 100644 intern/cycles/kernel/svm/fresnel.h create mode 100644 intern/cycles/kernel/svm/gamma.h create mode 100644 intern/cycles/kernel/svm/geometry.h create mode 100644 intern/cycles/kernel/svm/gradient.h create mode 100644 intern/cycles/kernel/svm/hsv.h create mode 100644 intern/cycles/kernel/svm/ies.h create mode 100644 intern/cycles/kernel/svm/image.h create mode 100644 intern/cycles/kernel/svm/invert.h create mode 100644 intern/cycles/kernel/svm/light_path.h create mode 100644 intern/cycles/kernel/svm/magic.h create mode 100644 intern/cycles/kernel/svm/map_range.h create mode 100644 intern/cycles/kernel/svm/mapping.h create mode 100644 intern/cycles/kernel/svm/mapping_util.h create mode 100644 
intern/cycles/kernel/svm/math.h create mode 100644 intern/cycles/kernel/svm/math_util.h create mode 100644 intern/cycles/kernel/svm/mix.h create mode 100644 intern/cycles/kernel/svm/musgrave.h create mode 100644 intern/cycles/kernel/svm/noise.h create mode 100644 intern/cycles/kernel/svm/noisetex.h create mode 100644 intern/cycles/kernel/svm/normal.h create mode 100644 intern/cycles/kernel/svm/ramp.h create mode 100644 intern/cycles/kernel/svm/ramp_util.h create mode 100644 intern/cycles/kernel/svm/sepcomb_hsv.h create mode 100644 intern/cycles/kernel/svm/sepcomb_vector.h create mode 100644 intern/cycles/kernel/svm/sky.h delete mode 100644 intern/cycles/kernel/svm/svm_ao.h delete mode 100644 intern/cycles/kernel/svm/svm_aov.h delete mode 100644 intern/cycles/kernel/svm/svm_attribute.h delete mode 100644 intern/cycles/kernel/svm/svm_bevel.h delete mode 100644 intern/cycles/kernel/svm/svm_blackbody.h delete mode 100644 intern/cycles/kernel/svm/svm_brick.h delete mode 100644 intern/cycles/kernel/svm/svm_brightness.h delete mode 100644 intern/cycles/kernel/svm/svm_bump.h delete mode 100644 intern/cycles/kernel/svm/svm_camera.h delete mode 100644 intern/cycles/kernel/svm/svm_checker.h delete mode 100644 intern/cycles/kernel/svm/svm_clamp.h delete mode 100644 intern/cycles/kernel/svm/svm_closure.h delete mode 100644 intern/cycles/kernel/svm/svm_color_util.h delete mode 100644 intern/cycles/kernel/svm/svm_convert.h delete mode 100644 intern/cycles/kernel/svm/svm_displace.h delete mode 100644 intern/cycles/kernel/svm/svm_fractal_noise.h delete mode 100644 intern/cycles/kernel/svm/svm_fresnel.h delete mode 100644 intern/cycles/kernel/svm/svm_gamma.h delete mode 100644 intern/cycles/kernel/svm/svm_geometry.h delete mode 100644 intern/cycles/kernel/svm/svm_gradient.h delete mode 100644 intern/cycles/kernel/svm/svm_hsv.h delete mode 100644 intern/cycles/kernel/svm/svm_ies.h delete mode 100644 intern/cycles/kernel/svm/svm_image.h delete mode 100644 intern/cycles/kernel/svm/svm_invert.h delete mode 100644 intern/cycles/kernel/svm/svm_light_path.h delete mode 100644 intern/cycles/kernel/svm/svm_magic.h delete mode 100644 intern/cycles/kernel/svm/svm_map_range.h delete mode 100644 intern/cycles/kernel/svm/svm_mapping.h delete mode 100644 intern/cycles/kernel/svm/svm_mapping_util.h delete mode 100644 intern/cycles/kernel/svm/svm_math.h delete mode 100644 intern/cycles/kernel/svm/svm_math_util.h delete mode 100644 intern/cycles/kernel/svm/svm_mix.h delete mode 100644 intern/cycles/kernel/svm/svm_musgrave.h delete mode 100644 intern/cycles/kernel/svm/svm_noise.h delete mode 100644 intern/cycles/kernel/svm/svm_noisetex.h delete mode 100644 intern/cycles/kernel/svm/svm_normal.h delete mode 100644 intern/cycles/kernel/svm/svm_ramp.h delete mode 100644 intern/cycles/kernel/svm/svm_ramp_util.h delete mode 100644 intern/cycles/kernel/svm/svm_sepcomb_hsv.h delete mode 100644 intern/cycles/kernel/svm/svm_sepcomb_vector.h delete mode 100644 intern/cycles/kernel/svm/svm_sky.h delete mode 100644 intern/cycles/kernel/svm/svm_tex_coord.h delete mode 100644 intern/cycles/kernel/svm/svm_types.h delete mode 100644 intern/cycles/kernel/svm/svm_value.h delete mode 100644 intern/cycles/kernel/svm/svm_vector_rotate.h delete mode 100644 intern/cycles/kernel/svm/svm_vector_transform.h delete mode 100644 intern/cycles/kernel/svm/svm_vertex_color.h delete mode 100644 intern/cycles/kernel/svm/svm_voronoi.h delete mode 100644 intern/cycles/kernel/svm/svm_voxel.h delete mode 100644 intern/cycles/kernel/svm/svm_wave.h delete mode 
100644 intern/cycles/kernel/svm/svm_wavelength.h delete mode 100644 intern/cycles/kernel/svm/svm_white_noise.h delete mode 100644 intern/cycles/kernel/svm/svm_wireframe.h create mode 100644 intern/cycles/kernel/svm/tex_coord.h create mode 100644 intern/cycles/kernel/svm/types.h create mode 100644 intern/cycles/kernel/svm/value.h create mode 100644 intern/cycles/kernel/svm/vector_rotate.h create mode 100644 intern/cycles/kernel/svm/vector_transform.h create mode 100644 intern/cycles/kernel/svm/vertex_color.h create mode 100644 intern/cycles/kernel/svm/voronoi.h create mode 100644 intern/cycles/kernel/svm/voxel.h create mode 100644 intern/cycles/kernel/svm/wave.h create mode 100644 intern/cycles/kernel/svm/wavelength.h create mode 100644 intern/cycles/kernel/svm/white_noise.h create mode 100644 intern/cycles/kernel/svm/wireframe.h create mode 100644 intern/cycles/kernel/textures.h create mode 100644 intern/cycles/kernel/types.h create mode 100644 intern/cycles/kernel/util/color.h create mode 100644 intern/cycles/kernel/util/differential.h create mode 100644 intern/cycles/kernel/util/lookup_table.h create mode 100644 intern/cycles/kernel/util/profiling.h delete mode 100644 intern/cycles/kernel/util/util_color.h delete mode 100644 intern/cycles/kernel/util/util_differential.h delete mode 100644 intern/cycles/kernel/util/util_lookup_table.h delete mode 100644 intern/cycles/kernel/util/util_profiling.h create mode 100644 intern/cycles/subd/dice.cpp create mode 100644 intern/cycles/subd/dice.h create mode 100644 intern/cycles/subd/patch.cpp create mode 100644 intern/cycles/subd/patch.h create mode 100644 intern/cycles/subd/patch_table.cpp create mode 100644 intern/cycles/subd/patch_table.h create mode 100644 intern/cycles/subd/split.cpp create mode 100644 intern/cycles/subd/split.h delete mode 100644 intern/cycles/subd/subd_dice.cpp delete mode 100644 intern/cycles/subd/subd_dice.h delete mode 100644 intern/cycles/subd/subd_patch.cpp delete mode 100644 intern/cycles/subd/subd_patch.h delete mode 100644 intern/cycles/subd/subd_patch_table.cpp delete mode 100644 intern/cycles/subd/subd_patch_table.h delete mode 100644 intern/cycles/subd/subd_split.cpp delete mode 100644 intern/cycles/subd/subd_split.h delete mode 100644 intern/cycles/subd/subd_subpatch.h create mode 100644 intern/cycles/subd/subpatch.h create mode 100644 intern/cycles/util/algorithm.h create mode 100644 intern/cycles/util/aligned_malloc.cpp create mode 100644 intern/cycles/util/aligned_malloc.h create mode 100644 intern/cycles/util/args.h create mode 100644 intern/cycles/util/array.h create mode 100644 intern/cycles/util/atomic.h create mode 100644 intern/cycles/util/avxb.h create mode 100644 intern/cycles/util/avxf.h create mode 100644 intern/cycles/util/avxi.h create mode 100644 intern/cycles/util/boundbox.h create mode 100644 intern/cycles/util/color.h create mode 100644 intern/cycles/util/debug.cpp create mode 100644 intern/cycles/util/debug.h create mode 100644 intern/cycles/util/defines.h create mode 100644 intern/cycles/util/deque.h create mode 100644 intern/cycles/util/disjoint_set.h create mode 100644 intern/cycles/util/foreach.h create mode 100644 intern/cycles/util/function.h create mode 100644 intern/cycles/util/guarded_allocator.cpp create mode 100644 intern/cycles/util/guarded_allocator.h create mode 100644 intern/cycles/util/half.h create mode 100644 intern/cycles/util/hash.h create mode 100644 intern/cycles/util/ies.cpp create mode 100644 intern/cycles/util/ies.h create mode 100644 intern/cycles/util/image.h create 
mode 100644 intern/cycles/util/image_impl.h create mode 100644 intern/cycles/util/list.h create mode 100644 intern/cycles/util/log.cpp create mode 100644 intern/cycles/util/log.h create mode 100644 intern/cycles/util/map.h create mode 100644 intern/cycles/util/math.h create mode 100644 intern/cycles/util/math_cdf.cpp create mode 100644 intern/cycles/util/math_cdf.h create mode 100644 intern/cycles/util/math_fast.h create mode 100644 intern/cycles/util/math_float2.h create mode 100644 intern/cycles/util/math_float3.h create mode 100644 intern/cycles/util/math_float4.h create mode 100644 intern/cycles/util/math_int2.h create mode 100644 intern/cycles/util/math_int3.h create mode 100644 intern/cycles/util/math_int4.h create mode 100644 intern/cycles/util/math_intersect.h create mode 100644 intern/cycles/util/math_matrix.h create mode 100644 intern/cycles/util/md5.cpp create mode 100644 intern/cycles/util/md5.h create mode 100644 intern/cycles/util/murmurhash.cpp create mode 100644 intern/cycles/util/murmurhash.h create mode 100644 intern/cycles/util/opengl.h create mode 100644 intern/cycles/util/openimagedenoise.h create mode 100644 intern/cycles/util/openvdb.h create mode 100644 intern/cycles/util/optimization.h create mode 100644 intern/cycles/util/param.h create mode 100644 intern/cycles/util/path.cpp create mode 100644 intern/cycles/util/path.h create mode 100644 intern/cycles/util/profiling.cpp create mode 100644 intern/cycles/util/profiling.h create mode 100644 intern/cycles/util/progress.h create mode 100644 intern/cycles/util/projection.h create mode 100644 intern/cycles/util/queue.h create mode 100644 intern/cycles/util/rect.h create mode 100644 intern/cycles/util/semaphore.h create mode 100644 intern/cycles/util/set.h create mode 100644 intern/cycles/util/simd.cpp create mode 100644 intern/cycles/util/simd.h create mode 100644 intern/cycles/util/sseb.h create mode 100644 intern/cycles/util/ssef.h create mode 100644 intern/cycles/util/ssei.h create mode 100644 intern/cycles/util/stack_allocator.h create mode 100644 intern/cycles/util/static_assert.h create mode 100644 intern/cycles/util/stats.h create mode 100644 intern/cycles/util/string.cpp create mode 100644 intern/cycles/util/string.h create mode 100644 intern/cycles/util/system.cpp create mode 100644 intern/cycles/util/system.h create mode 100644 intern/cycles/util/task.cpp create mode 100644 intern/cycles/util/task.h create mode 100644 intern/cycles/util/tbb.h create mode 100644 intern/cycles/util/texture.h create mode 100644 intern/cycles/util/thread.cpp create mode 100644 intern/cycles/util/thread.h create mode 100644 intern/cycles/util/time.cpp create mode 100644 intern/cycles/util/time.h create mode 100644 intern/cycles/util/transform.cpp create mode 100644 intern/cycles/util/transform.h create mode 100644 intern/cycles/util/types.h create mode 100644 intern/cycles/util/types_float2.h create mode 100644 intern/cycles/util/types_float2_impl.h create mode 100644 intern/cycles/util/types_float3.h create mode 100644 intern/cycles/util/types_float3_impl.h create mode 100644 intern/cycles/util/types_float4.h create mode 100644 intern/cycles/util/types_float4_impl.h create mode 100644 intern/cycles/util/types_float8.h create mode 100644 intern/cycles/util/types_float8_impl.h create mode 100644 intern/cycles/util/types_int2.h create mode 100644 intern/cycles/util/types_int2_impl.h create mode 100644 intern/cycles/util/types_int3.h create mode 100644 intern/cycles/util/types_int3_impl.h create mode 100644 
intern/cycles/util/types_int4.h create mode 100644 intern/cycles/util/types_int4_impl.h create mode 100644 intern/cycles/util/types_uchar2.h create mode 100644 intern/cycles/util/types_uchar2_impl.h create mode 100644 intern/cycles/util/types_uchar3.h create mode 100644 intern/cycles/util/types_uchar3_impl.h create mode 100644 intern/cycles/util/types_uchar4.h create mode 100644 intern/cycles/util/types_uchar4_impl.h create mode 100644 intern/cycles/util/types_uint2.h create mode 100644 intern/cycles/util/types_uint2_impl.h create mode 100644 intern/cycles/util/types_uint3.h create mode 100644 intern/cycles/util/types_uint3_impl.h create mode 100644 intern/cycles/util/types_uint4.h create mode 100644 intern/cycles/util/types_uint4_impl.h create mode 100644 intern/cycles/util/types_ushort4.h create mode 100644 intern/cycles/util/types_vector3.h create mode 100644 intern/cycles/util/types_vector3_impl.h create mode 100644 intern/cycles/util/unique_ptr.h delete mode 100644 intern/cycles/util/util_algorithm.h delete mode 100644 intern/cycles/util/util_aligned_malloc.cpp delete mode 100644 intern/cycles/util/util_aligned_malloc.h delete mode 100644 intern/cycles/util/util_args.h delete mode 100644 intern/cycles/util/util_array.h delete mode 100644 intern/cycles/util/util_atomic.h delete mode 100644 intern/cycles/util/util_avxb.h delete mode 100644 intern/cycles/util/util_avxf.h delete mode 100644 intern/cycles/util/util_avxi.h delete mode 100644 intern/cycles/util/util_boundbox.h delete mode 100644 intern/cycles/util/util_color.h delete mode 100644 intern/cycles/util/util_debug.cpp delete mode 100644 intern/cycles/util/util_debug.h delete mode 100644 intern/cycles/util/util_defines.h delete mode 100644 intern/cycles/util/util_deque.h delete mode 100644 intern/cycles/util/util_disjoint_set.h delete mode 100644 intern/cycles/util/util_foreach.h delete mode 100644 intern/cycles/util/util_function.h delete mode 100644 intern/cycles/util/util_guarded_allocator.cpp delete mode 100644 intern/cycles/util/util_guarded_allocator.h delete mode 100644 intern/cycles/util/util_half.h delete mode 100644 intern/cycles/util/util_hash.h delete mode 100644 intern/cycles/util/util_ies.cpp delete mode 100644 intern/cycles/util/util_ies.h delete mode 100644 intern/cycles/util/util_image.h delete mode 100644 intern/cycles/util/util_image_impl.h delete mode 100644 intern/cycles/util/util_list.h delete mode 100644 intern/cycles/util/util_logging.cpp delete mode 100644 intern/cycles/util/util_logging.h delete mode 100644 intern/cycles/util/util_map.h delete mode 100644 intern/cycles/util/util_math.h delete mode 100644 intern/cycles/util/util_math_cdf.cpp delete mode 100644 intern/cycles/util/util_math_cdf.h delete mode 100644 intern/cycles/util/util_math_fast.h delete mode 100644 intern/cycles/util/util_math_float2.h delete mode 100644 intern/cycles/util/util_math_float3.h delete mode 100644 intern/cycles/util/util_math_float4.h delete mode 100644 intern/cycles/util/util_math_int2.h delete mode 100644 intern/cycles/util/util_math_int3.h delete mode 100644 intern/cycles/util/util_math_int4.h delete mode 100644 intern/cycles/util/util_math_intersect.h delete mode 100644 intern/cycles/util/util_math_matrix.h delete mode 100644 intern/cycles/util/util_md5.cpp delete mode 100644 intern/cycles/util/util_md5.h delete mode 100644 intern/cycles/util/util_murmurhash.cpp delete mode 100644 intern/cycles/util/util_murmurhash.h delete mode 100644 intern/cycles/util/util_opengl.h delete mode 100644 
intern/cycles/util/util_openimagedenoise.h delete mode 100644 intern/cycles/util/util_openvdb.h delete mode 100644 intern/cycles/util/util_optimization.h delete mode 100644 intern/cycles/util/util_param.h delete mode 100644 intern/cycles/util/util_path.cpp delete mode 100644 intern/cycles/util/util_path.h delete mode 100644 intern/cycles/util/util_profiling.cpp delete mode 100644 intern/cycles/util/util_profiling.h delete mode 100644 intern/cycles/util/util_progress.h delete mode 100644 intern/cycles/util/util_projection.h delete mode 100644 intern/cycles/util/util_queue.h delete mode 100644 intern/cycles/util/util_rect.h delete mode 100644 intern/cycles/util/util_semaphore.h delete mode 100644 intern/cycles/util/util_set.h delete mode 100644 intern/cycles/util/util_simd.cpp delete mode 100644 intern/cycles/util/util_simd.h delete mode 100644 intern/cycles/util/util_sseb.h delete mode 100644 intern/cycles/util/util_ssef.h delete mode 100644 intern/cycles/util/util_ssei.h delete mode 100644 intern/cycles/util/util_stack_allocator.h delete mode 100644 intern/cycles/util/util_static_assert.h delete mode 100644 intern/cycles/util/util_stats.h delete mode 100644 intern/cycles/util/util_string.cpp delete mode 100644 intern/cycles/util/util_string.h delete mode 100644 intern/cycles/util/util_system.cpp delete mode 100644 intern/cycles/util/util_system.h delete mode 100644 intern/cycles/util/util_task.cpp delete mode 100644 intern/cycles/util/util_task.h delete mode 100644 intern/cycles/util/util_tbb.h delete mode 100644 intern/cycles/util/util_texture.h delete mode 100644 intern/cycles/util/util_thread.cpp delete mode 100644 intern/cycles/util/util_thread.h delete mode 100644 intern/cycles/util/util_time.cpp delete mode 100644 intern/cycles/util/util_time.h delete mode 100644 intern/cycles/util/util_transform.cpp delete mode 100644 intern/cycles/util/util_transform.h delete mode 100644 intern/cycles/util/util_types.h delete mode 100644 intern/cycles/util/util_types_float2.h delete mode 100644 intern/cycles/util/util_types_float2_impl.h delete mode 100644 intern/cycles/util/util_types_float3.h delete mode 100644 intern/cycles/util/util_types_float3_impl.h delete mode 100644 intern/cycles/util/util_types_float4.h delete mode 100644 intern/cycles/util/util_types_float4_impl.h delete mode 100644 intern/cycles/util/util_types_float8.h delete mode 100644 intern/cycles/util/util_types_float8_impl.h delete mode 100644 intern/cycles/util/util_types_int2.h delete mode 100644 intern/cycles/util/util_types_int2_impl.h delete mode 100644 intern/cycles/util/util_types_int3.h delete mode 100644 intern/cycles/util/util_types_int3_impl.h delete mode 100644 intern/cycles/util/util_types_int4.h delete mode 100644 intern/cycles/util/util_types_int4_impl.h delete mode 100644 intern/cycles/util/util_types_uchar2.h delete mode 100644 intern/cycles/util/util_types_uchar2_impl.h delete mode 100644 intern/cycles/util/util_types_uchar3.h delete mode 100644 intern/cycles/util/util_types_uchar3_impl.h delete mode 100644 intern/cycles/util/util_types_uchar4.h delete mode 100644 intern/cycles/util/util_types_uchar4_impl.h delete mode 100644 intern/cycles/util/util_types_uint2.h delete mode 100644 intern/cycles/util/util_types_uint2_impl.h delete mode 100644 intern/cycles/util/util_types_uint3.h delete mode 100644 intern/cycles/util/util_types_uint3_impl.h delete mode 100644 intern/cycles/util/util_types_uint4.h delete mode 100644 intern/cycles/util/util_types_uint4_impl.h delete mode 100644 
intern/cycles/util/util_types_ushort4.h delete mode 100644 intern/cycles/util/util_types_vector3.h delete mode 100644 intern/cycles/util/util_types_vector3_impl.h delete mode 100644 intern/cycles/util/util_unique_ptr.h delete mode 100644 intern/cycles/util/util_vector.h delete mode 100644 intern/cycles/util/util_version.h delete mode 100644 intern/cycles/util/util_view.cpp delete mode 100644 intern/cycles/util/util_view.h delete mode 100644 intern/cycles/util/util_windows.cpp delete mode 100644 intern/cycles/util/util_windows.h delete mode 100644 intern/cycles/util/util_xml.h create mode 100644 intern/cycles/util/vector.h create mode 100644 intern/cycles/util/version.h create mode 100644 intern/cycles/util/view.cpp create mode 100644 intern/cycles/util/view.h create mode 100644 intern/cycles/util/windows.cpp create mode 100644 intern/cycles/util/windows.h create mode 100644 intern/cycles/util/xml.h diff --git a/intern/cycles/app/cycles_server.cpp b/intern/cycles/app/cycles_server.cpp index 1ad70a376ed..38771b8aed8 100644 --- a/intern/cycles/app/cycles_server.cpp +++ b/intern/cycles/app/cycles_server.cpp @@ -18,13 +18,13 @@ #include "device/device.h" -#include "util/util_args.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_path.h" -#include "util/util_stats.h" -#include "util/util_string.h" -#include "util/util_task.h" +#include "util/args.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/path.h" +#include "util/stats.h" +#include "util/string.h" +#include "util/task.h" using namespace ccl; diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp index 800227ccf48..0032938b116 100644 --- a/intern/cycles/app/cycles_standalone.cpp +++ b/intern/cycles/app/cycles_standalone.cpp @@ -23,24 +23,24 @@ #include "session/buffers.h" #include "session/session.h" -#include "util/util_args.h" -#include "util/util_foreach.h" -#include "util/util_function.h" -#include "util/util_image.h" -#include "util/util_logging.h" -#include "util/util_path.h" -#include "util/util_progress.h" -#include "util/util_string.h" -#include "util/util_time.h" -#include "util/util_transform.h" -#include "util/util_unique_ptr.h" -#include "util/util_version.h" +#include "util/args.h" +#include "util/foreach.h" +#include "util/function.h" +#include "util/image.h" +#include "util/log.h" +#include "util/path.h" +#include "util/progress.h" +#include "util/string.h" +#include "util/time.h" +#include "util/transform.h" +#include "util/unique_ptr.h" +#include "util/version.h" #include "app/cycles_xml.h" #include "app/oiio_output_driver.h" #ifdef WITH_CYCLES_STANDALONE_GUI -# include "util/util_view.h" +# include "util/view.h" #endif #include "app/cycles_xml.h" diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp index 1ced74b6136..6144d2c60a9 100644 --- a/intern/cycles/app/cycles_xml.cpp +++ b/intern/cycles/app/cycles_xml.cpp @@ -35,14 +35,14 @@ #include "scene/shader_graph.h" #include "scene/shader_nodes.h" -#include "subd/subd_patch.h" -#include "subd/subd_split.h" - -#include "util/util_foreach.h" -#include "util/util_path.h" -#include "util/util_projection.h" -#include "util/util_transform.h" -#include "util/util_xml.h" +#include "subd/patch.h" +#include "subd/split.h" + +#include "util/foreach.h" +#include "util/path.h" +#include "util/projection.h" +#include "util/transform.h" +#include "util/xml.h" #include "app/cycles_xml.h" diff --git a/intern/cycles/app/oiio_output_driver.h 
b/intern/cycles/app/oiio_output_driver.h index a6984938fe7..a5c88e0e890 100644 --- a/intern/cycles/app/oiio_output_driver.h +++ b/intern/cycles/app/oiio_output_driver.h @@ -16,11 +16,11 @@ #include "session/output_driver.h" -#include "util/util_function.h" -#include "util/util_image.h" -#include "util/util_string.h" -#include "util/util_unique_ptr.h" -#include "util/util_vector.h" +#include "util/function.h" +#include "util/image.h" +#include "util/string.h" +#include "util/unique_ptr.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt index d948f2b3118..149967ad331 100644 --- a/intern/cycles/blender/CMakeLists.txt +++ b/intern/cycles/blender/CMakeLists.txt @@ -29,39 +29,39 @@ set(INC_SYS ) set(SRC - blender_camera.cpp - blender_device.cpp - blender_display_driver.cpp - blender_image.cpp - blender_geometry.cpp - blender_light.cpp - blender_mesh.cpp - blender_object.cpp - blender_object_cull.cpp - blender_output_driver.cpp - blender_particles.cpp - blender_curves.cpp - blender_logging.cpp - blender_python.cpp - blender_session.cpp - blender_shader.cpp - blender_sync.cpp - blender_texture.cpp - blender_viewport.cpp - blender_volume.cpp + camera.cpp + device.cpp + display_driver.cpp + image.cpp + geometry.cpp + light.cpp + mesh.cpp + object.cpp + object_cull.cpp + output_driver.cpp + particles.cpp + curves.cpp + logging.cpp + python.cpp + session.cpp + shader.cpp + sync.cpp + texture.cpp + viewport.cpp + volume.cpp CCL_api.h - blender_device.h - blender_display_driver.h - blender_id_map.h - blender_image.h - blender_object_cull.h - blender_output_driver.h - blender_sync.h - blender_session.h - blender_texture.h - blender_util.h - blender_viewport.h + device.h + display_driver.h + id_map.h + image.h + object_cull.h + output_driver.h + sync.h + session.h + texture.h + util.h + viewport.h ) set(LIB diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp deleted file mode 100644 index 670e25841f5..00000000000 --- a/intern/cycles/blender/blender_camera.cpp +++ /dev/null @@ -1,965 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "scene/camera.h" -#include "scene/scene.h" - -#include "blender/blender_sync.h" -#include "blender/blender_util.h" - -#include "util/util_logging.h" - -CCL_NAMESPACE_BEGIN - -/* Blender Camera Intermediate: we first convert both the offline and 3d view - * render camera to this, and from there convert to our native camera format. 
*/ - -struct BlenderCamera { - float nearclip; - float farclip; - - CameraType type; - float ortho_scale; - - float lens; - float shuttertime; - Camera::MotionPosition motion_position; - array shutter_curve; - - Camera::RollingShutterType rolling_shutter_type; - float rolling_shutter_duration; - - float aperturesize; - uint apertureblades; - float aperturerotation; - float focaldistance; - - float2 shift; - float2 offset; - float zoom; - - float2 pixelaspect; - - float aperture_ratio; - - PanoramaType panorama_type; - float fisheye_fov; - float fisheye_lens; - float latitude_min; - float latitude_max; - float longitude_min; - float longitude_max; - bool use_spherical_stereo; - float interocular_distance; - float convergence_distance; - bool use_pole_merge; - float pole_merge_angle_from; - float pole_merge_angle_to; - - enum { AUTO, HORIZONTAL, VERTICAL } sensor_fit; - float sensor_width; - float sensor_height; - - int full_width; - int full_height; - - int render_width; - int render_height; - - BoundBox2D border; - BoundBox2D viewport_camera_border; - BoundBox2D pano_viewplane; - float pano_aspectratio; - - float passepartout_alpha; - - Transform matrix; - - float offscreen_dicing_scale; - - int motion_steps; -}; - -static void blender_camera_init(BlenderCamera *bcam, BL::RenderSettings &b_render) -{ - memset((void *)bcam, 0, sizeof(BlenderCamera)); - - bcam->nearclip = 1e-5f; - bcam->farclip = 1e5f; - - bcam->type = CAMERA_PERSPECTIVE; - bcam->ortho_scale = 1.0f; - - bcam->lens = 50.0f; - bcam->shuttertime = 1.0f; - - bcam->rolling_shutter_type = Camera::ROLLING_SHUTTER_NONE; - bcam->rolling_shutter_duration = 0.1f; - - bcam->aperturesize = 0.0f; - bcam->apertureblades = 0; - bcam->aperturerotation = 0.0f; - bcam->focaldistance = 10.0f; - - bcam->zoom = 1.0f; - bcam->pixelaspect = one_float2(); - bcam->aperture_ratio = 1.0f; - - bcam->sensor_width = 36.0f; - bcam->sensor_height = 24.0f; - bcam->sensor_fit = BlenderCamera::AUTO; - bcam->motion_position = Camera::MOTION_POSITION_CENTER; - bcam->border.right = 1.0f; - bcam->border.top = 1.0f; - bcam->viewport_camera_border.right = 1.0f; - bcam->viewport_camera_border.top = 1.0f; - bcam->pano_viewplane.right = 1.0f; - bcam->pano_viewplane.top = 1.0f; - bcam->pano_aspectratio = 0.0f; - bcam->passepartout_alpha = 0.5f; - bcam->offscreen_dicing_scale = 1.0f; - bcam->matrix = transform_identity(); - - /* render resolution */ - bcam->render_width = render_resolution_x(b_render); - bcam->render_height = render_resolution_y(b_render); - bcam->full_width = bcam->render_width; - bcam->full_height = bcam->render_height; -} - -static float blender_camera_focal_distance(BL::RenderEngine &b_engine, - BL::Object &b_ob, - BL::Camera &b_camera, - BlenderCamera *bcam) -{ - BL::Object b_dof_object = b_camera.dof().focus_object(); - - if (!b_dof_object) - return b_camera.dof().focus_distance(); - - /* for dof object, return distance along camera Z direction */ - BL::Array b_ob_matrix; - b_engine.camera_model_matrix(b_ob, bcam->use_spherical_stereo, b_ob_matrix); - Transform obmat = transform_clear_scale(get_transform(b_ob_matrix)); - Transform dofmat = get_transform(b_dof_object.matrix_world()); - float3 view_dir = normalize(transform_get_column(&obmat, 2)); - float3 dof_dir = transform_get_column(&obmat, 3) - transform_get_column(&dofmat, 3); - return fabsf(dot(view_dir, dof_dir)); -} - -static void blender_camera_from_object(BlenderCamera *bcam, - BL::RenderEngine &b_engine, - BL::Object &b_ob, - bool skip_panorama = false) -{ - BL::ID b_ob_data = 
b_ob.data(); - - if (b_ob_data.is_a(&RNA_Camera)) { - BL::Camera b_camera(b_ob_data); - PointerRNA ccamera = RNA_pointer_get(&b_camera.ptr, "cycles"); - - bcam->nearclip = b_camera.clip_start(); - bcam->farclip = b_camera.clip_end(); - - switch (b_camera.type()) { - case BL::Camera::type_ORTHO: - bcam->type = CAMERA_ORTHOGRAPHIC; - break; - case BL::Camera::type_PANO: - if (!skip_panorama) - bcam->type = CAMERA_PANORAMA; - else - bcam->type = CAMERA_PERSPECTIVE; - break; - case BL::Camera::type_PERSP: - default: - bcam->type = CAMERA_PERSPECTIVE; - break; - } - - bcam->panorama_type = (PanoramaType)get_enum( - ccamera, "panorama_type", PANORAMA_NUM_TYPES, PANORAMA_EQUIRECTANGULAR); - - bcam->fisheye_fov = RNA_float_get(&ccamera, "fisheye_fov"); - bcam->fisheye_lens = RNA_float_get(&ccamera, "fisheye_lens"); - bcam->latitude_min = RNA_float_get(&ccamera, "latitude_min"); - bcam->latitude_max = RNA_float_get(&ccamera, "latitude_max"); - bcam->longitude_min = RNA_float_get(&ccamera, "longitude_min"); - bcam->longitude_max = RNA_float_get(&ccamera, "longitude_max"); - - bcam->interocular_distance = b_camera.stereo().interocular_distance(); - if (b_camera.stereo().convergence_mode() == BL::CameraStereoData::convergence_mode_PARALLEL) { - bcam->convergence_distance = FLT_MAX; - } - else { - bcam->convergence_distance = b_camera.stereo().convergence_distance(); - } - bcam->use_spherical_stereo = b_engine.use_spherical_stereo(b_ob); - - bcam->use_pole_merge = b_camera.stereo().use_pole_merge(); - bcam->pole_merge_angle_from = b_camera.stereo().pole_merge_angle_from(); - bcam->pole_merge_angle_to = b_camera.stereo().pole_merge_angle_to(); - - bcam->ortho_scale = b_camera.ortho_scale(); - - bcam->lens = b_camera.lens(); - - bcam->passepartout_alpha = b_camera.show_passepartout() ? b_camera.passepartout_alpha() : 0.0f; - - if (b_camera.dof().use_dof()) { - /* allow f/stop number to change aperture_size but still - * give manual control over aperture radius */ - float fstop = b_camera.dof().aperture_fstop(); - fstop = max(fstop, 1e-5f); - - if (bcam->type == CAMERA_ORTHOGRAPHIC) - bcam->aperturesize = 1.0f / (2.0f * fstop); - else - bcam->aperturesize = (bcam->lens * 1e-3f) / (2.0f * fstop); - - bcam->apertureblades = b_camera.dof().aperture_blades(); - bcam->aperturerotation = b_camera.dof().aperture_rotation(); - bcam->focaldistance = blender_camera_focal_distance(b_engine, b_ob, b_camera, bcam); - bcam->aperture_ratio = b_camera.dof().aperture_ratio(); - } - else { - /* DOF is turned of for the camera. */ - bcam->aperturesize = 0.0f; - bcam->apertureblades = 0; - bcam->aperturerotation = 0.0f; - bcam->focaldistance = 0.0f; - bcam->aperture_ratio = 1.0f; - } - - bcam->shift.x = b_engine.camera_shift_x(b_ob, bcam->use_spherical_stereo); - bcam->shift.y = b_camera.shift_y(); - - bcam->sensor_width = b_camera.sensor_width(); - bcam->sensor_height = b_camera.sensor_height(); - - if (b_camera.sensor_fit() == BL::Camera::sensor_fit_AUTO) - bcam->sensor_fit = BlenderCamera::AUTO; - else if (b_camera.sensor_fit() == BL::Camera::sensor_fit_HORIZONTAL) - bcam->sensor_fit = BlenderCamera::HORIZONTAL; - else - bcam->sensor_fit = BlenderCamera::VERTICAL; - } - else if (b_ob_data.is_a(&RNA_Light)) { - /* Can also look through spot light. 
*/ - BL::SpotLight b_light(b_ob_data); - float lens = 16.0f / tanf(b_light.spot_size() * 0.5f); - if (lens > 0.0f) { - bcam->lens = lens; - } - } - - bcam->motion_steps = object_motion_steps(b_ob, b_ob); -} - -static Transform blender_camera_matrix(const Transform &tfm, - const CameraType type, - const PanoramaType panorama_type) -{ - Transform result; - - if (type == CAMERA_PANORAMA) { - if (panorama_type == PANORAMA_MIRRORBALL) { - /* Mirror ball camera is looking into the negative Y direction - * which matches texture mirror ball mapping. - */ - result = tfm * make_transform( - 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f); - } - else { - /* Make it so environment camera needs to be pointed in the direction - * of the positive x-axis to match an environment texture, this way - * it is looking at the center of the texture - */ - result = tfm * make_transform( - 0.0f, -1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 0.0f, 0.0f); - } - } - else { - /* note the blender camera points along the negative z-axis */ - result = tfm * transform_scale(1.0f, 1.0f, -1.0f); - } - - return transform_clear_scale(result); -} - -static void blender_camera_viewplane(BlenderCamera *bcam, - int width, - int height, - BoundBox2D *viewplane, - float *aspectratio, - float *sensor_size) -{ - /* dimensions */ - float xratio = (float)width * bcam->pixelaspect.x; - float yratio = (float)height * bcam->pixelaspect.y; - - /* compute x/y aspect and ratio */ - float xaspect, yaspect; - bool horizontal_fit; - - /* sensor fitting */ - if (bcam->sensor_fit == BlenderCamera::AUTO) { - horizontal_fit = (xratio > yratio); - if (sensor_size != NULL) { - *sensor_size = bcam->sensor_width; - } - } - else if (bcam->sensor_fit == BlenderCamera::HORIZONTAL) { - horizontal_fit = true; - if (sensor_size != NULL) { - *sensor_size = bcam->sensor_width; - } - } - else { - horizontal_fit = false; - if (sensor_size != NULL) { - *sensor_size = bcam->sensor_height; - } - } - - if (horizontal_fit) { - if (aspectratio != NULL) { - *aspectratio = xratio / yratio; - } - xaspect = *aspectratio; - yaspect = 1.0f; - } - else { - if (aspectratio != NULL) { - *aspectratio = yratio / xratio; - } - xaspect = 1.0f; - yaspect = *aspectratio; - } - - /* modify aspect for orthographic scale */ - if (bcam->type == CAMERA_ORTHOGRAPHIC) { - xaspect = xaspect * bcam->ortho_scale / (*aspectratio * 2.0f); - yaspect = yaspect * bcam->ortho_scale / (*aspectratio * 2.0f); - if (aspectratio != NULL) { - *aspectratio = bcam->ortho_scale / 2.0f; - } - } - - if (bcam->type == CAMERA_PANORAMA) { - /* Set viewplane for panoramic camera. */ - if (viewplane != NULL) { - *viewplane = bcam->pano_viewplane; - - /* Modify viewplane for camera shift. */ - const float shift_factor = (bcam->pano_aspectratio == 0.0f) ? 
- 1.0f : - *aspectratio / bcam->pano_aspectratio; - const float dx = bcam->shift.x * shift_factor; - const float dy = bcam->shift.y * shift_factor; - - viewplane->left += dx; - viewplane->right += dx; - viewplane->bottom += dy; - viewplane->top += dy; - } - } - else { - /* set viewplane */ - if (viewplane != NULL) { - viewplane->left = -xaspect; - viewplane->right = xaspect; - viewplane->bottom = -yaspect; - viewplane->top = yaspect; - - /* zoom for 3d camera view */ - *viewplane = (*viewplane) * bcam->zoom; - - /* modify viewplane with camera shift and 3d camera view offset */ - const float dx = 2.0f * (*aspectratio * bcam->shift.x + bcam->offset.x * xaspect * 2.0f); - const float dy = 2.0f * (*aspectratio * bcam->shift.y + bcam->offset.y * yaspect * 2.0f); - - viewplane->left += dx; - viewplane->right += dx; - viewplane->bottom += dy; - viewplane->top += dy; - } - } -} - -static void blender_camera_sync(Camera *cam, - BlenderCamera *bcam, - int width, - int height, - const char *viewname, - PointerRNA *cscene) -{ - float aspectratio, sensor_size; - - /* viewplane */ - BoundBox2D viewplane; - blender_camera_viewplane(bcam, width, height, &viewplane, &aspectratio, &sensor_size); - - cam->set_viewplane_left(viewplane.left); - cam->set_viewplane_right(viewplane.right); - cam->set_viewplane_top(viewplane.top); - cam->set_viewplane_bottom(viewplane.bottom); - - cam->set_full_width(width); - cam->set_full_height(height); - - /* panorama sensor */ - if (bcam->type == CAMERA_PANORAMA && bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID) { - float fit_xratio = (float)bcam->render_width * bcam->pixelaspect.x; - float fit_yratio = (float)bcam->render_height * bcam->pixelaspect.y; - bool horizontal_fit; - float sensor_size; - - if (bcam->sensor_fit == BlenderCamera::AUTO) { - horizontal_fit = (fit_xratio > fit_yratio); - sensor_size = bcam->sensor_width; - } - else if (bcam->sensor_fit == BlenderCamera::HORIZONTAL) { - horizontal_fit = true; - sensor_size = bcam->sensor_width; - } - else { /* vertical */ - horizontal_fit = false; - sensor_size = bcam->sensor_height; - } - - if (horizontal_fit) { - cam->set_sensorwidth(sensor_size); - cam->set_sensorheight(sensor_size * fit_yratio / fit_xratio); - } - else { - cam->set_sensorwidth(sensor_size * fit_xratio / fit_yratio); - cam->set_sensorheight(sensor_size); - } - } - - /* clipping distances */ - cam->set_nearclip(bcam->nearclip); - cam->set_farclip(bcam->farclip); - - /* type */ - cam->set_camera_type(bcam->type); - - /* panorama */ - cam->set_panorama_type(bcam->panorama_type); - cam->set_fisheye_fov(bcam->fisheye_fov); - cam->set_fisheye_lens(bcam->fisheye_lens); - cam->set_latitude_min(bcam->latitude_min); - cam->set_latitude_max(bcam->latitude_max); - - cam->set_longitude_min(bcam->longitude_min); - cam->set_longitude_max(bcam->longitude_max); - - /* panorama stereo */ - cam->set_interocular_distance(bcam->interocular_distance); - cam->set_convergence_distance(bcam->convergence_distance); - cam->set_use_spherical_stereo(bcam->use_spherical_stereo); - - if (cam->get_use_spherical_stereo()) { - if (strcmp(viewname, "left") == 0) - cam->set_stereo_eye(Camera::STEREO_LEFT); - else if (strcmp(viewname, "right") == 0) - cam->set_stereo_eye(Camera::STEREO_RIGHT); - else - cam->set_stereo_eye(Camera::STEREO_NONE); - } - - cam->set_use_pole_merge(bcam->use_pole_merge); - cam->set_pole_merge_angle_from(bcam->pole_merge_angle_from); - cam->set_pole_merge_angle_to(bcam->pole_merge_angle_to); - - /* anamorphic lens bokeh */ - 
cam->set_aperture_ratio(bcam->aperture_ratio); - - /* perspective */ - cam->set_fov(2.0f * atanf((0.5f * sensor_size) / bcam->lens / aspectratio)); - cam->set_focaldistance(bcam->focaldistance); - cam->set_aperturesize(bcam->aperturesize); - cam->set_blades(bcam->apertureblades); - cam->set_bladesrotation(bcam->aperturerotation); - - /* transform */ - cam->set_matrix(blender_camera_matrix(bcam->matrix, bcam->type, bcam->panorama_type)); - - array motion; - motion.resize(bcam->motion_steps, cam->get_matrix()); - cam->set_motion(motion); - cam->set_use_perspective_motion(false); - - cam->set_shuttertime(bcam->shuttertime); - cam->set_fov_pre(cam->get_fov()); - cam->set_fov_post(cam->get_fov()); - cam->set_motion_position(bcam->motion_position); - - cam->set_rolling_shutter_type(bcam->rolling_shutter_type); - cam->set_rolling_shutter_duration(bcam->rolling_shutter_duration); - - cam->set_shutter_curve(bcam->shutter_curve); - - /* border */ - cam->set_border_left(bcam->border.left); - cam->set_border_right(bcam->border.right); - cam->set_border_top(bcam->border.top); - cam->set_border_bottom(bcam->border.bottom); - - cam->set_viewport_camera_border_left(bcam->viewport_camera_border.left); - cam->set_viewport_camera_border_right(bcam->viewport_camera_border.right); - cam->set_viewport_camera_border_top(bcam->viewport_camera_border.top); - cam->set_viewport_camera_border_bottom(bcam->viewport_camera_border.bottom); - - bcam->offscreen_dicing_scale = RNA_float_get(cscene, "offscreen_dicing_scale"); - cam->set_offscreen_dicing_scale(bcam->offscreen_dicing_scale); -} - -/* Sync Render Camera */ - -void BlenderSync::sync_camera(BL::RenderSettings &b_render, - BL::Object &b_override, - int width, - int height, - const char *viewname) -{ - BlenderCamera bcam; - blender_camera_init(&bcam, b_render); - - /* pixel aspect */ - bcam.pixelaspect.x = b_render.pixel_aspect_x(); - bcam.pixelaspect.y = b_render.pixel_aspect_y(); - bcam.shuttertime = b_render.motion_blur_shutter(); - - BL::CurveMapping b_shutter_curve(b_render.motion_blur_shutter_curve()); - curvemapping_to_array(b_shutter_curve, bcam.shutter_curve, RAMP_TABLE_SIZE); - - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - bcam.motion_position = (Camera::MotionPosition)get_enum(cscene, - "motion_blur_position", - Camera::MOTION_NUM_POSITIONS, - Camera::MOTION_POSITION_CENTER); - bcam.rolling_shutter_type = (Camera::RollingShutterType)get_enum( - cscene, - "rolling_shutter_type", - Camera::ROLLING_SHUTTER_NUM_TYPES, - Camera::ROLLING_SHUTTER_NONE); - bcam.rolling_shutter_duration = RNA_float_get(&cscene, "rolling_shutter_duration"); - - /* border */ - if (b_render.use_border()) { - bcam.border.left = b_render.border_min_x(); - bcam.border.right = b_render.border_max_x(); - bcam.border.bottom = b_render.border_min_y(); - bcam.border.top = b_render.border_max_y(); - } - - /* camera object */ - BL::Object b_ob = b_scene.camera(); - - if (b_override) - b_ob = b_override; - - if (b_ob) { - BL::Array b_ob_matrix; - blender_camera_from_object(&bcam, b_engine, b_ob); - b_engine.camera_model_matrix(b_ob, bcam.use_spherical_stereo, b_ob_matrix); - bcam.matrix = get_transform(b_ob_matrix); - } - - /* sync */ - Camera *cam = scene->camera; - blender_camera_sync(cam, &bcam, width, height, viewname, &cscene); - - /* dicing camera */ - b_ob = BL::Object(RNA_pointer_get(&cscene, "dicing_camera")); - if (b_ob) { - BL::Array b_ob_matrix; - blender_camera_from_object(&bcam, b_engine, b_ob); - b_engine.camera_model_matrix(b_ob, bcam.use_spherical_stereo, 
b_ob_matrix); - bcam.matrix = get_transform(b_ob_matrix); - - blender_camera_sync(scene->dicing_camera, &bcam, width, height, viewname, &cscene); - } - else { - *scene->dicing_camera = *cam; - } -} - -void BlenderSync::sync_camera_motion( - BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time) -{ - if (!b_ob) - return; - - Camera *cam = scene->camera; - BL::Array b_ob_matrix; - b_engine.camera_model_matrix(b_ob, cam->get_use_spherical_stereo(), b_ob_matrix); - Transform tfm = get_transform(b_ob_matrix); - tfm = blender_camera_matrix(tfm, cam->get_camera_type(), cam->get_panorama_type()); - - if (motion_time == 0.0f) { - /* When motion blur is not centered in frame, cam->matrix gets reset. */ - cam->set_matrix(tfm); - } - - /* Set transform in motion array. */ - int motion_step = cam->motion_step(motion_time); - if (motion_step >= 0) { - array motion = cam->get_motion(); - motion[motion_step] = tfm; - cam->set_motion(motion); - } - - if (cam->get_camera_type() == CAMERA_PERSPECTIVE) { - BlenderCamera bcam; - float aspectratio, sensor_size; - blender_camera_init(&bcam, b_render); - - /* TODO(sergey): Consider making it a part of blender_camera_init(). */ - bcam.pixelaspect.x = b_render.pixel_aspect_x(); - bcam.pixelaspect.y = b_render.pixel_aspect_y(); - - blender_camera_from_object(&bcam, b_engine, b_ob); - blender_camera_viewplane(&bcam, width, height, NULL, &aspectratio, &sensor_size); - /* TODO(sergey): De-duplicate calculation with camera sync. */ - float fov = 2.0f * atanf((0.5f * sensor_size) / bcam.lens / aspectratio); - if (fov != cam->get_fov()) { - VLOG(1) << "Camera " << b_ob.name() << " FOV change detected."; - if (motion_time == 0.0f) { - cam->set_fov(fov); - } - else if (motion_time == -1.0f) { - cam->set_fov_pre(fov); - cam->set_use_perspective_motion(true); - } - else if (motion_time == 1.0f) { - cam->set_fov_post(fov); - cam->set_use_perspective_motion(true); - } - } - } -} - -/* Sync 3D View Camera */ - -static void blender_camera_view_subset(BL::RenderEngine &b_engine, - BL::RenderSettings &b_render, - BL::Scene &b_scene, - BL::Object &b_ob, - BL::SpaceView3D &b_v3d, - BL::RegionView3D &b_rv3d, - int width, - int height, - BoundBox2D *view_box, - BoundBox2D *cam_box, - float *view_aspect); - -static void blender_camera_from_view(BlenderCamera *bcam, - BL::RenderEngine &b_engine, - BL::Scene &b_scene, - BL::SpaceView3D &b_v3d, - BL::RegionView3D &b_rv3d, - int width, - int height, - bool skip_panorama = false) -{ - /* 3d view parameters */ - bcam->nearclip = b_v3d.clip_start(); - bcam->farclip = b_v3d.clip_end(); - bcam->lens = b_v3d.lens(); - bcam->shuttertime = b_scene.render().motion_blur_shutter(); - - BL::CurveMapping b_shutter_curve(b_scene.render().motion_blur_shutter_curve()); - curvemapping_to_array(b_shutter_curve, bcam->shutter_curve, RAMP_TABLE_SIZE); - - if (b_rv3d.view_perspective() == BL::RegionView3D::view_perspective_CAMERA) { - /* camera view */ - BL::Object b_ob = (b_v3d.use_local_camera()) ? 
b_v3d.camera() : b_scene.camera(); - - if (b_ob) { - blender_camera_from_object(bcam, b_engine, b_ob, skip_panorama); - - if (!skip_panorama && bcam->type == CAMERA_PANORAMA) { - /* in panorama camera view, we map viewplane to camera border */ - BoundBox2D view_box, cam_box; - float view_aspect; - - BL::RenderSettings b_render_settings(b_scene.render()); - blender_camera_view_subset(b_engine, - b_render_settings, - b_scene, - b_ob, - b_v3d, - b_rv3d, - width, - height, - &view_box, - &cam_box, - &view_aspect); - - bcam->pano_viewplane = view_box.make_relative_to(cam_box); - bcam->pano_aspectratio = view_aspect; - } - else { - /* magic zoom formula */ - bcam->zoom = (float)b_rv3d.view_camera_zoom(); - bcam->zoom = (1.41421f + bcam->zoom / 50.0f); - bcam->zoom *= bcam->zoom; - bcam->zoom = 2.0f / bcam->zoom; - - /* offset */ - bcam->offset = get_float2(b_rv3d.view_camera_offset()); - } - } - } - else if (b_rv3d.view_perspective() == BL::RegionView3D::view_perspective_ORTHO) { - /* orthographic view */ - bcam->farclip *= 0.5f; - bcam->nearclip = -bcam->farclip; - - float sensor_size; - if (bcam->sensor_fit == BlenderCamera::VERTICAL) - sensor_size = bcam->sensor_height; - else - sensor_size = bcam->sensor_width; - - bcam->type = CAMERA_ORTHOGRAPHIC; - bcam->ortho_scale = b_rv3d.view_distance() * sensor_size / b_v3d.lens(); - } - - bcam->zoom *= 2.0f; - - /* 3d view transform */ - bcam->matrix = transform_inverse(get_transform(b_rv3d.view_matrix())); - - /* dimensions */ - bcam->full_width = width; - bcam->full_height = height; -} - -static void blender_camera_view_subset(BL::RenderEngine &b_engine, - BL::RenderSettings &b_render, - BL::Scene &b_scene, - BL::Object &b_ob, - BL::SpaceView3D &b_v3d, - BL::RegionView3D &b_rv3d, - int width, - int height, - BoundBox2D *view_box, - BoundBox2D *cam_box, - float *view_aspect) -{ - BoundBox2D cam, view; - float cam_aspect, sensor_size; - - /* Get viewport viewplane. */ - BlenderCamera view_bcam; - blender_camera_init(&view_bcam, b_render); - blender_camera_from_view(&view_bcam, b_engine, b_scene, b_v3d, b_rv3d, width, height, true); - - blender_camera_viewplane(&view_bcam, width, height, &view, view_aspect, &sensor_size); - - /* Get camera viewplane. */ - BlenderCamera cam_bcam; - blender_camera_init(&cam_bcam, b_render); - blender_camera_from_object(&cam_bcam, b_engine, b_ob, true); - - /* Camera border is affect by aspect, viewport is not. */ - cam_bcam.pixelaspect.x = b_render.pixel_aspect_x(); - cam_bcam.pixelaspect.y = b_render.pixel_aspect_y(); - - blender_camera_viewplane( - &cam_bcam, cam_bcam.full_width, cam_bcam.full_height, &cam, &cam_aspect, &sensor_size); - - /* Return */ - *view_box = view * (1.0f / *view_aspect); - *cam_box = cam * (1.0f / cam_aspect); -} - -static void blender_camera_border_subset(BL::RenderEngine &b_engine, - BL::RenderSettings &b_render, - BL::Scene &b_scene, - BL::SpaceView3D &b_v3d, - BL::RegionView3D &b_rv3d, - BL::Object &b_ob, - int width, - int height, - const BoundBox2D &border, - BoundBox2D *result) -{ - /* Determine camera viewport subset. */ - BoundBox2D view_box, cam_box; - float view_aspect; - blender_camera_view_subset(b_engine, - b_render, - b_scene, - b_ob, - b_v3d, - b_rv3d, - width, - height, - &view_box, - &cam_box, - &view_aspect); - - /* Determine viewport subset matching given border. 
*/ - cam_box = cam_box.make_relative_to(view_box); - *result = cam_box.subset(border); -} - -static void blender_camera_border(BlenderCamera *bcam, - BL::RenderEngine &b_engine, - BL::RenderSettings &b_render, - BL::Scene &b_scene, - BL::SpaceView3D &b_v3d, - BL::RegionView3D &b_rv3d, - int width, - int height) -{ - bool is_camera_view; - - /* camera view? */ - is_camera_view = b_rv3d.view_perspective() == BL::RegionView3D::view_perspective_CAMERA; - - if (!is_camera_view) { - /* for non-camera view check whether render border is enabled for viewport - * and if so use border from 3d viewport - * assume viewport has got correctly clamped border already - */ - if (b_v3d.use_render_border()) { - bcam->border.left = b_v3d.render_border_min_x(); - bcam->border.right = b_v3d.render_border_max_x(); - bcam->border.bottom = b_v3d.render_border_min_y(); - bcam->border.top = b_v3d.render_border_max_y(); - } - return; - } - - BL::Object b_ob = (b_v3d.use_local_camera()) ? b_v3d.camera() : b_scene.camera(); - - if (!b_ob) - return; - - /* Determine camera border inside the viewport. */ - BoundBox2D full_border; - blender_camera_border_subset(b_engine, - b_render, - b_scene, - b_v3d, - b_rv3d, - b_ob, - width, - height, - full_border, - &bcam->viewport_camera_border); - - if (b_render.use_border()) { - bcam->border.left = b_render.border_min_x(); - bcam->border.right = b_render.border_max_x(); - bcam->border.bottom = b_render.border_min_y(); - bcam->border.top = b_render.border_max_y(); - } - else if (bcam->passepartout_alpha == 1.0f) { - bcam->border = full_border; - } - else { - return; - } - - /* Determine viewport subset matching camera border. */ - blender_camera_border_subset(b_engine, - b_render, - b_scene, - b_v3d, - b_rv3d, - b_ob, - width, - height, - bcam->border, - &bcam->border); - bcam->border = bcam->border.clamp(); -} - -void BlenderSync::sync_view(BL::SpaceView3D &b_v3d, - BL::RegionView3D &b_rv3d, - int width, - int height) -{ - BlenderCamera bcam; - BL::RenderSettings b_render_settings(b_scene.render()); - blender_camera_init(&bcam, b_render_settings); - blender_camera_from_view(&bcam, b_engine, b_scene, b_v3d, b_rv3d, width, height); - blender_camera_border(&bcam, b_engine, b_render_settings, b_scene, b_v3d, b_rv3d, width, height); - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - blender_camera_sync(scene->camera, &bcam, width, height, "", &cscene); - - /* dicing camera */ - BL::Object b_ob = BL::Object(RNA_pointer_get(&cscene, "dicing_camera")); - if (b_ob) { - BL::Array b_ob_matrix; - blender_camera_from_object(&bcam, b_engine, b_ob); - b_engine.camera_model_matrix(b_ob, bcam.use_spherical_stereo, b_ob_matrix); - bcam.matrix = get_transform(b_ob_matrix); - - blender_camera_sync(scene->dicing_camera, &bcam, width, height, "", &cscene); - } - else { - *scene->dicing_camera = *scene->camera; - } -} - -BufferParams BlenderSync::get_buffer_params( - BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height) -{ - BufferParams params; - bool use_border = false; - - params.full_width = width; - params.full_height = height; - - if (b_v3d && b_rv3d && b_rv3d.view_perspective() != BL::RegionView3D::view_perspective_CAMERA) - use_border = b_v3d.use_render_border(); - else - /* the camera can always have a passepartout */ - use_border = true; - - if (use_border) { - /* border render */ - /* the viewport may offset the border outside the view */ - BoundBox2D border = cam->border.clamp(); - params.full_x = (int)(border.left * (float)width); - 
params.full_y = (int)(border.bottom * (float)height); - params.width = (int)(border.right * (float)width) - params.full_x; - params.height = (int)(border.top * (float)height) - params.full_y; - - /* survive in case border goes out of view or becomes too small */ - params.width = max(params.width, 1); - params.height = max(params.height, 1); - } - else { - params.width = width; - params.height = height; - } - - params.window_width = params.width; - params.window_height = params.height; - - return params; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp deleted file mode 100644 index 84333faaa23..00000000000 --- a/intern/cycles/blender/blender_curves.cpp +++ /dev/null @@ -1,915 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "scene/attribute.h" -#include "scene/camera.h" -#include "scene/curves.h" -#include "scene/hair.h" -#include "scene/object.h" -#include "scene/scene.h" - -#include "blender/blender_sync.h" -#include "blender/blender_util.h" - -#include "util/util_color.h" -#include "util/util_foreach.h" -#include "util/util_hash.h" -#include "util/util_logging.h" - -CCL_NAMESPACE_BEGIN - -ParticleCurveData::ParticleCurveData() -{ -} - -ParticleCurveData::~ParticleCurveData() -{ -} - -static float shaperadius(float shape, float root, float tip, float time) -{ - assert(time >= 0.0f); - assert(time <= 1.0f); - float radius = 1.0f - time; - - if (shape != 0.0f) { - if (shape < 0.0f) - radius = powf(radius, 1.0f + shape); - else - radius = powf(radius, 1.0f / (1.0f - shape)); - } - return (radius * (root - tip)) + tip; -} - -/* curve functions */ - -static bool ObtainCacheParticleData( - Hair *hair, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background) -{ - int curvenum = 0; - int keyno = 0; - - if (!(hair && b_mesh && b_ob && CData)) - return false; - - Transform tfm = get_transform(b_ob->matrix_world()); - Transform itfm = transform_quick_inverse(tfm); - - for (BL::Modifier &b_mod : b_ob->modifiers) { - if ((b_mod.type() == b_mod.type_PARTICLE_SYSTEM) && - (background ? b_mod.show_render() : b_mod.show_viewport())) { - BL::ParticleSystemModifier psmd((const PointerRNA)b_mod.ptr); - BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr); - BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr); - - if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && - (b_part.type() == BL::ParticleSettings::type_HAIR)) { - int shader = clamp(b_part.material() - 1, 0, hair->get_used_shaders().size() - 1); - int display_step = background ? b_part.render_step() : b_part.display_step(); - int totparts = b_psys.particles.length(); - int totchild = background ? 
b_psys.child_particles.length() : - (int)((float)b_psys.child_particles.length() * - (float)b_part.display_percentage() / 100.0f); - int totcurves = totchild; - - if (b_part.child_type() == 0 || totchild == 0) - totcurves += totparts; - - if (totcurves == 0) - continue; - - int ren_step = (1 << display_step) + 1; - if (b_part.kink() == BL::ParticleSettings::kink_SPIRAL) - ren_step += b_part.kink_extra_steps(); - - CData->psys_firstcurve.push_back_slow(curvenum); - CData->psys_curvenum.push_back_slow(totcurves); - CData->psys_shader.push_back_slow(shader); - - float radius = b_part.radius_scale() * 0.5f; - - CData->psys_rootradius.push_back_slow(radius * b_part.root_radius()); - CData->psys_tipradius.push_back_slow(radius * b_part.tip_radius()); - CData->psys_shape.push_back_slow(b_part.shape()); - CData->psys_closetip.push_back_slow(b_part.use_close_tip()); - - int pa_no = 0; - if (!(b_part.child_type() == 0) && totchild != 0) - pa_no = totparts; - - int num_add = (totparts + totchild - pa_no); - CData->curve_firstkey.reserve(CData->curve_firstkey.size() + num_add); - CData->curve_keynum.reserve(CData->curve_keynum.size() + num_add); - CData->curve_length.reserve(CData->curve_length.size() + num_add); - CData->curvekey_co.reserve(CData->curvekey_co.size() + num_add * ren_step); - CData->curvekey_time.reserve(CData->curvekey_time.size() + num_add * ren_step); - - for (; pa_no < totparts + totchild; pa_no++) { - int keynum = 0; - CData->curve_firstkey.push_back_slow(keyno); - - float curve_length = 0.0f; - float3 prev_co_world = zero_float3(); - float3 prev_co_object = zero_float3(); - for (int step_no = 0; step_no < ren_step; step_no++) { - float3 co_world = prev_co_world; - b_psys.co_hair(*b_ob, pa_no, step_no, &co_world.x); - float3 co_object = transform_point(&itfm, co_world); - if (step_no > 0) { - const float step_length = len(co_object - prev_co_object); - curve_length += step_length; - } - CData->curvekey_co.push_back_slow(co_object); - CData->curvekey_time.push_back_slow(curve_length); - prev_co_object = co_object; - prev_co_world = co_world; - keynum++; - } - keyno += keynum; - - CData->curve_keynum.push_back_slow(keynum); - CData->curve_length.push_back_slow(curve_length); - curvenum++; - } - } - } - } - - return true; -} - -static bool ObtainCacheParticleUV(Hair *hair, - BL::Mesh *b_mesh, - BL::Object *b_ob, - ParticleCurveData *CData, - bool background, - int uv_num) -{ - if (!(hair && b_mesh && b_ob && CData)) - return false; - - CData->curve_uv.clear(); - - for (BL::Modifier &b_mod : b_ob->modifiers) { - if ((b_mod.type() == b_mod.type_PARTICLE_SYSTEM) && - (background ? b_mod.show_render() : b_mod.show_viewport())) { - BL::ParticleSystemModifier psmd((const PointerRNA)b_mod.ptr); - BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr); - BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr); - - if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && - (b_part.type() == BL::ParticleSettings::type_HAIR)) { - int totparts = b_psys.particles.length(); - int totchild = background ? 
b_psys.child_particles.length() : - (int)((float)b_psys.child_particles.length() * - (float)b_part.display_percentage() / 100.0f); - int totcurves = totchild; - - if (b_part.child_type() == 0 || totchild == 0) - totcurves += totparts; - - if (totcurves == 0) - continue; - - int pa_no = 0; - if (!(b_part.child_type() == 0) && totchild != 0) - pa_no = totparts; - - int num_add = (totparts + totchild - pa_no); - CData->curve_uv.reserve(CData->curve_uv.size() + num_add); - - BL::ParticleSystem::particles_iterator b_pa; - b_psys.particles.begin(b_pa); - for (; pa_no < totparts + totchild; pa_no++) { - /* Add UVs */ - BL::Mesh::uv_layers_iterator l; - b_mesh->uv_layers.begin(l); - - float2 uv = zero_float2(); - if (b_mesh->uv_layers.length()) - b_psys.uv_on_emitter(psmd, *b_pa, pa_no, uv_num, &uv.x); - CData->curve_uv.push_back_slow(uv); - - if (pa_no < totparts && b_pa != b_psys.particles.end()) - ++b_pa; - } - } - } - } - - return true; -} - -static bool ObtainCacheParticleVcol(Hair *hair, - BL::Mesh *b_mesh, - BL::Object *b_ob, - ParticleCurveData *CData, - bool background, - int vcol_num) -{ - if (!(hair && b_mesh && b_ob && CData)) - return false; - - CData->curve_vcol.clear(); - - for (BL::Modifier &b_mod : b_ob->modifiers) { - if ((b_mod.type() == b_mod.type_PARTICLE_SYSTEM) && - (background ? b_mod.show_render() : b_mod.show_viewport())) { - BL::ParticleSystemModifier psmd((const PointerRNA)b_mod.ptr); - BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr); - BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr); - - if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && - (b_part.type() == BL::ParticleSettings::type_HAIR)) { - int totparts = b_psys.particles.length(); - int totchild = background ? 
b_psys.child_particles.length() : - (int)((float)b_psys.child_particles.length() * - (float)b_part.display_percentage() / 100.0f); - int totcurves = totchild; - - if (b_part.child_type() == 0 || totchild == 0) - totcurves += totparts; - - if (totcurves == 0) - continue; - - int pa_no = 0; - if (!(b_part.child_type() == 0) && totchild != 0) - pa_no = totparts; - - int num_add = (totparts + totchild - pa_no); - CData->curve_vcol.reserve(CData->curve_vcol.size() + num_add); - - BL::ParticleSystem::particles_iterator b_pa; - b_psys.particles.begin(b_pa); - for (; pa_no < totparts + totchild; pa_no++) { - /* Add vertex colors */ - BL::Mesh::vertex_colors_iterator l; - b_mesh->vertex_colors.begin(l); - - float4 vcol = make_float4(0.0f, 0.0f, 0.0f, 1.0f); - if (b_mesh->vertex_colors.length()) - b_psys.mcol_on_emitter(psmd, *b_pa, pa_no, vcol_num, &vcol.x); - CData->curve_vcol.push_back_slow(vcol); - - if (pa_no < totparts && b_pa != b_psys.particles.end()) - ++b_pa; - } - } - } - } - - return true; -} - -static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CData) -{ - int num_keys = 0; - int num_curves = 0; - - if (hair->num_curves()) - return; - - Attribute *attr_intercept = NULL; - Attribute *attr_length = NULL; - Attribute *attr_random = NULL; - - if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) - attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT); - if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH)) - attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH); - if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) - attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM); - - /* compute and reserve size of arrays */ - for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) { - for (int curve = CData->psys_firstcurve[sys]; - curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; - curve++) { - num_keys += CData->curve_keynum[curve]; - num_curves++; - } - } - - if (num_curves > 0) { - VLOG(1) << "Exporting curve segments for mesh " << hair->name; - } - - hair->reserve_curves(hair->num_curves() + num_curves, hair->get_curve_keys().size() + num_keys); - - num_keys = 0; - num_curves = 0; - - /* actually export */ - for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) { - for (int curve = CData->psys_firstcurve[sys]; - curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; - curve++) { - size_t num_curve_keys = 0; - - for (int curvekey = CData->curve_firstkey[curve]; - curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve]; - curvekey++) { - const float3 ickey_loc = CData->curvekey_co[curvekey]; - const float curve_time = CData->curvekey_time[curvekey]; - const float curve_length = CData->curve_length[curve]; - const float time = (curve_length > 0.0f) ? 
curve_time / curve_length : 0.0f; - float radius = shaperadius( - CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time); - if (CData->psys_closetip[sys] && - (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)) { - radius = 0.0f; - } - hair->add_curve_key(ickey_loc, radius); - if (attr_intercept) - attr_intercept->add(time); - - num_curve_keys++; - } - - if (attr_length != NULL) { - attr_length->add(CData->curve_length[curve]); - } - - if (attr_random != NULL) { - attr_random->add(hash_uint2_to_float(num_curves, 0)); - } - - hair->add_curve(num_keys, CData->psys_shader[sys]); - num_keys += num_curve_keys; - num_curves++; - } - } - - /* check allocation */ - if ((hair->get_curve_keys().size() != num_keys) || (hair->num_curves() != num_curves)) { - VLOG(1) << "Allocation failed, clearing data"; - hair->clear(true); - } -} - -static float4 CurveSegmentMotionCV(ParticleCurveData *CData, int sys, int curve, int curvekey) -{ - const float3 ickey_loc = CData->curvekey_co[curvekey]; - const float curve_time = CData->curvekey_time[curvekey]; - const float curve_length = CData->curve_length[curve]; - float time = (curve_length > 0.0f) ? curve_time / curve_length : 0.0f; - float radius = shaperadius( - CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time); - - if (CData->psys_closetip[sys] && - (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)) - radius = 0.0f; - - /* curve motion keys store both position and radius in float4 */ - float4 mP = float3_to_float4(ickey_loc); - mP.w = radius; - return mP; -} - -static float4 LerpCurveSegmentMotionCV(ParticleCurveData *CData, int sys, int curve, float step) -{ - assert(step >= 0.0f); - assert(step <= 1.0f); - const int first_curve_key = CData->curve_firstkey[curve]; - const float curve_key_f = step * (CData->curve_keynum[curve] - 1); - int curvekey = (int)floorf(curve_key_f); - const float remainder = curve_key_f - curvekey; - if (remainder == 0.0f) { - return CurveSegmentMotionCV(CData, sys, curve, first_curve_key + curvekey); - } - int curvekey2 = curvekey + 1; - if (curvekey2 >= (CData->curve_keynum[curve] - 1)) { - curvekey2 = (CData->curve_keynum[curve] - 1); - curvekey = curvekey2 - 1; - } - const float4 mP = CurveSegmentMotionCV(CData, sys, curve, first_curve_key + curvekey); - const float4 mP2 = CurveSegmentMotionCV(CData, sys, curve, first_curve_key + curvekey2); - return lerp(mP, mP2, remainder); -} - -static void export_hair_motion_validate_attribute(Hair *hair, - int motion_step, - int num_motion_keys, - bool have_motion) -{ - Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - const int num_keys = hair->get_curve_keys().size(); - - if (num_motion_keys != num_keys || !have_motion) { - /* No motion or hair "topology" changed, remove attributes again. */ - if (num_motion_keys != num_keys) { - VLOG(1) << "Hair topology changed, removing attribute."; - } - else { - VLOG(1) << "No motion, removing attribute."; - } - hair->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION); - } - else if (motion_step > 0) { - VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step; - - /* Motion, fill up previous steps that we might have skipped because - * they had no motion, but we need them anyway now. 
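 *
 * For reference, the motion attribute is laid out as one block of num_keys float4 values
 * per motion step (xyz = position, w = radius, as filled in by CurveSegmentMotionCV above),
 * so the data for a given step starts at:
 *
 *   float4 *step_data = attr_mP->data_float4() + step * num_keys;
 *
 * The loop below copies the current center positions and radii into every earlier block.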
*/ - for (int step = 0; step < motion_step; step++) { - float4 *mP = attr_mP->data_float4() + step * num_keys; - - for (int key = 0; key < num_keys; key++) { - mP[key] = float3_to_float4(hair->get_curve_keys()[key]); - mP[key].w = hair->get_curve_radius()[key]; - } - } - } -} - -static void ExportCurveSegmentsMotion(Hair *hair, ParticleCurveData *CData, int motion_step) -{ - VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step " - << motion_step; - - /* find attribute */ - Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - bool new_attribute = false; - - /* add new attribute if it doesn't exist already */ - if (!attr_mP) { - VLOG(1) << "Creating new motion vertex position attribute"; - attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); - new_attribute = true; - } - - /* export motion vectors for curve keys */ - size_t numkeys = hair->get_curve_keys().size(); - float4 *mP = attr_mP->data_float4() + motion_step * numkeys; - bool have_motion = false; - int i = 0; - int num_curves = 0; - - for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) { - for (int curve = CData->psys_firstcurve[sys]; - curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; - curve++) { - /* Curve lengths may not match! Curves can be clipped. */ - int curve_key_end = (num_curves + 1 < (int)hair->get_curve_first_key().size() ? - hair->get_curve_first_key()[num_curves + 1] : - (int)hair->get_curve_keys().size()); - const int num_center_curve_keys = curve_key_end - hair->get_curve_first_key()[num_curves]; - const int is_num_keys_different = CData->curve_keynum[curve] - num_center_curve_keys; - - if (!is_num_keys_different) { - for (int curvekey = CData->curve_firstkey[curve]; - curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve]; - curvekey++) { - if (i < hair->get_curve_keys().size()) { - mP[i] = CurveSegmentMotionCV(CData, sys, curve, curvekey); - if (!have_motion) { - /* unlike mesh coordinates, these tend to be slightly different - * between frames due to particle transforms into/out of object - * space, so we use an epsilon to detect actual changes */ - float4 curve_key = float3_to_float4(hair->get_curve_keys()[i]); - curve_key.w = hair->get_curve_radius()[i]; - if (len_squared(mP[i] - curve_key) > 1e-5f * 1e-5f) - have_motion = true; - } - } - i++; - } - } - else { - /* Number of keys has changed. Generate an interpolated version - * to preserve motion blur. */ - const float step_size = num_center_curve_keys > 1 ? 1.0f / (num_center_curve_keys - 1) : - 0.0f; - for (int step_index = 0; step_index < num_center_curve_keys; ++step_index) { - const float step = step_index * step_size; - mP[i] = LerpCurveSegmentMotionCV(CData, sys, curve, step); - i++; - } - have_motion = true; - } - num_curves++; - } - } - - /* In case of new attribute, we verify if there really was any motion. */ - if (new_attribute) { - export_hair_motion_validate_attribute(hair, motion_step, i, have_motion); - } -} - -/* Hair Curve Sync */ - -bool BlenderSync::object_has_particle_hair(BL::Object b_ob) -{ - /* Test if the object has a particle modifier with hair. */ - for (BL::Modifier &b_mod : b_ob.modifiers) { - if ((b_mod.type() == b_mod.type_PARTICLE_SYSTEM) && - (preview ? 
b_mod.show_viewport() : b_mod.show_render())) { - BL::ParticleSystemModifier psmd((const PointerRNA)b_mod.ptr); - BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr); - BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr); - - if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && - (b_part.type() == BL::ParticleSettings::type_HAIR)) { - return true; - } - } - } - - return false; -} - -/* Old particle hair. */ -void BlenderSync::sync_particle_hair( - Hair *hair, BL::Mesh &b_mesh, BObjectInfo &b_ob_info, bool motion, int motion_step) -{ - if (!b_ob_info.is_real_object_data()) { - return; - } - BL::Object b_ob = b_ob_info.real_object; - - /* obtain general settings */ - if (b_ob.mode() == b_ob.mode_PARTICLE_EDIT || b_ob.mode() == b_ob.mode_EDIT) { - return; - } - - /* Extract particle hair data - should be combined with connecting to mesh later. */ - - ParticleCurveData CData; - - ObtainCacheParticleData(hair, &b_mesh, &b_ob, &CData, !preview); - - /* add hair geometry */ - if (motion) - ExportCurveSegmentsMotion(hair, &CData, motion_step); - else - ExportCurveSegments(scene, hair, &CData); - - /* generated coordinates from first key. we should ideally get this from - * blender to handle deforming objects */ - if (!motion) { - if (hair->need_attribute(scene, ATTR_STD_GENERATED)) { - float3 loc, size; - mesh_texture_space(b_mesh, loc, size); - - Attribute *attr_generated = hair->attributes.add(ATTR_STD_GENERATED); - float3 *generated = attr_generated->data_float3(); - - for (size_t i = 0; i < hair->num_curves(); i++) { - float3 co = hair->get_curve_keys()[hair->get_curve(i).first_key]; - generated[i] = co * size - loc; - } - } - } - - /* create vertex color attributes */ - if (!motion) { - BL::Mesh::vertex_colors_iterator l; - int vcol_num = 0; - - for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l, vcol_num++) { - if (!hair->need_attribute(scene, ustring(l->name().c_str()))) - continue; - - ObtainCacheParticleVcol(hair, &b_mesh, &b_ob, &CData, !preview, vcol_num); - - Attribute *attr_vcol = hair->attributes.add( - ustring(l->name().c_str()), TypeRGBA, ATTR_ELEMENT_CURVE); - - float4 *fdata = attr_vcol->data_float4(); - - if (fdata) { - size_t i = 0; - - /* Encode vertex color using the sRGB curve. */ - for (size_t curve = 0; curve < CData.curve_vcol.size(); curve++) { - fdata[i++] = color_srgb_to_linear_v4(CData.curve_vcol[curve]); - } - } - } - } - - /* create UV attributes */ - if (!motion) { - BL::Mesh::uv_layers_iterator l; - int uv_num = 0; - - for (b_mesh.uv_layers.begin(l); l != b_mesh.uv_layers.end(); ++l, uv_num++) { - bool active_render = l->active_render(); - AttributeStandard std = (active_render) ? 
ATTR_STD_UV : ATTR_STD_NONE; - ustring name = ustring(l->name().c_str()); - - /* UV map */ - if (hair->need_attribute(scene, name) || hair->need_attribute(scene, std)) { - Attribute *attr_uv; - - ObtainCacheParticleUV(hair, &b_mesh, &b_ob, &CData, !preview, uv_num); - - if (active_render) - attr_uv = hair->attributes.add(std, name); - else - attr_uv = hair->attributes.add(name, TypeFloat2, ATTR_ELEMENT_CURVE); - - float2 *uv = attr_uv->data_float2(); - - if (uv) { - size_t i = 0; - - for (size_t curve = 0; curve < CData.curve_uv.size(); curve++) { - uv[i++] = CData.curve_uv[curve]; - } - } - } - } - } -} - -#ifdef WITH_HAIR_NODES -static float4 hair_point_as_float4(BL::HairPoint b_point) -{ - float4 mP = float3_to_float4(get_float3(b_point.co())); - mP.w = b_point.radius(); - return mP; -} - -static float4 interpolate_hair_points(BL::Hair b_hair, - const int first_point_index, - const int num_points, - const float step) -{ - const float curve_t = step * (num_points - 1); - const int point_a = clamp((int)curve_t, 0, num_points - 1); - const int point_b = min(point_a + 1, num_points - 1); - const float t = curve_t - (float)point_a; - return lerp(hair_point_as_float4(b_hair.points[first_point_index + point_a]), - hair_point_as_float4(b_hair.points[first_point_index + point_b]), - t); -} - -static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair) -{ - /* TODO: optimize so we can straight memcpy arrays from Blender? */ - - /* Add requested attributes. */ - Attribute *attr_intercept = NULL; - Attribute *attr_length = NULL; - Attribute *attr_random = NULL; - - if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) { - attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT); - } - if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH)) { - attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH); - } - if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) { - attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM); - } - - /* Reserve memory. */ - const int num_keys = b_hair.points.length(); - const int num_curves = b_hair.curves.length(); - - if (num_curves > 0) { - VLOG(1) << "Exporting curve segments for hair " << hair->name; - } - - hair->reserve_curves(num_curves, num_keys); - - /* Export curves and points. */ - vector points_length; - - for (BL::HairCurve &b_curve : b_hair.curves) { - const int first_point_index = b_curve.first_point_index(); - const int num_points = b_curve.num_points(); - - float3 prev_co = zero_float3(); - float length = 0.0f; - if (attr_intercept) { - points_length.clear(); - points_length.reserve(num_points); - } - - /* Position and radius. */ - for (int i = 0; i < num_points; i++) { - BL::HairPoint b_point = b_hair.points[first_point_index + i]; - - const float3 co = get_float3(b_point.co()); - const float radius = b_point.radius(); - hair->add_curve_key(co, radius); - - if (attr_intercept) { - if (i > 0) { - length += len(co - prev_co); - points_length.push_back(length); - } - prev_co = co; - } - } - - /* Normalized 0..1 attribute along curve. */ - if (attr_intercept) { - for (int i = 0; i < num_points; i++) { - attr_intercept->add((length == 0.0f) ? 0.0f : points_length[i] / length); - } - } - - if (attr_length) { - attr_length->add(length); - } - - /* Random number per curve. */ - if (attr_random != NULL) { - attr_random->add(hash_uint2_to_float(b_curve.index(), 0)); - } - - /* Curve. 
*/ - const int shader_index = 0; - hair->add_curve(first_point_index, shader_index); - } -} - -static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_step) -{ - VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step " - << motion_step; - - /* Find or add attribute. */ - Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - bool new_attribute = false; - - if (!attr_mP) { - VLOG(1) << "Creating new motion vertex position attribute"; - attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); - new_attribute = true; - } - - /* Export motion keys. */ - const int num_keys = hair->get_curve_keys().size(); - float4 *mP = attr_mP->data_float4() + motion_step * num_keys; - bool have_motion = false; - int num_motion_keys = 0; - int curve_index = 0; - - for (BL::HairCurve &b_curve : b_hair.curves) { - const int first_point_index = b_curve.first_point_index(); - const int num_points = b_curve.num_points(); - - Hair::Curve curve = hair->get_curve(curve_index); - curve_index++; - - if (num_points == curve.num_keys) { - /* Number of keys matches. */ - for (int i = 0; i < num_points; i++) { - int point_index = first_point_index + i; - - if (point_index < num_keys) { - mP[num_motion_keys] = hair_point_as_float4(b_hair.points[point_index]); - num_motion_keys++; - - if (!have_motion) { - /* TODO: use epsilon for comparison? Was needed for particles due to - * transform, but ideally should not happen anymore. */ - float4 curve_key = float3_to_float4(hair->get_curve_keys()[i]); - curve_key.w = hair->get_curve_radius()[i]; - have_motion = !(mP[i] == curve_key); - } - } - } - } - else { - /* Number of keys has changed. Generate an interpolated version - * to preserve motion blur. */ - const float step_size = curve.num_keys > 1 ? 1.0f / (curve.num_keys - 1) : 0.0f; - for (int i = 0; i < curve.num_keys; i++) { - const float step = i * step_size; - mP[num_motion_keys] = interpolate_hair_points(b_hair, first_point_index, num_points, step); - num_motion_keys++; - } - have_motion = true; - } - } - - /* In case of new attribute, we verify if there really was any motion. */ - if (new_attribute) { - export_hair_motion_validate_attribute(hair, motion_step, num_motion_keys, have_motion); - } -} - -/* Hair object. */ -void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step) -{ - /* Convert Blender hair to Cycles curves. */ - BL::Hair b_hair(b_ob_info.object_data); - if (motion) { - export_hair_curves_motion(hair, b_hair, motion_step); - } - else { - export_hair_curves(scene, hair, b_hair); - } -} -#else -void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step) -{ - (void)hair; - (void)b_ob_info; - (void)motion; - (void)motion_step; -} -#endif - -void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Hair *hair) -{ - /* make a copy of the shaders as the caller in the main thread still need them for syncing the - * attributes */ - array used_shaders = hair->get_used_shaders(); - - Hair new_hair; - new_hair.set_used_shaders(used_shaders); - - if (view_layer.use_hair) { - if (b_ob_info.object_data.is_a(&RNA_Hair)) { - /* Hair object. */ - sync_hair(&new_hair, b_ob_info, false); - } - else { - /* Particle hair. 
*/ - bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED); - BL::Mesh b_mesh = object_to_mesh( - b_data, b_ob_info, b_depsgraph, need_undeformed, Mesh::SUBDIVISION_NONE); - - if (b_mesh) { - sync_particle_hair(&new_hair, b_mesh, b_ob_info, false); - free_object_to_mesh(b_data, b_ob_info, b_mesh); - } - } - } - - /* update original sockets */ - - for (const SocketType &socket : new_hair.type->inputs) { - /* Those sockets are updated in sync_object, so do not modify them. */ - if (socket.name == "use_motion_blur" || socket.name == "motion_steps" || - socket.name == "used_shaders") { - continue; - } - hair->set_value(socket, new_hair, socket); - } - - hair->attributes.update(std::move(new_hair.attributes)); - - /* tag update */ - - /* Compares curve_keys rather than strands in order to handle quick hair - * adjustments in dynamic BVH - other methods could probably do this better. */ - const bool rebuild = (hair->curve_keys_is_modified() || hair->curve_radius_is_modified()); - - hair->tag_update(scene, rebuild); -} - -void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph, - BObjectInfo &b_ob_info, - Hair *hair, - int motion_step) -{ - /* Skip if nothing exported. */ - if (hair->num_keys() == 0) { - return; - } - - /* Export deformed coordinates. */ - if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) { - if (b_ob_info.object_data.is_a(&RNA_Hair)) { - /* Hair object. */ - sync_hair(hair, b_ob_info, true, motion_step); - return; - } - else { - /* Particle hair. */ - BL::Mesh b_mesh = object_to_mesh( - b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE); - if (b_mesh) { - sync_particle_hair(hair, b_mesh, b_ob_info, true, motion_step); - free_object_to_mesh(b_data, b_ob_info, b_mesh); - return; - } - } - } - - /* No deformation on this frame, copy coordinates if other frames did have it. */ - hair->copy_center_to_motion_step(motion_step); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_device.cpp b/intern/cycles/blender/blender_device.cpp deleted file mode 100644 index 7bed33855c2..00000000000 --- a/intern/cycles/blender/blender_device.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "blender/blender_device.h" -#include "blender/blender_session.h" -#include "blender/blender_util.h" - -#include "util/util_foreach.h" - -CCL_NAMESPACE_BEGIN - -enum ComputeDevice { - COMPUTE_DEVICE_CPU = 0, - COMPUTE_DEVICE_CUDA = 1, - COMPUTE_DEVICE_OPTIX = 3, - COMPUTE_DEVICE_HIP = 4, - - COMPUTE_DEVICE_NUM -}; - -int blender_device_threads(BL::Scene &b_scene) -{ - BL::RenderSettings b_r = b_scene.render(); - - if (b_r.threads_mode() == BL::RenderSettings::threads_mode_FIXED) - return b_r.threads(); - else - return 0; -} - -DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scene, bool background) -{ - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - - /* Find cycles preferences. 
*/ - PointerRNA cpreferences; - for (BL::Addon &b_addon : b_preferences.addons) { - if (b_addon.module() == "cycles") { - cpreferences = b_addon.preferences().ptr; - break; - } - } - - /* Default to CPU device. */ - DeviceInfo device = Device::available_devices(DEVICE_MASK_CPU).front(); - - if (BlenderSession::device_override != DEVICE_MASK_ALL) { - vector devices = Device::available_devices(BlenderSession::device_override); - - if (devices.empty()) { - device = Device::dummy_device("Found no Cycles device of the specified type"); - } - else { - int threads = blender_device_threads(b_scene); - device = Device::get_multi_device(devices, threads, background); - } - } - else if (get_enum(cscene, "device") == 1) { - /* Test if we are using GPU devices. */ - ComputeDevice compute_device = (ComputeDevice)get_enum( - cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU); - - if (compute_device != COMPUTE_DEVICE_CPU) { - /* Query GPU devices with matching types. */ - uint mask = DEVICE_MASK_CPU; - if (compute_device == COMPUTE_DEVICE_CUDA) { - mask |= DEVICE_MASK_CUDA; - } - else if (compute_device == COMPUTE_DEVICE_OPTIX) { - mask |= DEVICE_MASK_OPTIX; - } - else if (compute_device == COMPUTE_DEVICE_HIP) { - mask |= DEVICE_MASK_HIP; - } - vector devices = Device::available_devices(mask); - - /* Match device preferences and available devices. */ - vector used_devices; - RNA_BEGIN (&cpreferences, device, "devices") { - if (get_boolean(device, "use")) { - string id = get_string(device, "id"); - foreach (DeviceInfo &info, devices) { - if (info.id == id) { - used_devices.push_back(info); - break; - } - } - } - } - RNA_END; - - if (!used_devices.empty()) { - int threads = blender_device_threads(b_scene); - device = Device::get_multi_device(used_devices, threads, background); - } - /* Else keep using the CPU device that was set before. */ - } - } - - if (!get_boolean(cpreferences, "peer_memory")) { - device.has_peer_memory = false; - } - - return device; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_device.h b/intern/cycles/blender/blender_device.h deleted file mode 100644 index 8d2ecac7483..00000000000 --- a/intern/cycles/blender/blender_device.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BLENDER_DEVICE_H__ -#define __BLENDER_DEVICE_H__ - -#include "MEM_guardedalloc.h" -#include "RNA_access.h" -#include "RNA_blender_cpp.h" -#include "RNA_types.h" - -#include "device/device.h" - -CCL_NAMESPACE_BEGIN - -/* Get number of threads to use for rendering. */ -int blender_device_threads(BL::Scene &b_scene); - -/* Convert Blender settings to device specification. 
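 *
 * A rough usage sketch (the caller-side names here are illustrative, not part of this
 * header):
 *
 *   BL::Preferences b_preferences(...);   // e.g. obtained from the RNA context
 *   BL::Scene b_scene(...);
 *   const bool background = true;
 *
 *   DeviceInfo info = blender_device_info(b_preferences, b_scene, background);
 *   const int threads = blender_device_threads(b_scene);  // 0 when Blender auto-detects
 *
 * The returned DeviceInfo may be a multi-device aggregate, or a dummy device carrying an
 * error message when the requested device type is unavailable.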
*/ -DeviceInfo blender_device_info(BL::Preferences &b_preferences, - BL::Scene &b_scene, - bool background); - -CCL_NAMESPACE_END - -#endif /* __BLENDER_DEVICE_H__ */ diff --git a/intern/cycles/blender/blender_display_driver.cpp b/intern/cycles/blender/blender_display_driver.cpp deleted file mode 100644 index cdf175f91d0..00000000000 --- a/intern/cycles/blender/blender_display_driver.cpp +++ /dev/null @@ -1,771 +0,0 @@ -/* - * Copyright 2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "blender/blender_display_driver.h" - -#include "device/device.h" -#include "util/util_logging.h" -#include "util/util_opengl.h" - -extern "C" { -struct RenderEngine; - -bool RE_engine_has_render_context(struct RenderEngine *engine); -void RE_engine_render_context_enable(struct RenderEngine *engine); -void RE_engine_render_context_disable(struct RenderEngine *engine); - -bool DRW_opengl_context_release(); -void DRW_opengl_context_activate(bool drw_state); - -void *WM_opengl_context_create(); -void WM_opengl_context_activate(void *gl_context); -void WM_opengl_context_dispose(void *gl_context); -void WM_opengl_context_release(void *context); -} - -CCL_NAMESPACE_BEGIN - -/* -------------------------------------------------------------------- - * BlenderDisplayShader. - */ - -unique_ptr BlenderDisplayShader::create(BL::RenderEngine &b_engine, - BL::Scene &b_scene) -{ - if (b_engine.support_display_space_shader(b_scene)) { - return make_unique(b_engine, b_scene); - } - - return make_unique(); -} - -int BlenderDisplayShader::get_position_attrib_location() -{ - if (position_attribute_location_ == -1) { - const uint shader_program = get_shader_program(); - position_attribute_location_ = glGetAttribLocation(shader_program, position_attribute_name); - } - return position_attribute_location_; -} - -int BlenderDisplayShader::get_tex_coord_attrib_location() -{ - if (tex_coord_attribute_location_ == -1) { - const uint shader_program = get_shader_program(); - tex_coord_attribute_location_ = glGetAttribLocation(shader_program, tex_coord_attribute_name); - } - return tex_coord_attribute_location_; -} - -/* -------------------------------------------------------------------- - * BlenderFallbackDisplayShader. - */ - -/* TODO move shaders to standalone .glsl file. 
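 *
 * For context, the shader classes above are consumed by the display driver below roughly
 * as follows (variable names are illustrative):
 *
 *   unique_ptr<BlenderDisplayShader> shader = BlenderDisplayShader::create(b_engine, b_scene);
 *   shader->bind(width, height);
 *   const int position_attrib = shader->get_position_attrib_location();  // valid only while bound
 *   const int texcoord_attrib = shader->get_tex_coord_attrib_location();
 *   ... set up vertex attributes and issue the draw call ...
 *   shader->unbind();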
*/ -static const char *FALLBACK_VERTEX_SHADER = - "#version 330\n" - "uniform vec2 fullscreen;\n" - "in vec2 texCoord;\n" - "in vec2 pos;\n" - "out vec2 texCoord_interp;\n" - "\n" - "vec2 normalize_coordinates()\n" - "{\n" - " return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n" - "}\n" - "\n" - "void main()\n" - "{\n" - " gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n" - " texCoord_interp = texCoord;\n" - "}\n\0"; - -static const char *FALLBACK_FRAGMENT_SHADER = - "#version 330\n" - "uniform sampler2D image_texture;\n" - "in vec2 texCoord_interp;\n" - "out vec4 fragColor;\n" - "\n" - "void main()\n" - "{\n" - " fragColor = texture(image_texture, texCoord_interp);\n" - "}\n\0"; - -static void shader_print_errors(const char *task, const char *log, const char *code) -{ - LOG(ERROR) << "Shader: " << task << " error:"; - LOG(ERROR) << "===== shader string ===="; - - stringstream stream(code); - string partial; - - int line = 1; - while (getline(stream, partial, '\n')) { - if (line < 10) { - LOG(ERROR) << " " << line << " " << partial; - } - else { - LOG(ERROR) << line << " " << partial; - } - line++; - } - LOG(ERROR) << log; -} - -static int compile_fallback_shader(void) -{ - const struct Shader { - const char *source; - const GLenum type; - } shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER}, - {FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}}; - - const GLuint program = glCreateProgram(); - - for (int i = 0; i < 2; i++) { - const GLuint shader = glCreateShader(shaders[i].type); - - string source_str = shaders[i].source; - const char *c_str = source_str.c_str(); - - glShaderSource(shader, 1, &c_str, NULL); - glCompileShader(shader); - - GLint compile_status; - glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_status); - - if (!compile_status) { - GLchar log[5000]; - GLsizei length = 0; - glGetShaderInfoLog(shader, sizeof(log), &length, log); - shader_print_errors("compile", log, c_str); - return 0; - } - - glAttachShader(program, shader); - } - - /* Link output. */ - glBindFragDataLocation(program, 0, "fragColor"); - - /* Link and error check. */ - glLinkProgram(program); - - /* TODO(sergey): Find a way to nicely de-duplicate the error checking. */ - GLint link_status; - glGetProgramiv(program, GL_LINK_STATUS, &link_status); - if (!link_status) { - GLchar log[5000]; - GLsizei length = 0; - /* TODO(sergey): Is it really program passed to glGetShaderInfoLog? 
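 *
 * Most likely the program-object query is what is wanted here: glGetShaderInfoLog() takes
 * a shader object, while link logs come from glGetProgramInfoLog(), e.g.:
 *
 *   GLchar log[5000];
 *   GLsizei length = 0;
 *   glGetProgramInfoLog(program, sizeof(log), &length, log);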
*/ - glGetShaderInfoLog(program, sizeof(log), &length, log); - shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER); - shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER); - return 0; - } - - return program; -} - -void BlenderFallbackDisplayShader::bind(int width, int height) -{ - create_shader_if_needed(); - - if (!shader_program_) { - return; - } - - glUseProgram(shader_program_); - glUniform1i(image_texture_location_, 0); - glUniform2f(fullscreen_location_, width, height); -} - -void BlenderFallbackDisplayShader::unbind() -{ -} - -uint BlenderFallbackDisplayShader::get_shader_program() -{ - return shader_program_; -} - -void BlenderFallbackDisplayShader::create_shader_if_needed() -{ - if (shader_program_ || shader_compile_attempted_) { - return; - } - - shader_compile_attempted_ = true; - - shader_program_ = compile_fallback_shader(); - if (!shader_program_) { - return; - } - - glUseProgram(shader_program_); - - image_texture_location_ = glGetUniformLocation(shader_program_, "image_texture"); - if (image_texture_location_ < 0) { - LOG(ERROR) << "Shader doesn't contain the 'image_texture' uniform."; - destroy_shader(); - return; - } - - fullscreen_location_ = glGetUniformLocation(shader_program_, "fullscreen"); - if (fullscreen_location_ < 0) { - LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform."; - destroy_shader(); - return; - } -} - -void BlenderFallbackDisplayShader::destroy_shader() -{ - glDeleteProgram(shader_program_); - shader_program_ = 0; -} - -/* -------------------------------------------------------------------- - * BlenderDisplaySpaceShader. - */ - -BlenderDisplaySpaceShader::BlenderDisplaySpaceShader(BL::RenderEngine &b_engine, - BL::Scene &b_scene) - : b_engine_(b_engine), b_scene_(b_scene) -{ - DCHECK(b_engine_.support_display_space_shader(b_scene_)); -} - -void BlenderDisplaySpaceShader::bind(int /*width*/, int /*height*/) -{ - b_engine_.bind_display_space_shader(b_scene_); -} - -void BlenderDisplaySpaceShader::unbind() -{ - b_engine_.unbind_display_space_shader(); -} - -uint BlenderDisplaySpaceShader::get_shader_program() -{ - if (!shader_program_) { - glGetIntegerv(GL_CURRENT_PROGRAM, reinterpret_cast(&shader_program_)); - } - - if (!shader_program_) { - LOG(ERROR) << "Error retrieving shader program for display space shader."; - } - - return shader_program_; -} - -/* -------------------------------------------------------------------- - * BlenderDisplayDriver. - */ - -BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene) - : b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene)) -{ - /* Create context while on the main thread. */ - gl_context_create(); -} - -BlenderDisplayDriver::~BlenderDisplayDriver() -{ - gl_resources_destroy(); -} - -/* -------------------------------------------------------------------- - * Update procedure. - */ - -bool BlenderDisplayDriver::update_begin(const Params ¶ms, - int texture_width, - int texture_height) -{ - /* Note that it's the responsibility of BlenderDisplayDriver to ensure updating and drawing - * the texture does not happen at the same time. This is achieved indirectly. - * - * When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock. - * This same lock is also held when do_draw() is called, which together ensure mutual - * exclusion. - * - * This locking is not performed on the Cycles side, because that would cause lock inversion. 
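 *
 * As an illustration of the inversion this avoids (cycles_mutex is hypothetical):
 *
 *   draw thread:    lock(DST.gl_context_lock); lock(cycles_mutex);
 *   update thread:  lock(cycles_mutex);        lock(DST.gl_context_lock);
 *
 * If the two paths could acquire the locks in opposite orders like this, both threads may
 * block forever, which is why a single lock is relied on instead.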
*/ - if (!gl_context_enable()) { - return false; - } - - if (gl_render_sync_) { - glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED); - } - - if (!gl_texture_resources_ensure()) { - gl_context_disable(); - return false; - } - - /* Update texture dimensions if needed. */ - if (texture_.width != texture_width || texture_.height != texture_height) { - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, texture_.gl_id); - glTexImage2D( - GL_TEXTURE_2D, 0, GL_RGBA16F, texture_width, texture_height, 0, GL_RGBA, GL_HALF_FLOAT, 0); - texture_.width = texture_width; - texture_.height = texture_height; - glBindTexture(GL_TEXTURE_2D, 0); - - /* Texture did change, and no pixel storage was provided. Tag for an explicit zeroing out to - * avoid undefined content. */ - texture_.need_clear = true; - } - - /* Update PBO dimensions if needed. - * - * NOTE: Allocate the PBO for the the size which will fit the final render resolution (as in, - * at a resolution divider 1. This was we don't need to recreate graphics interoperability - * objects which are costly and which are tied to the specific underlying buffer size. - * The downside of this approach is that when graphics interoperability is not used we are - * sending too much data to GPU when resolution divider is not 1. */ - /* TODO(sergey): Investigate whether keeping the PBO exact size of the texture makes non-interop - * mode faster. */ - const int buffer_width = params.full_size.x; - const int buffer_height = params.full_size.y; - if (texture_.buffer_width != buffer_width || texture_.buffer_height != buffer_height) { - const size_t size_in_bytes = sizeof(half4) * buffer_width * buffer_height; - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id); - glBufferData(GL_PIXEL_UNPACK_BUFFER, size_in_bytes, 0, GL_DYNAMIC_DRAW); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - - texture_.buffer_width = buffer_width; - texture_.buffer_height = buffer_height; - } - - /* New content will be provided to the texture in one way or another, so mark this in a - * centralized place. */ - texture_.need_update = true; - - texture_.params = params; - - return true; -} - -void BlenderDisplayDriver::update_end() -{ - gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - glFlush(); - - gl_context_disable(); -} - -/* -------------------------------------------------------------------- - * Texture buffer mapping. - */ - -half4 *BlenderDisplayDriver::map_texture_buffer() -{ - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id); - - half4 *mapped_rgba_pixels = reinterpret_cast( - glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY)); - if (!mapped_rgba_pixels) { - LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object."; - } - - if (texture_.need_clear) { - const int64_t texture_width = texture_.width; - const int64_t texture_height = texture_.height; - memset(reinterpret_cast(mapped_rgba_pixels), - 0, - texture_width * texture_height * sizeof(half4)); - texture_.need_clear = false; - } - - return mapped_rgba_pixels; -} - -void BlenderDisplayDriver::unmap_texture_buffer() -{ - glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); -} - -/* -------------------------------------------------------------------- - * Graphics interoperability. 
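 *
 * Conceptually, the Cycles session uses this interface along these lines (the calls are
 * made from the session side; names here are illustrative):
 *
 *   driver->graphics_interop_activate();            // make the GL context current
 *   GraphicsInterop dst = driver->graphics_interop_get();
 *   ... the device writes dst.buffer_width * dst.buffer_height half4 pixels directly
 *       into the PBO identified by dst.opengl_pbo_id ...
 *   driver->graphics_interop_deactivate();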
- */ - -BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get() -{ - GraphicsInterop interop_dst; - - interop_dst.buffer_width = texture_.buffer_width; - interop_dst.buffer_height = texture_.buffer_height; - interop_dst.opengl_pbo_id = texture_.gl_pbo_id; - - interop_dst.need_clear = texture_.need_clear; - texture_.need_clear = false; - - return interop_dst; -} - -void BlenderDisplayDriver::graphics_interop_activate() -{ - gl_context_enable(); -} - -void BlenderDisplayDriver::graphics_interop_deactivate() -{ - gl_context_disable(); -} - -/* -------------------------------------------------------------------- - * Drawing. - */ - -void BlenderDisplayDriver::clear() -{ - texture_.need_clear = true; -} - -void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y) -{ - zoom_ = make_float2(zoom_x, zoom_y); -} - -void BlenderDisplayDriver::draw(const Params ¶ms) -{ - /* See do_update_begin() for why no locking is required here. */ - const bool transparent = true; // TODO(sergey): Derive this from Film. - - if (!gl_draw_resources_ensure()) { - return; - } - - if (use_gl_context_) { - gl_context_mutex_.lock(); - } - - if (texture_.need_clear) { - /* Texture is requested to be cleared and was not yet cleared. - * - * Do early return which should be equivalent of drawing all-zero texture. - * Watch out for the lock though so that the clear happening during update is properly - * synchronized here. */ - gl_context_mutex_.unlock(); - return; - } - - if (gl_upload_sync_) { - glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED); - } - - if (transparent) { - glEnable(GL_BLEND); - glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA); - } - - display_shader_->bind(params.full_size.x, params.full_size.y); - - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, texture_.gl_id); - - /* Trick to keep sharp rendering without jagged edges on all GPUs. - * - * The idea here is to enforce driver to use linear interpolation when the image is not zoomed - * in. - * For the render result with a resolution divider in effect we always use nearest interpolation. - * - * Use explicit MIN assignment to make sure the driver does not have an undefined behavior at - * the zoom level 1. The MAG filter is always NEAREST. */ - const float zoomed_width = params.size.x * zoom_.x; - const float zoomed_height = params.size.y * zoom_.y; - if (texture_.width != params.size.x || texture_.height != params.size.y) { - /* Resolution divider is different from 1, force nearest interpolation. */ - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - } - else if (zoomed_width - params.size.x > 0.5f || zoomed_height - params.size.y > 0.5f) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - } - else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - } - - glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_); - - texture_update_if_needed(); - vertex_buffer_update(params); - - /* TODO(sergey): Does it make sense/possible to cache/reuse the VAO? 
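 *
 * One constraint on caching: VAOs are container objects and are not shared between OpenGL
 * contexts, so a cached VAO would have to live in whichever context draw() runs in, e.g.
 * lazily (vertex_array_object_ being a hypothetical member):
 *
 *   if (vertex_array_object_ == 0) {
 *     glGenVertexArrays(1, &vertex_array_object_);
 *   }
 *   glBindVertexArray(vertex_array_object_);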
*/ - GLuint vertex_array_object; - glGenVertexArrays(1, &vertex_array_object); - glBindVertexArray(vertex_array_object); - - const int texcoord_attribute = display_shader_->get_tex_coord_attrib_location(); - const int position_attribute = display_shader_->get_position_attrib_location(); - - glEnableVertexAttribArray(texcoord_attribute); - glEnableVertexAttribArray(position_attribute); - - glVertexAttribPointer( - texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0); - glVertexAttribPointer(position_attribute, - 2, - GL_FLOAT, - GL_FALSE, - 4 * sizeof(float), - (const GLvoid *)(sizeof(float) * 2)); - - glDrawArrays(GL_TRIANGLE_FAN, 0, 4); - - glBindBuffer(GL_ARRAY_BUFFER, 0); - glBindTexture(GL_TEXTURE_2D, 0); - - glDeleteVertexArrays(1, &vertex_array_object); - - display_shader_->unbind(); - - if (transparent) { - glDisable(GL_BLEND); - } - - gl_render_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - glFlush(); - - if (use_gl_context_) { - gl_context_mutex_.unlock(); - } -} - -void BlenderDisplayDriver::gl_context_create() -{ - /* When rendering in viewport there is no render context available via engine. - * Check whether own context is to be created here. - * - * NOTE: If the `b_engine_`'s context is not available, we are expected to be on a main thread - * here. */ - use_gl_context_ = !RE_engine_has_render_context( - reinterpret_cast(b_engine_.ptr.data)); - - if (use_gl_context_) { - const bool drw_state = DRW_opengl_context_release(); - gl_context_ = WM_opengl_context_create(); - if (gl_context_) { - /* On Windows an old context is restored after creation, and subsequent release of context - * generates a Win32 error. Harmless for users, but annoying to have possible misleading - * error prints in the console. */ -#ifndef _WIN32 - WM_opengl_context_release(gl_context_); -#endif - } - else { - LOG(ERROR) << "Error creating OpenGL context."; - } - - DRW_opengl_context_activate(drw_state); - } -} - -bool BlenderDisplayDriver::gl_context_enable() -{ - if (use_gl_context_) { - if (!gl_context_) { - return false; - } - gl_context_mutex_.lock(); - WM_opengl_context_activate(gl_context_); - return true; - } - - RE_engine_render_context_enable(reinterpret_cast(b_engine_.ptr.data)); - return true; -} - -void BlenderDisplayDriver::gl_context_disable() -{ - if (use_gl_context_) { - if (gl_context_) { - WM_opengl_context_release(gl_context_); - gl_context_mutex_.unlock(); - } - return; - } - - RE_engine_render_context_disable(reinterpret_cast(b_engine_.ptr.data)); -} - -void BlenderDisplayDriver::gl_context_dispose() -{ - if (gl_context_) { - const bool drw_state = DRW_opengl_context_release(); - - WM_opengl_context_activate(gl_context_); - WM_opengl_context_dispose(gl_context_); - - DRW_opengl_context_activate(drw_state); - } -} - -bool BlenderDisplayDriver::gl_draw_resources_ensure() -{ - if (!texture_.gl_id) { - /* If there is no texture allocated, there is nothing to draw. Inform the draw call that it can - * can not continue. Note that this is not an unrecoverable error, so once the texture is known - * we will come back here and create all the GPU resources needed for draw. 
*/ - return false; - } - - if (gl_draw_resource_creation_attempted_) { - return gl_draw_resources_created_; - } - gl_draw_resource_creation_attempted_ = true; - - if (!vertex_buffer_) { - glGenBuffers(1, &vertex_buffer_); - if (!vertex_buffer_) { - LOG(ERROR) << "Error creating vertex buffer."; - return false; - } - } - - gl_draw_resources_created_ = true; - - return true; -} - -void BlenderDisplayDriver::gl_resources_destroy() -{ - gl_context_enable(); - - if (vertex_buffer_ != 0) { - glDeleteBuffers(1, &vertex_buffer_); - } - - if (texture_.gl_pbo_id) { - glDeleteBuffers(1, &texture_.gl_pbo_id); - texture_.gl_pbo_id = 0; - } - - if (texture_.gl_id) { - glDeleteTextures(1, &texture_.gl_id); - texture_.gl_id = 0; - } - - gl_context_disable(); - - gl_context_dispose(); -} - -bool BlenderDisplayDriver::gl_texture_resources_ensure() -{ - if (texture_.creation_attempted) { - return texture_.is_created; - } - texture_.creation_attempted = true; - - DCHECK(!texture_.gl_id); - DCHECK(!texture_.gl_pbo_id); - - /* Create texture. */ - glGenTextures(1, &texture_.gl_id); - if (!texture_.gl_id) { - LOG(ERROR) << "Error creating texture."; - return false; - } - - /* Configure the texture. */ - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, texture_.gl_id); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glBindTexture(GL_TEXTURE_2D, 0); - - /* Create PBO for the texture. */ - glGenBuffers(1, &texture_.gl_pbo_id); - if (!texture_.gl_pbo_id) { - LOG(ERROR) << "Error creating texture pixel buffer object."; - return false; - } - - /* Creation finished with a success. */ - texture_.is_created = true; - - return true; -} - -void BlenderDisplayDriver::texture_update_if_needed() -{ - if (!texture_.need_update) { - return; - } - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id); - glTexSubImage2D( - GL_TEXTURE_2D, 0, 0, 0, texture_.width, texture_.height, GL_RGBA, GL_HALF_FLOAT, 0); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - - texture_.need_update = false; -} - -void BlenderDisplayDriver::vertex_buffer_update(const Params & /*params*/) -{ - /* Draw at the parameters for which the texture has been updated for. This allows to always draw - * texture during bordered-rendered camera view without flickering. The validness of the display - * parameters for a texture is guaranteed by the initial "clear" state which makes drawing to - * have an early output. - * - * Such approach can cause some extra "jelly" effect during panning, but it is not more jelly - * than overlay of selected objects. Also, it's possible to redraw texture at an intersection of - * the texture draw parameters and the latest updated draw parameters (although, complexity of - * doing it might not worth it. */ - const int x = texture_.params.full_offset.x; - const int y = texture_.params.full_offset.y; - - const int width = texture_.params.size.x; - const int height = texture_.params.size.y; - - /* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be - * rendered. 
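 *
 * This is the usual buffer "orphaning" idiom: passing NULL with the same size asks the
 * driver for fresh storage instead of blocking on the previous contents. The 16 floats
 * below are four interleaved vertices matching the attribute setup in draw():
 *
 *   { u, v, x, y }   // texcoord at offset 0, position at offset 2 * sizeof(float),
 *                    // stride 4 * sizeof(float), drawn as a GL_TRIANGLE_FAN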
*/ - glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW); - - float *vpointer = reinterpret_cast(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY)); - if (!vpointer) { - return; - } - - vpointer[0] = 0.0f; - vpointer[1] = 0.0f; - vpointer[2] = x; - vpointer[3] = y; - - vpointer[4] = 1.0f; - vpointer[5] = 0.0f; - vpointer[6] = x + width; - vpointer[7] = y; - - vpointer[8] = 1.0f; - vpointer[9] = 1.0f; - vpointer[10] = x + width; - vpointer[11] = y + height; - - vpointer[12] = 0.0f; - vpointer[13] = 1.0f; - vpointer[14] = x; - vpointer[15] = y + height; - - glUnmapBuffer(GL_ARRAY_BUFFER); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_display_driver.h b/intern/cycles/blender/blender_display_driver.h deleted file mode 100644 index 800d0791041..00000000000 --- a/intern/cycles/blender/blender_display_driver.h +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright 2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include "MEM_guardedalloc.h" - -#include "RNA_blender_cpp.h" - -#include "session/display_driver.h" - -#include "util/util_thread.h" -#include "util/util_unique_ptr.h" - -CCL_NAMESPACE_BEGIN - -/* Base class of shader used for display driver rendering. */ -class BlenderDisplayShader { - public: - static constexpr const char *position_attribute_name = "pos"; - static constexpr const char *tex_coord_attribute_name = "texCoord"; - - /* Create shader implementation suitable for the given render engine and scene configuration. */ - static unique_ptr create(BL::RenderEngine &b_engine, BL::Scene &b_scene); - - BlenderDisplayShader() = default; - virtual ~BlenderDisplayShader() = default; - - virtual void bind(int width, int height) = 0; - virtual void unbind() = 0; - - /* Get attribute location for position and texture coordinate respectively. - * NOTE: The shader needs to be bound to have access to those. */ - virtual int get_position_attrib_location(); - virtual int get_tex_coord_attrib_location(); - - protected: - /* Get program of this display shader. - * NOTE: The shader needs to be bound to have access to this. */ - virtual uint get_shader_program() = 0; - - /* Cached values of various OpenGL resources. */ - int position_attribute_location_ = -1; - int tex_coord_attribute_location_ = -1; -}; - -/* Implementation of display rendering shader used in the case when render engine does not support - * display space shader. */ -class BlenderFallbackDisplayShader : public BlenderDisplayShader { - public: - virtual void bind(int width, int height) override; - virtual void unbind() override; - - protected: - virtual uint get_shader_program() override; - - void create_shader_if_needed(); - void destroy_shader(); - - uint shader_program_ = 0; - int image_texture_location_ = -1; - int fullscreen_location_ = -1; - - /* Shader compilation attempted. Which means, that if the shader program is 0 then compilation or - * linking has failed. Do not attempt to re-compile the shader. 
*/ - bool shader_compile_attempted_ = false; -}; - -class BlenderDisplaySpaceShader : public BlenderDisplayShader { - public: - BlenderDisplaySpaceShader(BL::RenderEngine &b_engine, BL::Scene &b_scene); - - virtual void bind(int width, int height) override; - virtual void unbind() override; - - protected: - virtual uint get_shader_program() override; - - BL::RenderEngine b_engine_; - BL::Scene &b_scene_; - - /* Cached values of various OpenGL resources. */ - uint shader_program_ = 0; -}; - -/* Display driver implementation which is specific for Blender viewport integration. */ -class BlenderDisplayDriver : public DisplayDriver { - public: - BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene); - ~BlenderDisplayDriver(); - - virtual void graphics_interop_activate() override; - virtual void graphics_interop_deactivate() override; - - virtual void clear() override; - - void set_zoom(float zoom_x, float zoom_y); - - protected: - virtual bool update_begin(const Params ¶ms, int texture_width, int texture_height) override; - virtual void update_end() override; - - virtual half4 *map_texture_buffer() override; - virtual void unmap_texture_buffer() override; - - virtual GraphicsInterop graphics_interop_get() override; - - virtual void draw(const Params ¶ms) override; - - /* Helper function which allocates new GPU context. */ - void gl_context_create(); - bool gl_context_enable(); - void gl_context_disable(); - void gl_context_dispose(); - - /* Make sure texture is allocated and its initial configuration is performed. */ - bool gl_texture_resources_ensure(); - - /* Ensure all runtime GPU resources needed for drawing are allocated. - * Returns true if all resources needed for drawing are available. */ - bool gl_draw_resources_ensure(); - - /* Destroy all GPU resources which are being used by this object. */ - void gl_resources_destroy(); - - /* Update GPU texture dimensions and content if needed (new pixel data was provided). - * - * NOTE: The texture needs to be bound. */ - void texture_update_if_needed(); - - /* Update vertex buffer with new coordinates of vertex positions and texture coordinates. - * This buffer is used to render texture in the viewport. - * - * NOTE: The buffer needs to be bound. */ - void vertex_buffer_update(const Params ¶ms); - - BL::RenderEngine b_engine_; - - /* OpenGL context which is used the render engine doesn't have its own. */ - void *gl_context_ = nullptr; - /* The when Blender RenderEngine side context is not available and the DisplayDriver is to create - * its own context. */ - bool use_gl_context_ = false; - /* Mutex used to guard the `gl_context_`. */ - thread_mutex gl_context_mutex_; - - /* Texture which contains pixels of the render result. */ - struct { - /* Indicates whether texture creation was attempted and succeeded. - * Used to avoid multiple attempts of texture creation on GPU issues or GPU context - * misconfiguration. */ - bool creation_attempted = false; - bool is_created = false; - - /* OpenGL resource IDs of the texture itself and Pixel Buffer Object (PBO) used to write - * pixels to it. - * - * NOTE: Allocated on the engine's context. */ - uint gl_id = 0; - uint gl_pbo_id = 0; - - /* Is true when new data was written to the PBO, meaning, the texture might need to be resized - * and new data is to be uploaded to the GPU. */ - bool need_update = false; - - /* Content of the texture is to be filled with zeroes. */ - std::atomic need_clear = true; - - /* Dimensions of the texture in pixels. 
*/ - int width = 0; - int height = 0; - - /* Dimensions of the underlying PBO. */ - int buffer_width = 0; - int buffer_height = 0; - - /* Display parameters the texture has been updated for. */ - Params params; - } texture_; - - unique_ptr display_shader_; - - /* Special track of whether GPU resources were attempted to be created, to avoid attempts of - * their re-creation on failure on every redraw. */ - bool gl_draw_resource_creation_attempted_ = false; - bool gl_draw_resources_created_ = false; - - /* Vertex buffer which hold vertices of a triangle fan which is textures with the texture - * holding the render result. */ - uint vertex_buffer_ = 0; - - void *gl_render_sync_ = nullptr; - void *gl_upload_sync_ = nullptr; - - float2 zoom_ = make_float2(1.0f, 1.0f); -}; - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_geometry.cpp b/intern/cycles/blender/blender_geometry.cpp deleted file mode 100644 index b4b0d04d104..00000000000 --- a/intern/cycles/blender/blender_geometry.cpp +++ /dev/null @@ -1,241 +0,0 @@ - -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "scene/curves.h" -#include "scene/hair.h" -#include "scene/mesh.h" -#include "scene/object.h" -#include "scene/volume.h" - -#include "blender/blender_sync.h" -#include "blender/blender_util.h" - -#include "util/util_foreach.h" -#include "util/util_task.h" - -CCL_NAMESPACE_BEGIN - -static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair) -{ - if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) { - return Geometry::HAIR; - } - - if (b_ob_info.object_data.is_a(&RNA_Volume) || - (b_ob_info.object_data == b_ob_info.real_object.data() && - object_fluid_gas_domain_find(b_ob_info.real_object))) { - return Geometry::VOLUME; - } - - return Geometry::MESH; -} - -array BlenderSync::find_used_shaders(BL::Object &b_ob) -{ - BL::Material material_override = view_layer.material_override; - Shader *default_shader = (b_ob.type() == BL::Object::type_VOLUME) ? scene->default_volume : - scene->default_surface; - - array used_shaders; - - for (BL::MaterialSlot &b_slot : b_ob.material_slots) { - if (material_override) { - find_shader(material_override, used_shaders, default_shader); - } - else { - BL::ID b_material(b_slot.material()); - find_shader(b_material, used_shaders, default_shader); - } - } - - if (used_shaders.size() == 0) { - if (material_override) - find_shader(material_override, used_shaders, default_shader); - else - used_shaders.push_back_slow(default_shader); - } - - return used_shaders; -} - -Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph, - BObjectInfo &b_ob_info, - bool object_updated, - bool use_particle_hair, - TaskPool *task_pool) -{ - /* Test if we can instance or if the object is modified. */ - Geometry::Type geom_type = determine_geom_type(b_ob_info, use_particle_hair); - BL::ID b_key_id = (b_ob_info.is_real_object_data() && - BKE_object_is_modified(b_ob_info.real_object)) ? 
- b_ob_info.real_object : - b_ob_info.object_data; - GeometryKey key(b_key_id.ptr.data, geom_type); - - /* Find shader indices. */ - array used_shaders = find_used_shaders(b_ob_info.iter_object); - - /* Ensure we only sync instanced geometry once. */ - Geometry *geom = geometry_map.find(key); - if (geom) { - if (geometry_synced.find(geom) != geometry_synced.end()) { - return geom; - } - } - - /* Test if we need to sync. */ - bool sync = true; - if (geom == NULL) { - /* Add new geometry if it did not exist yet. */ - if (geom_type == Geometry::HAIR) { - geom = scene->create_node(); - } - else if (geom_type == Geometry::VOLUME) { - geom = scene->create_node(); - } - else { - geom = scene->create_node(); - } - geometry_map.add(key, geom); - } - else { - /* Test if we need to update existing geometry. */ - sync = geometry_map.update(geom, b_key_id); - } - - if (!sync) { - /* If transform was applied to geometry, need full update. */ - if (object_updated && geom->transform_applied) { - ; - } - /* Test if shaders changed, these can be object level so geometry - * does not get tagged for recalc. */ - else if (geom->get_used_shaders() != used_shaders) { - ; - } - else { - /* Even if not tagged for recalc, we may need to sync anyway - * because the shader needs different geometry attributes. */ - bool attribute_recalc = false; - - foreach (Node *node, geom->get_used_shaders()) { - Shader *shader = static_cast(node); - if (shader->need_update_geometry()) { - attribute_recalc = true; - } - } - - if (!attribute_recalc) { - return geom; - } - } - } - - geometry_synced.insert(geom); - - geom->name = ustring(b_ob_info.object_data.name().c_str()); - - /* Store the shaders immediately for the object attribute code. */ - geom->set_used_shaders(used_shaders); - - auto sync_func = [=]() mutable { - if (progress.get_cancel()) - return; - - progress.set_sync_status("Synchronizing object", b_ob_info.real_object.name()); - - if (geom_type == Geometry::HAIR) { - Hair *hair = static_cast(geom); - sync_hair(b_depsgraph, b_ob_info, hair); - } - else if (geom_type == Geometry::VOLUME) { - Volume *volume = static_cast(geom); - sync_volume(b_ob_info, volume); - } - else { - Mesh *mesh = static_cast(geom); - sync_mesh(b_depsgraph, b_ob_info, mesh); - } - }; - - /* Defer the actual geometry sync to the task_pool for multithreading */ - if (task_pool) { - task_pool->push(sync_func); - } - else { - sync_func(); - } - - return geom; -} - -void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph, - BObjectInfo &b_ob_info, - Object *object, - float motion_time, - bool use_particle_hair, - TaskPool *task_pool) -{ - /* Ensure we only sync instanced geometry once. */ - Geometry *geom = object->get_geometry(); - - if (geometry_motion_synced.find(geom) != geometry_motion_synced.end() || - geometry_motion_attribute_synced.find(geom) != geometry_motion_attribute_synced.end()) { - return; - } - - geometry_motion_synced.insert(geom); - - /* Ensure we only motion sync geometry that also had geometry synced, to avoid - * unnecessary work and to ensure that its attributes were clear. */ - if (geometry_synced.find(geom) == geometry_synced.end()) - return; - - /* Find time matching motion step required by geometry. 
*/ - int motion_step = geom->motion_step(motion_time); - if (motion_step < 0) { - return; - } - - auto sync_func = [=]() mutable { - if (progress.get_cancel()) - return; - - if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) { - Hair *hair = static_cast(geom); - sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step); - } - else if (b_ob_info.object_data.is_a(&RNA_Volume) || - object_fluid_gas_domain_find(b_ob_info.real_object)) { - /* No volume motion blur support yet. */ - } - else { - Mesh *mesh = static_cast(geom); - sync_mesh_motion(b_depsgraph, b_ob_info, mesh, motion_step); - } - }; - - /* Defer the actual geometry sync to the task_pool for multithreading */ - if (task_pool) { - task_pool->push(sync_func); - } - else { - sync_func(); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_id_map.h b/intern/cycles/blender/blender_id_map.h deleted file mode 100644 index 27a53a90f12..00000000000 --- a/intern/cycles/blender/blender_id_map.h +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BLENDER_ID_MAP_H__ -#define __BLENDER_ID_MAP_H__ - -#include - -#include "scene/geometry.h" -#include "scene/scene.h" - -#include "util/util_map.h" -#include "util/util_set.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -/* ID Map - * - * Utility class to map between Blender datablocks and Cycles data structures, - * and keep track of recalc tags from the dependency graph. */ - -template class id_map { - public: - id_map(Scene *scene_) : scene(scene_) - { - } - - ~id_map() - { - set nodes; - - typename map::iterator jt; - for (jt = b_map.begin(); jt != b_map.end(); jt++) { - nodes.insert(jt->second); - } - - scene->delete_nodes(nodes); - } - - T *find(const BL::ID &id) - { - return find(id.ptr.owner_id); - } - - T *find(const K &key) - { - if (b_map.find(key) != b_map.end()) { - T *data = b_map[key]; - return data; - } - - return NULL; - } - - void set_recalc(const BL::ID &id) - { - b_recalc.insert(id.ptr.data); - } - - void set_recalc(void *id_ptr) - { - b_recalc.insert(id_ptr); - } - - bool has_recalc() - { - return !(b_recalc.empty()); - } - - void pre_sync() - { - used_set.clear(); - } - - /* Add new data. */ - void add(const K &key, T *data) - { - assert(find(key) == NULL); - b_map[key] = data; - used(data); - } - - /* Update existing data. */ - bool update(T *data, const BL::ID &id) - { - return update(data, id, id); - } - bool update(T *data, const BL::ID &id, const BL::ID &parent) - { - bool recalc = (b_recalc.find(id.ptr.data) != b_recalc.end()); - if (parent.ptr.data && parent.ptr.data != id.ptr.data) { - recalc = recalc || (b_recalc.find(parent.ptr.data) != b_recalc.end()); - } - used(data); - return recalc; - } - - /* Combined add and update as needed. 
*/ - bool add_or_update(T **r_data, const BL::ID &id) - { - return add_or_update(r_data, id, id, id.ptr.owner_id); - } - bool add_or_update(T **r_data, const BL::ID &id, const K &key) - { - return add_or_update(r_data, id, id, key); - } - bool add_or_update(T **r_data, const BL::ID &id, const BL::ID &parent, const K &key) - { - T *data = find(key); - bool recalc; - - if (!data) { - /* Add data if it didn't exist yet. */ - data = scene->create_node(); - add(key, data); - recalc = true; - } - else { - /* check if updated needed. */ - recalc = update(data, id, parent); - } - - *r_data = data; - return recalc; - } - - /* Combined add or update for convenience. */ - - bool is_used(const K &key) - { - T *data = find(key); - return (data) ? used_set.find(data) != used_set.end() : false; - } - - void used(T *data) - { - /* tag data as still in use */ - used_set.insert(data); - } - - void set_default(T *data) - { - b_map[NULL] = data; - } - - void post_sync(bool do_delete = true) - { - map new_map; - typedef pair TMapPair; - typename map::iterator jt; - - for (jt = b_map.begin(); jt != b_map.end(); jt++) { - TMapPair &pair = *jt; - - if (do_delete && used_set.find(pair.second) == used_set.end()) { - scene->delete_node(pair.second); - } - else { - new_map[pair.first] = pair.second; - } - } - - used_set.clear(); - b_recalc.clear(); - b_map = new_map; - } - - const map &key_to_scene_data() - { - return b_map; - } - - protected: - map b_map; - set used_set; - set b_recalc; - Scene *scene; -}; - -/* Object Key - * - * To uniquely identify instances, we use the parent, object and persistent instance ID. - * We also export separate object for a mesh and its particle hair. */ - -enum { OBJECT_PERSISTENT_ID_SIZE = 8 /* MAX_DUPLI_RECUR in Blender. */ }; - -struct ObjectKey { - void *parent; - int id[OBJECT_PERSISTENT_ID_SIZE]; - void *ob; - bool use_particle_hair; - - ObjectKey(void *parent_, int id_[OBJECT_PERSISTENT_ID_SIZE], void *ob_, bool use_particle_hair_) - : parent(parent_), ob(ob_), use_particle_hair(use_particle_hair_) - { - if (id_) - memcpy(id, id_, sizeof(id)); - else - memset(id, 0, sizeof(id)); - } - - bool operator<(const ObjectKey &k) const - { - if (ob < k.ob) { - return true; - } - else if (ob == k.ob) { - if (parent < k.parent) { - return true; - } - else if (parent == k.parent) { - if (use_particle_hair < k.use_particle_hair) { - return true; - } - else if (use_particle_hair == k.use_particle_hair) { - return memcmp(id, k.id, sizeof(id)) < 0; - } - } - } - - return false; - } -}; - -/* Geometry Key - * - * We export separate geometry for a mesh and its particle hair, so key needs to - * distinguish between them. 
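The id_map above combines a key-to-node map with a depsgraph "recalc" set and a per-sync "used" set: add_or_update() reports whether a node must be (re)synced, and post_sync() deletes whatever was not used this time around. A condensed standalone sketch of that bookkeeping with plain std::map/std::set (KeyedMap and Node are hypothetical; parent tags and the default-key slot are left out):

#include <cstdio>
#include <map>
#include <set>

struct Node {
  int value = 0;
};

class KeyedMap {
 public:
  ~KeyedMap()
  {
    for (auto &pair : map_) {
      delete pair.second;
    }
  }

  /* Tag a datablock as modified (what a depsgraph update would do). */
  void set_recalc(void *key)
  {
    recalc_.insert(key);
  }

  /* Returns true when the caller must (re)sync the node's contents. */
  bool add_or_update(Node **r_node, void *key)
  {
    auto it = map_.find(key);
    if (it == map_.end()) {
      Node *node = new Node();
      map_[key] = node;
      used_.insert(node);
      *r_node = node;
      return true; /* newly created, always sync */
    }
    used_.insert(it->second);
    *r_node = it->second;
    return recalc_.count(key) != 0; /* sync only if tagged */
  }

  /* Delete nodes whose datablocks were not seen during this sync. */
  void post_sync()
  {
    for (auto it = map_.begin(); it != map_.end();) {
      if (used_.count(it->second) == 0) {
        delete it->second;
        it = map_.erase(it);
      }
      else {
        ++it;
      }
    }
    used_.clear();
    recalc_.clear();
  }

 private:
  std::map<void *, Node *> map_;
  std::set<Node *> used_;
  std::set<void *> recalc_;
};

int main()
{
  int blender_id = 0; /* stands in for a Blender datablock pointer */
  KeyedMap objects;
  Node *node = nullptr;

  printf("first sync needs update: %d\n", objects.add_or_update(&node, &blender_id)); /* 1 */
  objects.post_sync();
  printf("unchanged sync needs update: %d\n", objects.add_or_update(&node, &blender_id)); /* 0 */
  objects.post_sync();
  return 0;
}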
*/ - -struct GeometryKey { - void *id; - Geometry::Type geometry_type; - - GeometryKey(void *id, Geometry::Type geometry_type) : id(id), geometry_type(geometry_type) - { - } - - bool operator<(const GeometryKey &k) const - { - if (id < k.id) { - return true; - } - else if (id == k.id) { - if (geometry_type < k.geometry_type) { - return true; - } - } - - return false; - } -}; - -/* Particle System Key */ - -struct ParticleSystemKey { - void *ob; - int id[OBJECT_PERSISTENT_ID_SIZE]; - - ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE]) : ob(ob_) - { - if (id_) - memcpy(id, id_, sizeof(id)); - else - memset(id, 0, sizeof(id)); - } - - bool operator<(const ParticleSystemKey &k) const - { - /* first id is particle index, we don't compare that */ - if (ob < k.ob) - return true; - else if (ob == k.ob) - return memcmp(id + 1, k.id + 1, sizeof(int) * (OBJECT_PERSISTENT_ID_SIZE - 1)) < 0; - - return false; - } -}; - -CCL_NAMESPACE_END - -#endif /* __BLENDER_ID_MAP_H__ */ diff --git a/intern/cycles/blender/blender_image.cpp b/intern/cycles/blender/blender_image.cpp deleted file mode 100644 index f27275bd457..00000000000 --- a/intern/cycles/blender/blender_image.cpp +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MEM_guardedalloc.h" - -#include "blender/blender_image.h" -#include "blender/blender_session.h" -#include "blender/blender_util.h" - -CCL_NAMESPACE_BEGIN - -/* Packed Images */ - -BlenderImageLoader::BlenderImageLoader(BL::Image b_image, int frame) - : b_image(b_image), frame(frame), free_cache(!b_image.has_data()) -{ -} - -bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaData &metadata) -{ - metadata.width = b_image.size()[0]; - metadata.height = b_image.size()[1]; - metadata.depth = 1; - metadata.channels = b_image.channels(); - - if (b_image.is_float()) { - if (metadata.channels == 1) { - metadata.type = IMAGE_DATA_TYPE_FLOAT; - } - else if (metadata.channels == 4) { - metadata.type = IMAGE_DATA_TYPE_FLOAT4; - } - else { - return false; - } - - /* Float images are already converted on the Blender side, - * no need to do anything in Cycles. */ - metadata.colorspace = u_colorspace_raw; - } - else { - if (metadata.channels == 1) { - metadata.type = IMAGE_DATA_TYPE_BYTE; - } - else if (metadata.channels == 4) { - metadata.type = IMAGE_DATA_TYPE_BYTE4; - } - else { - return false; - } - } - - return true; -} - -bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata, - void *pixels, - const size_t pixels_size, - const bool associate_alpha) -{ - const size_t num_pixels = ((size_t)metadata.width) * metadata.height; - const int channels = metadata.channels; - const int tile = 0; /* TODO(lukas): Support tiles here? 
*/ - - if (b_image.is_float()) { - /* image data */ - float *image_pixels; - image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile); - - if (image_pixels && num_pixels * channels == pixels_size) { - memcpy(pixels, image_pixels, pixels_size * sizeof(float)); - } - else { - if (channels == 1) { - memset(pixels, 0, num_pixels * sizeof(float)); - } - else { - const size_t num_pixels_safe = pixels_size / channels; - float *fp = (float *)pixels; - for (int i = 0; i < num_pixels_safe; i++, fp += channels) { - fp[0] = 1.0f; - fp[1] = 0.0f; - fp[2] = 1.0f; - if (channels == 4) { - fp[3] = 1.0f; - } - } - } - } - - if (image_pixels) { - MEM_freeN(image_pixels); - } - } - else { - unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile); - - if (image_pixels && num_pixels * channels == pixels_size) { - memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char)); - } - else { - if (channels == 1) { - memset(pixels, 0, pixels_size * sizeof(unsigned char)); - } - else { - const size_t num_pixels_safe = pixels_size / channels; - unsigned char *cp = (unsigned char *)pixels; - for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) { - cp[0] = 255; - cp[1] = 0; - cp[2] = 255; - if (channels == 4) { - cp[3] = 255; - } - } - } - } - - if (image_pixels) { - MEM_freeN(image_pixels); - } - - if (associate_alpha) { - /* Premultiply, byte images are always straight for Blender. */ - unsigned char *cp = (unsigned char *)pixels; - for (size_t i = 0; i < num_pixels; i++, cp += channels) { - cp[0] = (cp[0] * cp[3]) / 255; - cp[1] = (cp[1] * cp[3]) / 255; - cp[2] = (cp[2] * cp[3]) / 255; - } - } - } - - /* Free image buffers to save memory during render. */ - if (free_cache) { - b_image.buffers_free(); - } - - return true; -} - -string BlenderImageLoader::name() const -{ - return BL::Image(b_image).name(); -} - -bool BlenderImageLoader::equals(const ImageLoader &other) const -{ - const BlenderImageLoader &other_loader = (const BlenderImageLoader &)other; - return b_image == other_loader.b_image && frame == other_loader.frame; -} - -/* Point Density */ - -BlenderPointDensityLoader::BlenderPointDensityLoader(BL::Depsgraph b_depsgraph, - BL::ShaderNodeTexPointDensity b_node) - : b_depsgraph(b_depsgraph), b_node(b_node) -{ -} - -bool BlenderPointDensityLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaData &metadata) -{ - metadata.channels = 4; - metadata.width = b_node.resolution(); - metadata.height = metadata.width; - metadata.depth = metadata.width; - metadata.type = IMAGE_DATA_TYPE_FLOAT4; - return true; -} - -bool BlenderPointDensityLoader::load_pixels(const ImageMetaData &, - void *pixels, - const size_t, - const bool) -{ - int length; - b_node.calc_point_density(b_depsgraph, &length, (float **)&pixels); - return true; -} - -void BlenderSession::builtin_images_load() -{ - /* Force builtin images to be loaded along with Blender data sync. This - * is needed because we may be reading from depsgraph evaluated data which - * can be freed by Blender before Cycles reads it. - * - * TODO: the assumption that no further access to builtin image data will - * happen is really weak, and likely to break in the future. We should find - * a better solution to hand over the data directly to the image manager - * instead of through callbacks whose timing is difficult to control. 
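load_pixels() above premultiplies byte images because Blender stores them with straight alpha while Cycles works with associated alpha. A minimal sketch of that in-place conversion for an RGBA byte buffer, using the same integer math (premultiply_rgba is a hypothetical helper, not a Cycles function):

#include <cstdio>

/* Convert straight-alpha RGBA bytes to premultiplied alpha in place. */
static void premultiply_rgba(unsigned char *pixels, size_t num_pixels)
{
  unsigned char *cp = pixels;
  for (size_t i = 0; i < num_pixels; i++, cp += 4) {
    cp[0] = (cp[0] * cp[3]) / 255;
    cp[1] = (cp[1] * cp[3]) / 255;
    cp[2] = (cp[2] * cp[3]) / 255;
    /* Alpha itself is left untouched. */
  }
}

int main()
{
  unsigned char pixel[4] = {255, 128, 0, 64}; /* orange at 25% alpha */
  premultiply_rgba(pixel, 1);
  printf("%d %d %d %d\n", pixel[0], pixel[1], pixel[2], pixel[3]); /* 64 32 0 64 */
  return 0;
}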
*/ - ImageManager *manager = session->scene->image_manager; - Device *device = session->device; - manager->device_load_builtin(device, session->scene, session->progress); -} - -string BlenderPointDensityLoader::name() const -{ - return BL::ShaderNodeTexPointDensity(b_node).name(); -} - -bool BlenderPointDensityLoader::equals(const ImageLoader &other) const -{ - const BlenderPointDensityLoader &other_loader = (const BlenderPointDensityLoader &)other; - return b_node == other_loader.b_node && b_depsgraph == other_loader.b_depsgraph; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_image.h b/intern/cycles/blender/blender_image.h deleted file mode 100644 index 6f1e72c21af..00000000000 --- a/intern/cycles/blender/blender_image.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2011-2020 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BLENDER_IMAGE_H__ -#define __BLENDER_IMAGE_H__ - -#include "RNA_blender_cpp.h" - -#include "scene/image.h" - -CCL_NAMESPACE_BEGIN - -class BlenderImageLoader : public ImageLoader { - public: - BlenderImageLoader(BL::Image b_image, int frame); - - bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override; - bool load_pixels(const ImageMetaData &metadata, - void *pixels, - const size_t pixels_size, - const bool associate_alpha) override; - string name() const override; - bool equals(const ImageLoader &other) const override; - - BL::Image b_image; - int frame; - bool free_cache; -}; - -class BlenderPointDensityLoader : public ImageLoader { - public: - BlenderPointDensityLoader(BL::Depsgraph depsgraph, BL::ShaderNodeTexPointDensity b_node); - - bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override; - bool load_pixels(const ImageMetaData &metadata, - void *pixels, - const size_t pixels_size, - const bool associate_alpha) override; - string name() const override; - bool equals(const ImageLoader &other) const override; - - BL::Depsgraph b_depsgraph; - BL::ShaderNodeTexPointDensity b_node; -}; - -CCL_NAMESPACE_END - -#endif /* __BLENDER_IMAGE_H__ */ diff --git a/intern/cycles/blender/blender_light.cpp b/intern/cycles/blender/blender_light.cpp deleted file mode 100644 index aa0c6a964e4..00000000000 --- a/intern/cycles/blender/blender_light.cpp +++ /dev/null @@ -1,205 +0,0 @@ - - -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "scene/light.h" - -#include "blender/blender_sync.h" -#include "blender/blender_util.h" - -#include "util/util_hash.h" - -CCL_NAMESPACE_BEGIN - -void BlenderSync::sync_light(BL::Object &b_parent, - int persistent_id[OBJECT_PERSISTENT_ID_SIZE], - BObjectInfo &b_ob_info, - int random_id, - Transform &tfm, - bool *use_portal) -{ - /* test if we need to sync */ - ObjectKey key(b_parent, persistent_id, b_ob_info.real_object, false); - BL::Light b_light(b_ob_info.object_data); - - Light *light = light_map.find(key); - - /* Check if the transform was modified, in case a linked collection is moved we do not get a - * specific depsgraph update (T88515). This also mimics the behavior for Objects. */ - const bool tfm_updated = (light && light->get_tfm() != tfm); - - /* Update if either object or light data changed. */ - if (!light_map.add_or_update(&light, b_ob_info.real_object, b_parent, key) && !tfm_updated) { - Shader *shader; - if (!shader_map.add_or_update(&shader, b_light)) { - if (light->get_is_portal()) - *use_portal = true; - return; - } - } - - /* type */ - switch (b_light.type()) { - case BL::Light::type_POINT: { - BL::PointLight b_point_light(b_light); - light->set_size(b_point_light.shadow_soft_size()); - light->set_light_type(LIGHT_POINT); - break; - } - case BL::Light::type_SPOT: { - BL::SpotLight b_spot_light(b_light); - light->set_size(b_spot_light.shadow_soft_size()); - light->set_light_type(LIGHT_SPOT); - light->set_spot_angle(b_spot_light.spot_size()); - light->set_spot_smooth(b_spot_light.spot_blend()); - break; - } - /* Hemi were removed from 2.8 */ - // case BL::Light::type_HEMI: { - // light->type = LIGHT_DISTANT; - // light->size = 0.0f; - // break; - // } - case BL::Light::type_SUN: { - BL::SunLight b_sun_light(b_light); - light->set_angle(b_sun_light.angle()); - light->set_light_type(LIGHT_DISTANT); - break; - } - case BL::Light::type_AREA: { - BL::AreaLight b_area_light(b_light); - light->set_size(1.0f); - light->set_axisu(transform_get_column(&tfm, 0)); - light->set_axisv(transform_get_column(&tfm, 1)); - light->set_sizeu(b_area_light.size()); - light->set_spread(b_area_light.spread()); - switch (b_area_light.shape()) { - case BL::AreaLight::shape_SQUARE: - light->set_sizev(light->get_sizeu()); - light->set_round(false); - break; - case BL::AreaLight::shape_RECTANGLE: - light->set_sizev(b_area_light.size_y()); - light->set_round(false); - break; - case BL::AreaLight::shape_DISK: - light->set_sizev(light->get_sizeu()); - light->set_round(true); - break; - case BL::AreaLight::shape_ELLIPSE: - light->set_sizev(b_area_light.size_y()); - light->set_round(true); - break; - } - light->set_light_type(LIGHT_AREA); - break; - } - } - - /* strength */ - float3 strength = get_float3(b_light.color()) * BL::PointLight(b_light).energy(); - light->set_strength(strength); - - /* location and (inverted!) 
direction */ - light->set_co(transform_get_column(&tfm, 3)); - light->set_dir(-transform_get_column(&tfm, 2)); - light->set_tfm(tfm); - - /* shader */ - array used_shaders; - find_shader(b_light, used_shaders, scene->default_light); - light->set_shader(static_cast(used_shaders[0])); - - /* shadow */ - PointerRNA clight = RNA_pointer_get(&b_light.ptr, "cycles"); - light->set_cast_shadow(get_boolean(clight, "cast_shadow")); - light->set_use_mis(get_boolean(clight, "use_multiple_importance_sampling")); - - light->set_max_bounces(get_int(clight, "max_bounces")); - - if (b_ob_info.real_object != b_ob_info.iter_object) { - light->set_random_id(random_id); - } - else { - light->set_random_id(hash_uint2(hash_string(b_ob_info.real_object.name().c_str()), 0)); - } - - if (light->get_light_type() == LIGHT_AREA) - light->set_is_portal(get_boolean(clight, "is_portal")); - else - light->set_is_portal(false); - - if (light->get_is_portal()) - *use_portal = true; - - /* visibility */ - uint visibility = object_ray_visibility(b_ob_info.real_object); - light->set_use_camera((visibility & PATH_RAY_CAMERA) != 0); - light->set_use_diffuse((visibility & PATH_RAY_DIFFUSE) != 0); - light->set_use_glossy((visibility & PATH_RAY_GLOSSY) != 0); - light->set_use_transmission((visibility & PATH_RAY_TRANSMIT) != 0); - light->set_use_scatter((visibility & PATH_RAY_VOLUME_SCATTER) != 0); - light->set_is_shadow_catcher(b_ob_info.real_object.is_shadow_catcher()); - - /* tag */ - light->tag_update(scene); -} - -void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal) -{ - BL::World b_world = b_scene.world(); - - if (b_world) { - PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles"); - - enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM }; - int sampling_method = get_enum(cworld, "sampling_method", SAMPLING_NUM, SAMPLING_AUTOMATIC); - bool sample_as_light = (sampling_method != SAMPLING_NONE); - - if (sample_as_light || use_portal) { - /* test if we need to sync */ - Light *light; - ObjectKey key(b_world, 0, b_world, false); - - if (light_map.add_or_update(&light, b_world, b_world, key) || world_recalc || - b_world.ptr.data != world_map) { - light->set_light_type(LIGHT_BACKGROUND); - if (sampling_method == SAMPLING_MANUAL) { - light->set_map_resolution(get_int(cworld, "sample_map_resolution")); - } - else { - light->set_map_resolution(0); - } - light->set_shader(scene->default_background); - light->set_use_mis(sample_as_light); - light->set_max_bounces(get_int(cworld, "max_bounces")); - - /* force enable light again when world is resynced */ - light->set_is_enabled(true); - - light->tag_update(scene); - light_map.set_recalc(b_world); - } - } - } - - world_map = b_world.ptr.data; - world_recalc = false; - viewport_parameters = BlenderViewportParameters(b_v3d, use_developer_ui); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_logging.cpp b/intern/cycles/blender/blender_logging.cpp deleted file mode 100644 index b42a1f47821..00000000000 --- a/intern/cycles/blender/blender_logging.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2011-2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "blender/CCL_api.h" -#include "util/util_logging.h" - -void CCL_init_logging(const char *argv0) -{ - ccl::util_logging_init(argv0); -} - -void CCL_start_debug_logging() -{ - ccl::util_logging_start(); -} - -void CCL_logging_verbosity_set(int verbosity) -{ - ccl::util_logging_verbosity_set(verbosity); -} diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp deleted file mode 100644 index 992e17d6f79..00000000000 --- a/intern/cycles/blender/blender_mesh.cpp +++ /dev/null @@ -1,1302 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "scene/camera.h" -#include "scene/colorspace.h" -#include "scene/mesh.h" -#include "scene/object.h" -#include "scene/scene.h" - -#include "blender/blender_session.h" -#include "blender/blender_sync.h" -#include "blender/blender_util.h" - -#include "subd/subd_patch.h" -#include "subd/subd_split.h" - -#include "util/util_algorithm.h" -#include "util/util_color.h" -#include "util/util_disjoint_set.h" -#include "util/util_foreach.h" -#include "util/util_hash.h" -#include "util/util_logging.h" -#include "util/util_math.h" - -#include "mikktspace.h" - -CCL_NAMESPACE_BEGIN - -/* Tangent Space */ - -struct MikkUserData { - MikkUserData(const BL::Mesh &b_mesh, - const char *layer_name, - const Mesh *mesh, - float3 *tangent, - float *tangent_sign) - : mesh(mesh), texface(NULL), orco(NULL), tangent(tangent), tangent_sign(tangent_sign) - { - const AttributeSet &attributes = (mesh->get_num_subd_faces()) ? 
mesh->subd_attributes : - mesh->attributes; - - Attribute *attr_vN = attributes.find(ATTR_STD_VERTEX_NORMAL); - vertex_normal = attr_vN->data_float3(); - - if (layer_name == NULL) { - Attribute *attr_orco = attributes.find(ATTR_STD_GENERATED); - - if (attr_orco) { - orco = attr_orco->data_float3(); - mesh_texture_space(*(BL::Mesh *)&b_mesh, orco_loc, orco_size); - } - } - else { - Attribute *attr_uv = attributes.find(ustring(layer_name)); - if (attr_uv != NULL) { - texface = attr_uv->data_float2(); - } - } - } - - const Mesh *mesh; - int num_faces; - - float3 *vertex_normal; - float2 *texface; - float3 *orco; - float3 orco_loc, orco_size; - - float3 *tangent; - float *tangent_sign; -}; - -static int mikk_get_num_faces(const SMikkTSpaceContext *context) -{ - const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData; - if (userdata->mesh->get_num_subd_faces()) { - return userdata->mesh->get_num_subd_faces(); - } - else { - return userdata->mesh->num_triangles(); - } -} - -static int mikk_get_num_verts_of_face(const SMikkTSpaceContext *context, const int face_num) -{ - const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData; - if (userdata->mesh->get_num_subd_faces()) { - const Mesh *mesh = userdata->mesh; - return mesh->get_subd_num_corners()[face_num]; - } - else { - return 3; - } -} - -static int mikk_vertex_index(const Mesh *mesh, const int face_num, const int vert_num) -{ - if (mesh->get_num_subd_faces()) { - const Mesh::SubdFace &face = mesh->get_subd_face(face_num); - return mesh->get_subd_face_corners()[face.start_corner + vert_num]; - } - else { - return mesh->get_triangles()[face_num * 3 + vert_num]; - } -} - -static int mikk_corner_index(const Mesh *mesh, const int face_num, const int vert_num) -{ - if (mesh->get_num_subd_faces()) { - const Mesh::SubdFace &face = mesh->get_subd_face(face_num); - return face.start_corner + vert_num; - } - else { - return face_num * 3 + vert_num; - } -} - -static void mikk_get_position(const SMikkTSpaceContext *context, - float P[3], - const int face_num, - const int vert_num) -{ - const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData; - const Mesh *mesh = userdata->mesh; - const int vertex_index = mikk_vertex_index(mesh, face_num, vert_num); - const float3 vP = mesh->get_verts()[vertex_index]; - P[0] = vP.x; - P[1] = vP.y; - P[2] = vP.z; -} - -static void mikk_get_texture_coordinate(const SMikkTSpaceContext *context, - float uv[2], - const int face_num, - const int vert_num) -{ - const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData; - const Mesh *mesh = userdata->mesh; - if (userdata->texface != NULL) { - const int corner_index = mikk_corner_index(mesh, face_num, vert_num); - float2 tfuv = userdata->texface[corner_index]; - uv[0] = tfuv.x; - uv[1] = tfuv.y; - } - else if (userdata->orco != NULL) { - const int vertex_index = mikk_vertex_index(mesh, face_num, vert_num); - const float3 orco_loc = userdata->orco_loc; - const float3 orco_size = userdata->orco_size; - const float3 orco = (userdata->orco[vertex_index] + orco_loc) / orco_size; - - const float2 tmp = map_to_sphere(orco); - uv[0] = tmp.x; - uv[1] = tmp.y; - } - else { - uv[0] = 0.0f; - uv[1] = 0.0f; - } -} - -static void mikk_get_normal(const SMikkTSpaceContext *context, - float N[3], - const int face_num, - const int vert_num) -{ - const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData; - const Mesh *mesh = userdata->mesh; - float3 vN; - if (mesh->get_num_subd_faces()) { - const Mesh::SubdFace &face 
= mesh->get_subd_face(face_num); - if (face.smooth) { - const int vertex_index = mikk_vertex_index(mesh, face_num, vert_num); - vN = userdata->vertex_normal[vertex_index]; - } - else { - vN = face.normal(mesh); - } - } - else { - if (mesh->get_smooth()[face_num]) { - const int vertex_index = mikk_vertex_index(mesh, face_num, vert_num); - vN = userdata->vertex_normal[vertex_index]; - } - else { - const Mesh::Triangle tri = mesh->get_triangle(face_num); - vN = tri.compute_normal(&mesh->get_verts()[0]); - } - } - N[0] = vN.x; - N[1] = vN.y; - N[2] = vN.z; -} - -static void mikk_set_tangent_space(const SMikkTSpaceContext *context, - const float T[], - const float sign, - const int face_num, - const int vert_num) -{ - MikkUserData *userdata = (MikkUserData *)context->m_pUserData; - const Mesh *mesh = userdata->mesh; - const int corner_index = mikk_corner_index(mesh, face_num, vert_num); - userdata->tangent[corner_index] = make_float3(T[0], T[1], T[2]); - if (userdata->tangent_sign != NULL) { - userdata->tangent_sign[corner_index] = sign; - } -} - -static void mikk_compute_tangents( - const BL::Mesh &b_mesh, const char *layer_name, Mesh *mesh, bool need_sign, bool active_render) -{ - /* Create tangent attributes. */ - AttributeSet &attributes = (mesh->get_num_subd_faces()) ? mesh->subd_attributes : - mesh->attributes; - Attribute *attr; - ustring name; - if (layer_name != NULL) { - name = ustring((string(layer_name) + ".tangent").c_str()); - } - else { - name = ustring("orco.tangent"); - } - if (active_render) { - attr = attributes.add(ATTR_STD_UV_TANGENT, name); - } - else { - attr = attributes.add(name, TypeDesc::TypeVector, ATTR_ELEMENT_CORNER); - } - float3 *tangent = attr->data_float3(); - /* Create bitangent sign attribute. */ - float *tangent_sign = NULL; - if (need_sign) { - Attribute *attr_sign; - ustring name_sign; - if (layer_name != NULL) { - name_sign = ustring((string(layer_name) + ".tangent_sign").c_str()); - } - else { - name_sign = ustring("orco.tangent_sign"); - } - - if (active_render) { - attr_sign = attributes.add(ATTR_STD_UV_TANGENT_SIGN, name_sign); - } - else { - attr_sign = attributes.add(name_sign, TypeDesc::TypeFloat, ATTR_ELEMENT_CORNER); - } - tangent_sign = attr_sign->data_float(); - } - /* Setup userdata. */ - MikkUserData userdata(b_mesh, layer_name, mesh, tangent, tangent_sign); - /* Setup interface. */ - SMikkTSpaceInterface sm_interface; - memset(&sm_interface, 0, sizeof(sm_interface)); - sm_interface.m_getNumFaces = mikk_get_num_faces; - sm_interface.m_getNumVerticesOfFace = mikk_get_num_verts_of_face; - sm_interface.m_getPosition = mikk_get_position; - sm_interface.m_getTexCoord = mikk_get_texture_coordinate; - sm_interface.m_getNormal = mikk_get_normal; - sm_interface.m_setTSpaceBasic = mikk_set_tangent_space; - /* Setup context. */ - SMikkTSpaceContext context; - memset(&context, 0, sizeof(context)); - context.m_pUserData = &userdata; - context.m_pInterface = &sm_interface; - /* Compute tangents. */ - genTangSpaceDefault(&context); -} - -/* Create sculpt vertex color attributes. */ -static void attr_create_sculpt_vertex_color(Scene *scene, - Mesh *mesh, - BL::Mesh &b_mesh, - bool subdivision) -{ - for (BL::MeshVertColorLayer &l : b_mesh.sculpt_vertex_colors) { - const bool active_render = l.active_render(); - AttributeStandard vcol_std = (active_render) ? 
ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE; - ustring vcol_name = ustring(l.name().c_str()); - - const bool need_vcol = mesh->need_attribute(scene, vcol_name) || - mesh->need_attribute(scene, vcol_std); - - if (!need_vcol) { - continue; - } - - AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes; - Attribute *vcol_attr = attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_VERTEX); - vcol_attr->std = vcol_std; - - float4 *cdata = vcol_attr->data_float4(); - int numverts = b_mesh.vertices.length(); - - for (int i = 0; i < numverts; i++) { - *(cdata++) = get_float4(l.data[i].color()); - } - } -} - -template -static void fill_generic_attribute(BL::Mesh &b_mesh, - TypeInCycles *data, - const AttributeElement element, - const GetValueAtIndex &get_value_at_index) -{ - switch (element) { - case ATTR_ELEMENT_CORNER: { - for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { - const int index = t.index() * 3; - BL::Array loops = t.loops(); - data[index] = get_value_at_index(loops[0]); - data[index + 1] = get_value_at_index(loops[1]); - data[index + 2] = get_value_at_index(loops[2]); - } - break; - } - case ATTR_ELEMENT_VERTEX: { - const int num_verts = b_mesh.vertices.length(); - for (int i = 0; i < num_verts; i++) { - data[i] = get_value_at_index(i); - } - break; - } - case ATTR_ELEMENT_FACE: { - for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { - data[t.index()] = get_value_at_index(t.polygon_index()); - } - break; - } - default: { - assert(false); - break; - } - } -} - -static void attr_create_motion(Mesh *mesh, BL::Attribute &b_attribute, const float motion_scale) -{ - if (!(b_attribute.domain() == BL::Attribute::domain_POINT) && - (b_attribute.data_type() == BL::Attribute::data_type_FLOAT_VECTOR)) { - return; - } - - BL::FloatVectorAttribute b_vector_attribute(b_attribute); - const int numverts = mesh->get_verts().size(); - - /* Find or add attribute */ - float3 *P = &mesh->get_verts()[0]; - Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - - if (!attr_mP) { - attr_mP = mesh->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); - } - - /* Only export previous and next frame, we don't have any in between data. */ - float motion_times[2] = {-1.0f, 1.0f}; - for (int step = 0; step < 2; step++) { - const float relative_time = motion_times[step] * 0.5f * motion_scale; - float3 *mP = attr_mP->data_float3() + step * numverts; - - for (int i = 0; i < numverts; i++) { - mP[i] = P[i] + get_float3(b_vector_attribute.data[i].vector()) * relative_time; - } - } -} - -static void attr_create_generic(Scene *scene, - Mesh *mesh, - BL::Mesh &b_mesh, - const bool subdivision, - const bool need_motion, - const float motion_scale) -{ - if (subdivision) { - /* TODO: Handle subdivision correctly. 
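attr_create_motion() above offsets the rest positions by the "velocity" attribute, which is expressed per second, so the offset is scaled by half of the shutter time converted to seconds (the motion_scale handed in by the mesh sync code further below). A small numeric sketch of that offset with assumed values (24 fps, fps_base 1.0, 0.5 frame shutter, 2.4 units/s velocity; float3 here is a local stand-in):

#include <cstdio>

struct float3 {
  float x, y, z;
};

int main()
{
  /* Assumed scene settings. */
  const float fps = 24.0f;                       /* fps_base assumed 1.0 */
  const float shutter_time = 0.5f;               /* shutter, in frames */
  const float motion_scale = shutter_time / fps; /* shutter expressed in seconds */

  const float3 P = {1.0f, 0.0f, 0.0f};        /* rest position */
  const float3 velocity = {2.4f, 0.0f, 0.0f}; /* units per second */

  const float motion_times[2] = {-1.0f, 1.0f}; /* previous and next step */
  for (int step = 0; step < 2; step++) {
    const float relative_time = motion_times[step] * 0.5f * motion_scale;
    const float3 mP = {P.x + velocity.x * relative_time,
                       P.y + velocity.y * relative_time,
                       P.z + velocity.z * relative_time};
    printf("step %d: (%.4f, %.4f, %.4f)\n", step, mP.x, mP.y, mP.z); /* x = 0.9750 / 1.0250 */
  }
  return 0;
}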
*/ - return; - } - AttributeSet &attributes = mesh->attributes; - static const ustring u_velocity("velocity"); - - for (BL::Attribute &b_attribute : b_mesh.attributes) { - const ustring name{b_attribute.name().c_str()}; - - if (need_motion && name == u_velocity) { - attr_create_motion(mesh, b_attribute, motion_scale); - } - - if (!mesh->need_attribute(scene, name)) { - continue; - } - if (attributes.find(name)) { - continue; - } - - const BL::Attribute::domain_enum b_domain = b_attribute.domain(); - const BL::Attribute::data_type_enum b_data_type = b_attribute.data_type(); - - AttributeElement element = ATTR_ELEMENT_NONE; - switch (b_domain) { - case BL::Attribute::domain_CORNER: - element = ATTR_ELEMENT_CORNER; - break; - case BL::Attribute::domain_POINT: - element = ATTR_ELEMENT_VERTEX; - break; - case BL::Attribute::domain_FACE: - element = ATTR_ELEMENT_FACE; - break; - default: - break; - } - if (element == ATTR_ELEMENT_NONE) { - /* Not supported. */ - continue; - } - switch (b_data_type) { - case BL::Attribute::data_type_FLOAT: { - BL::FloatAttribute b_float_attribute{b_attribute}; - Attribute *attr = attributes.add(name, TypeFloat, element); - float *data = attr->data_float(); - fill_generic_attribute( - b_mesh, data, element, [&](int i) { return b_float_attribute.data[i].value(); }); - break; - } - case BL::Attribute::data_type_BOOLEAN: { - BL::BoolAttribute b_bool_attribute{b_attribute}; - Attribute *attr = attributes.add(name, TypeFloat, element); - float *data = attr->data_float(); - fill_generic_attribute( - b_mesh, data, element, [&](int i) { return (float)b_bool_attribute.data[i].value(); }); - break; - } - case BL::Attribute::data_type_INT: { - BL::IntAttribute b_int_attribute{b_attribute}; - Attribute *attr = attributes.add(name, TypeFloat, element); - float *data = attr->data_float(); - fill_generic_attribute( - b_mesh, data, element, [&](int i) { return (float)b_int_attribute.data[i].value(); }); - break; - } - case BL::Attribute::data_type_FLOAT_VECTOR: { - BL::FloatVectorAttribute b_vector_attribute{b_attribute}; - Attribute *attr = attributes.add(name, TypeVector, element); - float3 *data = attr->data_float3(); - fill_generic_attribute(b_mesh, data, element, [&](int i) { - BL::Array v = b_vector_attribute.data[i].vector(); - return make_float3(v[0], v[1], v[2]); - }); - break; - } - case BL::Attribute::data_type_FLOAT_COLOR: { - BL::FloatColorAttribute b_color_attribute{b_attribute}; - Attribute *attr = attributes.add(name, TypeRGBA, element); - float4 *data = attr->data_float4(); - fill_generic_attribute(b_mesh, data, element, [&](int i) { - BL::Array v = b_color_attribute.data[i].color(); - return make_float4(v[0], v[1], v[2], v[3]); - }); - break; - } - case BL::Attribute::data_type_FLOAT2: { - BL::Float2Attribute b_float2_attribute{b_attribute}; - Attribute *attr = attributes.add(name, TypeFloat2, element); - float2 *data = attr->data_float2(); - fill_generic_attribute(b_mesh, data, element, [&](int i) { - BL::Array v = b_float2_attribute.data[i].vector(); - return make_float2(v[0], v[1]); - }); - break; - } - default: - /* Not supported. */ - break; - } - } -} - -/* Create vertex color attributes. */ -static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivision) -{ - for (BL::MeshLoopColorLayer &l : b_mesh.vertex_colors) { - const bool active_render = l.active_render(); - AttributeStandard vcol_std = (active_render) ? 
ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE; - ustring vcol_name = ustring(l.name().c_str()); - - const bool need_vcol = mesh->need_attribute(scene, vcol_name) || - mesh->need_attribute(scene, vcol_std); - - if (!need_vcol) { - continue; - } - - Attribute *vcol_attr = NULL; - - if (subdivision) { - if (active_render) { - vcol_attr = mesh->subd_attributes.add(vcol_std, vcol_name); - } - else { - vcol_attr = mesh->subd_attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE); - } - - uchar4 *cdata = vcol_attr->data_uchar4(); - - for (BL::MeshPolygon &p : b_mesh.polygons) { - int n = p.loop_total(); - for (int i = 0; i < n; i++) { - float4 color = get_float4(l.data[p.loop_start() + i].color()); - /* Compress/encode vertex color using the sRGB curve. */ - *(cdata++) = color_float4_to_uchar4(color); - } - } - } - else { - if (active_render) { - vcol_attr = mesh->attributes.add(vcol_std, vcol_name); - } - else { - vcol_attr = mesh->attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE); - } - - uchar4 *cdata = vcol_attr->data_uchar4(); - - for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { - int3 li = get_int3(t.loops()); - float4 c1 = get_float4(l.data[li[0]].color()); - float4 c2 = get_float4(l.data[li[1]].color()); - float4 c3 = get_float4(l.data[li[2]].color()); - - /* Compress/encode vertex color using the sRGB curve. */ - cdata[0] = color_float4_to_uchar4(c1); - cdata[1] = color_float4_to_uchar4(c2); - cdata[2] = color_float4_to_uchar4(c3); - - cdata += 3; - } - } - } -} - -/* Create uv map attributes. */ -static void attr_create_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh) -{ - if (b_mesh.uv_layers.length() != 0) { - for (BL::MeshUVLoopLayer &l : b_mesh.uv_layers) { - const bool active_render = l.active_render(); - AttributeStandard uv_std = (active_render) ? ATTR_STD_UV : ATTR_STD_NONE; - ustring uv_name = ustring(l.name().c_str()); - AttributeStandard tangent_std = (active_render) ? ATTR_STD_UV_TANGENT : ATTR_STD_NONE; - ustring tangent_name = ustring((string(l.name().c_str()) + ".tangent").c_str()); - - /* Denotes whether UV map was requested directly. */ - const bool need_uv = mesh->need_attribute(scene, uv_name) || - mesh->need_attribute(scene, uv_std); - /* Denotes whether tangent was requested directly. */ - const bool need_tangent = mesh->need_attribute(scene, tangent_name) || - (active_render && mesh->need_attribute(scene, tangent_std)); - - /* UV map */ - /* NOTE: We create temporary UV layer if its needed for tangent but - * wasn't requested by other nodes in shaders. - */ - Attribute *uv_attr = NULL; - if (need_uv || need_tangent) { - if (active_render) { - uv_attr = mesh->attributes.add(uv_std, uv_name); - } - else { - uv_attr = mesh->attributes.add(uv_name, TypeFloat2, ATTR_ELEMENT_CORNER); - } - - float2 *fdata = uv_attr->data_float2(); - - for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { - int3 li = get_int3(t.loops()); - fdata[0] = get_float2(l.data[li[0]].uv()); - fdata[1] = get_float2(l.data[li[1]].uv()); - fdata[2] = get_float2(l.data[li[2]].uv()); - fdata += 3; - } - } - - /* UV tangent */ - if (need_tangent) { - AttributeStandard sign_std = (active_render) ? ATTR_STD_UV_TANGENT_SIGN : ATTR_STD_NONE; - ustring sign_name = ustring((string(l.name().c_str()) + ".tangent_sign").c_str()); - bool need_sign = (mesh->need_attribute(scene, sign_name) || - mesh->need_attribute(scene, sign_std)); - mikk_compute_tangents(b_mesh, l.name().c_str(), mesh, need_sign, active_render); - } - /* Remove temporarily created UV attribute. 
*/ - if (!need_uv && uv_attr != NULL) { - mesh->attributes.remove(uv_attr); - } - } - } - else if (mesh->need_attribute(scene, ATTR_STD_UV_TANGENT)) { - bool need_sign = mesh->need_attribute(scene, ATTR_STD_UV_TANGENT_SIGN); - mikk_compute_tangents(b_mesh, NULL, mesh, need_sign, true); - if (!mesh->need_attribute(scene, ATTR_STD_GENERATED)) { - mesh->attributes.remove(ATTR_STD_GENERATED); - } - } -} - -static void attr_create_subd_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivide_uvs) -{ - if (b_mesh.uv_layers.length() != 0) { - BL::Mesh::uv_layers_iterator l; - int i = 0; - - for (b_mesh.uv_layers.begin(l); l != b_mesh.uv_layers.end(); ++l, ++i) { - bool active_render = l->active_render(); - AttributeStandard uv_std = (active_render) ? ATTR_STD_UV : ATTR_STD_NONE; - ustring uv_name = ustring(l->name().c_str()); - AttributeStandard tangent_std = (active_render) ? ATTR_STD_UV_TANGENT : ATTR_STD_NONE; - ustring tangent_name = ustring((string(l->name().c_str()) + ".tangent").c_str()); - - /* Denotes whether UV map was requested directly. */ - const bool need_uv = mesh->need_attribute(scene, uv_name) || - mesh->need_attribute(scene, uv_std); - /* Denotes whether tangent was requested directly. */ - const bool need_tangent = mesh->need_attribute(scene, tangent_name) || - (active_render && mesh->need_attribute(scene, tangent_std)); - - Attribute *uv_attr = NULL; - - /* UV map */ - if (need_uv || need_tangent) { - if (active_render) - uv_attr = mesh->subd_attributes.add(uv_std, uv_name); - else - uv_attr = mesh->subd_attributes.add(uv_name, TypeFloat2, ATTR_ELEMENT_CORNER); - - if (subdivide_uvs) { - uv_attr->flags |= ATTR_SUBDIVIDED; - } - - float2 *fdata = uv_attr->data_float2(); - - for (BL::MeshPolygon &p : b_mesh.polygons) { - int n = p.loop_total(); - for (int j = 0; j < n; j++) { - *(fdata++) = get_float2(l->data[p.loop_start() + j].uv()); - } - } - } - - /* UV tangent */ - if (need_tangent) { - AttributeStandard sign_std = (active_render) ? ATTR_STD_UV_TANGENT_SIGN : ATTR_STD_NONE; - ustring sign_name = ustring((string(l->name().c_str()) + ".tangent_sign").c_str()); - bool need_sign = (mesh->need_attribute(scene, sign_name) || - mesh->need_attribute(scene, sign_std)); - mikk_compute_tangents(b_mesh, l->name().c_str(), mesh, need_sign, active_render); - } - /* Remove temporarily created UV attribute. */ - if (!need_uv && uv_attr != NULL) { - mesh->subd_attributes.remove(uv_attr); - } - } - } - else if (mesh->need_attribute(scene, ATTR_STD_UV_TANGENT)) { - bool need_sign = mesh->need_attribute(scene, ATTR_STD_UV_TANGENT_SIGN); - mikk_compute_tangents(b_mesh, NULL, mesh, need_sign, true); - if (!mesh->need_attribute(scene, ATTR_STD_GENERATED)) { - mesh->subd_attributes.remove(ATTR_STD_GENERATED); - } - } -} - -/* Create vertex pointiness attributes. */ - -/* Compare vertices by sum of their coordinates. */ -class VertexAverageComparator { - public: - VertexAverageComparator(const array &verts) : verts_(verts) - { - } - - bool operator()(const int &vert_idx_a, const int &vert_idx_b) - { - const float3 &vert_a = verts_[vert_idx_a]; - const float3 &vert_b = verts_[vert_idx_b]; - if (vert_a == vert_b) { - /* Special case for doubles, so we ensure ordering. 
*/ - return vert_idx_a > vert_idx_b; - } - const float x1 = vert_a.x + vert_a.y + vert_a.z; - const float x2 = vert_b.x + vert_b.y + vert_b.z; - return x1 < x2; - } - - protected: - const array &verts_; -}; - -static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivision) -{ - if (!mesh->need_attribute(scene, ATTR_STD_POINTINESS)) { - return; - } - const int num_verts = b_mesh.vertices.length(); - if (num_verts == 0) { - return; - } - /* STEP 1: Find out duplicated vertices and point duplicates to a single - * original vertex. - */ - vector sorted_vert_indeices(num_verts); - for (int vert_index = 0; vert_index < num_verts; ++vert_index) { - sorted_vert_indeices[vert_index] = vert_index; - } - VertexAverageComparator compare(mesh->get_verts()); - sort(sorted_vert_indeices.begin(), sorted_vert_indeices.end(), compare); - /* This array stores index of the original vertex for the given vertex - * index. - */ - vector vert_orig_index(num_verts); - for (int sorted_vert_index = 0; sorted_vert_index < num_verts; ++sorted_vert_index) { - const int vert_index = sorted_vert_indeices[sorted_vert_index]; - const float3 &vert_co = mesh->get_verts()[vert_index]; - bool found = false; - for (int other_sorted_vert_index = sorted_vert_index + 1; other_sorted_vert_index < num_verts; - ++other_sorted_vert_index) { - const int other_vert_index = sorted_vert_indeices[other_sorted_vert_index]; - const float3 &other_vert_co = mesh->get_verts()[other_vert_index]; - /* We are too far away now, we wouldn't have duplicate. */ - if ((other_vert_co.x + other_vert_co.y + other_vert_co.z) - - (vert_co.x + vert_co.y + vert_co.z) > - 3 * FLT_EPSILON) { - break; - } - /* Found duplicate. */ - if (len_squared(other_vert_co - vert_co) < FLT_EPSILON) { - found = true; - vert_orig_index[vert_index] = other_vert_index; - break; - } - } - if (!found) { - vert_orig_index[vert_index] = vert_index; - } - } - /* Make sure we always points to the very first orig vertex. */ - for (int vert_index = 0; vert_index < num_verts; ++vert_index) { - int orig_index = vert_orig_index[vert_index]; - while (orig_index != vert_orig_index[orig_index]) { - orig_index = vert_orig_index[orig_index]; - } - vert_orig_index[vert_index] = orig_index; - } - sorted_vert_indeices.free_memory(); - /* STEP 2: Calculate vertex normals taking into account their possible - * duplicates which gets "welded" together. - */ - vector vert_normal(num_verts, zero_float3()); - /* First we accumulate all vertex normals in the original index. */ - for (int vert_index = 0; vert_index < num_verts; ++vert_index) { - const float3 normal = get_float3(b_mesh.vertices[vert_index].normal()); - const int orig_index = vert_orig_index[vert_index]; - vert_normal[orig_index] += normal; - } - /* Then we normalize the accumulated result and flush it to all duplicates - * as well. - */ - for (int vert_index = 0; vert_index < num_verts; ++vert_index) { - const int orig_index = vert_orig_index[vert_index]; - vert_normal[vert_index] = normalize(vert_normal[orig_index]); - } - /* STEP 3: Calculate pointiness using single ring neighborhood. 
*/ - vector counter(num_verts, 0); - vector raw_data(num_verts, 0.0f); - vector edge_accum(num_verts, zero_float3()); - BL::Mesh::edges_iterator e; - EdgeMap visited_edges; - int edge_index = 0; - memset(&counter[0], 0, sizeof(int) * counter.size()); - for (b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) { - const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]], - v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]]; - if (visited_edges.exists(v0, v1)) { - continue; - } - visited_edges.insert(v0, v1); - float3 co0 = get_float3(b_mesh.vertices[v0].co()), co1 = get_float3(b_mesh.vertices[v1].co()); - float3 edge = normalize(co1 - co0); - edge_accum[v0] += edge; - edge_accum[v1] += -edge; - ++counter[v0]; - ++counter[v1]; - } - for (int vert_index = 0; vert_index < num_verts; ++vert_index) { - const int orig_index = vert_orig_index[vert_index]; - if (orig_index != vert_index) { - /* Skip duplicates, they'll be overwritten later on. */ - continue; - } - if (counter[vert_index] > 0) { - const float3 normal = vert_normal[vert_index]; - const float angle = safe_acosf(dot(normal, edge_accum[vert_index] / counter[vert_index])); - raw_data[vert_index] = angle * M_1_PI_F; - } - else { - raw_data[vert_index] = 0.0f; - } - } - /* STEP 3: Blur vertices to approximate 2 ring neighborhood. */ - AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes; - Attribute *attr = attributes.add(ATTR_STD_POINTINESS); - float *data = attr->data_float(); - memcpy(data, &raw_data[0], sizeof(float) * raw_data.size()); - memset(&counter[0], 0, sizeof(int) * counter.size()); - edge_index = 0; - visited_edges.clear(); - for (b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) { - const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]], - v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]]; - if (visited_edges.exists(v0, v1)) { - continue; - } - visited_edges.insert(v0, v1); - data[v0] += raw_data[v1]; - data[v1] += raw_data[v0]; - ++counter[v0]; - ++counter[v1]; - } - for (int vert_index = 0; vert_index < num_verts; ++vert_index) { - data[vert_index] /= counter[vert_index] + 1; - } - /* STEP 4: Copy attribute to the duplicated vertices. */ - for (int vert_index = 0; vert_index < num_verts; ++vert_index) { - const int orig_index = vert_orig_index[vert_index]; - data[vert_index] = data[orig_index]; - } -} - -/* The Random Per Island attribute is a random float associated with each - * connected component (island) of the mesh. The attribute is computed by - * first classifying the vertices into different sets using a Disjoint Set - * data structure. Then the index of the root of each vertex (Which is the - * representative of the set the vertex belongs to) is hashed and stored. - * - * We are using a face attribute to avoid interpolation during rendering, - * allowing the user to safely hash the output further. Had we used vertex - * attribute, the interpolation will introduce very slight variations, - * making the output unsafe to hash. 
*/ -static void attr_create_random_per_island(Scene *scene, - Mesh *mesh, - BL::Mesh &b_mesh, - bool subdivision) -{ - if (!mesh->need_attribute(scene, ATTR_STD_RANDOM_PER_ISLAND)) { - return; - } - - int number_of_vertices = b_mesh.vertices.length(); - if (number_of_vertices == 0) { - return; - } - - DisjointSet vertices_sets(number_of_vertices); - - for (BL::MeshEdge &e : b_mesh.edges) { - vertices_sets.join(e.vertices()[0], e.vertices()[1]); - } - - AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes; - Attribute *attribute = attributes.add(ATTR_STD_RANDOM_PER_ISLAND); - float *data = attribute->data_float(); - - if (!subdivision) { - for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { - data[t.index()] = hash_uint_to_float(vertices_sets.find(t.vertices()[0])); - } - } - else { - for (BL::MeshPolygon &p : b_mesh.polygons) { - data[p.index()] = hash_uint_to_float(vertices_sets.find(p.vertices()[0])); - } - } -} - -/* Create Mesh */ - -static void create_mesh(Scene *scene, - Mesh *mesh, - BL::Mesh &b_mesh, - const array &used_shaders, - const bool need_motion, - const float motion_scale, - const bool subdivision = false, - const bool subdivide_uvs = true) -{ - /* count vertices and faces */ - int numverts = b_mesh.vertices.length(); - int numfaces = (!subdivision) ? b_mesh.loop_triangles.length() : b_mesh.polygons.length(); - int numtris = 0; - int numcorners = 0; - int numngons = 0; - bool use_loop_normals = b_mesh.use_auto_smooth() && - (mesh->get_subdivision_type() != Mesh::SUBDIVISION_CATMULL_CLARK); - - /* If no faces, create empty mesh. */ - if (numfaces == 0) { - return; - } - - if (!subdivision) { - numtris = numfaces; - } - else { - for (BL::MeshPolygon &p : b_mesh.polygons) { - numngons += (p.loop_total() == 4) ? 0 : 1; - numcorners += p.loop_total(); - } - } - - /* allocate memory */ - if (subdivision) { - mesh->reserve_subd_faces(numfaces, numngons, numcorners); - } - - mesh->reserve_mesh(numverts, numtris); - - /* create vertex coordinates and normals */ - BL::Mesh::vertices_iterator v; - for (b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v) - mesh->add_vertex(get_float3(v->co())); - - AttributeSet &attributes = (subdivision) ? 
mesh->subd_attributes : mesh->attributes; - Attribute *attr_N = attributes.add(ATTR_STD_VERTEX_NORMAL); - float3 *N = attr_N->data_float3(); - - for (b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v, ++N) - *N = get_float3(v->normal()); - N = attr_N->data_float3(); - - /* create generated coordinates from undeformed coordinates */ - const bool need_default_tangent = (subdivision == false) && (b_mesh.uv_layers.length() == 0) && - (mesh->need_attribute(scene, ATTR_STD_UV_TANGENT)); - if (mesh->need_attribute(scene, ATTR_STD_GENERATED) || need_default_tangent) { - Attribute *attr = attributes.add(ATTR_STD_GENERATED); - attr->flags |= ATTR_SUBDIVIDED; - - float3 loc, size; - mesh_texture_space(b_mesh, loc, size); - - float3 *generated = attr->data_float3(); - size_t i = 0; - - for (b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v) { - generated[i++] = get_float3(v->undeformed_co()) * size - loc; - } - } - - /* create faces */ - if (!subdivision) { - for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { - BL::MeshPolygon p = b_mesh.polygons[t.polygon_index()]; - int3 vi = get_int3(t.vertices()); - - int shader = clamp(p.material_index(), 0, used_shaders.size() - 1); - bool smooth = p.use_smooth() || use_loop_normals; - - if (use_loop_normals) { - BL::Array loop_normals = t.split_normals(); - for (int i = 0; i < 3; i++) { - N[vi[i]] = make_float3( - loop_normals[i * 3], loop_normals[i * 3 + 1], loop_normals[i * 3 + 2]); - } - } - - /* Create triangles. - * - * NOTE: Autosmooth is already taken care about. - */ - mesh->add_triangle(vi[0], vi[1], vi[2], shader, smooth); - } - } - else { - vector vi; - - for (BL::MeshPolygon &p : b_mesh.polygons) { - int n = p.loop_total(); - int shader = clamp(p.material_index(), 0, used_shaders.size() - 1); - bool smooth = p.use_smooth() || use_loop_normals; - - vi.resize(n); - for (int i = 0; i < n; i++) { - /* NOTE: Autosmooth is already taken care about. */ - vi[i] = b_mesh.loops[p.loop_start() + i].vertex_index(); - } - - /* create subd faces */ - mesh->add_subd_face(&vi[0], n, shader, smooth); - } - } - - /* Create all needed attributes. - * The calculate functions will check whether they're needed or not. - */ - attr_create_pointiness(scene, mesh, b_mesh, subdivision); - attr_create_vertex_color(scene, mesh, b_mesh, subdivision); - attr_create_sculpt_vertex_color(scene, mesh, b_mesh, subdivision); - attr_create_random_per_island(scene, mesh, b_mesh, subdivision); - attr_create_generic(scene, mesh, b_mesh, subdivision, need_motion, motion_scale); - - if (subdivision) { - attr_create_subd_uv_map(scene, mesh, b_mesh, subdivide_uvs); - } - else { - attr_create_uv_map(scene, mesh, b_mesh); - } - - /* For volume objects, create a matrix to transform from object space to - * mesh texture space. this does not work with deformations but that can - * probably only be done well with a volume grid mapping of coordinates. 
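create_mesh() above stores generated coordinates as undeformed_co * size - loc, and the volume branch right after builds the object-to-texture-space transform from the same loc/size pair returned by mesh_texture_space(). A tiny sketch of that per-component mapping with assumed values for a [-1, 1] bounding box (float3 and to_texture_space are local stand-ins):

#include <cstdio>

struct float3 {
  float x, y, z;
};

/* Apply the per-component mapping used for the generated coordinates. */
static float3 to_texture_space(const float3 &p, const float3 &loc, const float3 &size)
{
  return {p.x * size.x - loc.x, p.y * size.y - loc.y, p.z * size.z - loc.z};
}

int main()
{
  /* Assumed parameters: scaling by 0.5 and shifting by -0.5 maps a
   * [-1, 1] bounding box to [0, 1] texture space on each axis. */
  const float3 size = {0.5f, 0.5f, 0.5f};
  const float3 loc = {-0.5f, -0.5f, -0.5f};

  const float3 corner = {-1.0f, -1.0f, -1.0f};
  const float3 mapped = to_texture_space(corner, loc, size);
  printf("(%g, %g, %g)\n", mapped.x, mapped.y, mapped.z); /* expect (0, 0, 0) */
  return 0;
}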
*/ - if (mesh->need_attribute(scene, ATTR_STD_GENERATED_TRANSFORM)) { - Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED_TRANSFORM); - Transform *tfm = attr->data_transform(); - - float3 loc, size; - mesh_texture_space(b_mesh, loc, size); - - *tfm = transform_translate(-loc) * transform_scale(size); - } -} - -static void create_subd_mesh(Scene *scene, - Mesh *mesh, - BObjectInfo &b_ob_info, - BL::Mesh &b_mesh, - const array &used_shaders, - const bool need_motion, - const float motion_scale, - float dicing_rate, - int max_subdivisions) -{ - BL::Object b_ob = b_ob_info.real_object; - - BL::SubsurfModifier subsurf_mod(b_ob.modifiers[b_ob.modifiers.length() - 1]); - bool subdivide_uvs = subsurf_mod.uv_smooth() != BL::SubsurfModifier::uv_smooth_NONE; - - create_mesh(scene, mesh, b_mesh, used_shaders, need_motion, motion_scale, true, subdivide_uvs); - - /* export creases */ - size_t num_creases = 0; - - for (BL::MeshEdge &e : b_mesh.edges) { - if (e.crease() != 0.0f) { - num_creases++; - } - } - - mesh->reserve_subd_creases(num_creases); - - for (BL::MeshEdge &e : b_mesh.edges) { - if (e.crease() != 0.0f) { - mesh->add_crease(e.vertices()[0], e.vertices()[1], e.crease()); - } - } - - /* set subd params */ - PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles"); - float subd_dicing_rate = max(0.1f, RNA_float_get(&cobj, "dicing_rate") * dicing_rate); - - mesh->set_subd_dicing_rate(subd_dicing_rate); - mesh->set_subd_max_level(max_subdivisions); - mesh->set_subd_objecttoworld(get_transform(b_ob.matrix_world())); -} - -/* Sync */ - -/* Check whether some of "built-in" motion-related attributes are needed to be exported (includes - * things like velocity from cache modifier, fluid simulation). - * - * NOTE: This code is run prior to object motion blur initialization. so can not access properties - * set by `sync_object_motion_init()`. */ -static bool mesh_need_motion_attribute(BObjectInfo &b_ob_info, Scene *scene) -{ - const Scene::MotionType need_motion = scene->need_motion(); - if (need_motion == Scene::MOTION_NONE) { - /* Simple case: neither motion pass nor motion blur is needed, no need in the motion related - * attributes. */ - return false; - } - - if (need_motion == Scene::MOTION_BLUR) { - /* A bit tricky and implicit case: - * - Motion blur is enabled in the scene, which implies specific number of time steps for - * objects. - * - If the object has motion blur disabled on it, it will have 0 time steps. - * - Motion attribute expects non-zero time steps. - * - * Avoid adding motion attributes if the motion blur will enforce 0 motion steps. */ - PointerRNA cobject = RNA_pointer_get(&b_ob_info.real_object.ptr, "cycles"); - const bool use_motion = get_boolean(cobject, "use_motion_blur"); - if (!use_motion) { - return false; - } - } - - /* Motion pass which implies 3 motion steps, or motion blur which is not disabled on object - * level. */ - return true; -} - -void BlenderSync::sync_mesh(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Mesh *mesh) -{ - /* make a copy of the shaders as the caller in the main thread still need them for syncing the - * attributes */ - array used_shaders = mesh->get_used_shaders(); - - Mesh new_mesh; - new_mesh.set_used_shaders(used_shaders); - - if (view_layer.use_surfaces) { - /* Adaptive subdivision setup. Not for baking since that requires - * exact mapping to the Blender mesh. 
*/ - if (!scene->bake_manager->get_baking()) { - new_mesh.set_subdivision_type( - object_subdivision_type(b_ob_info.real_object, preview, experimental)); - } - - /* For some reason, meshes do not need this... */ - bool need_undeformed = new_mesh.need_attribute(scene, ATTR_STD_GENERATED); - BL::Mesh b_mesh = object_to_mesh( - b_data, b_ob_info, b_depsgraph, need_undeformed, new_mesh.get_subdivision_type()); - - if (b_mesh) { - /* Motion blur attribute is relative to seconds, we need it relative to frames. */ - const bool need_motion = mesh_need_motion_attribute(b_ob_info, scene); - const float motion_scale = (need_motion) ? - scene->motion_shutter_time() / - (b_scene.render().fps() / b_scene.render().fps_base()) : - 0.0f; - - /* Sync mesh itself. */ - if (new_mesh.get_subdivision_type() != Mesh::SUBDIVISION_NONE) - create_subd_mesh(scene, - &new_mesh, - b_ob_info, - b_mesh, - new_mesh.get_used_shaders(), - need_motion, - motion_scale, - dicing_rate, - max_subdivisions); - else - create_mesh(scene, - &new_mesh, - b_mesh, - new_mesh.get_used_shaders(), - need_motion, - motion_scale, - false); - - free_object_to_mesh(b_data, b_ob_info, b_mesh); - } - } - - /* update original sockets */ - - mesh->clear_non_sockets(); - - for (const SocketType &socket : new_mesh.type->inputs) { - /* Those sockets are updated in sync_object, so do not modify them. */ - if (socket.name == "use_motion_blur" || socket.name == "motion_steps" || - socket.name == "used_shaders") { - continue; - } - mesh->set_value(socket, new_mesh, socket); - } - - mesh->attributes.update(std::move(new_mesh.attributes)); - mesh->subd_attributes.update(std::move(new_mesh.subd_attributes)); - - mesh->set_num_subd_faces(new_mesh.get_num_subd_faces()); - - /* tag update */ - bool rebuild = (mesh->triangles_is_modified()) || (mesh->subd_num_corners_is_modified()) || - (mesh->subd_shader_is_modified()) || (mesh->subd_smooth_is_modified()) || - (mesh->subd_ptex_offset_is_modified()) || - (mesh->subd_start_corner_is_modified()) || - (mesh->subd_face_corners_is_modified()); - - mesh->tag_update(scene, rebuild); -} - -void BlenderSync::sync_mesh_motion(BL::Depsgraph b_depsgraph, - BObjectInfo &b_ob_info, - Mesh *mesh, - int motion_step) -{ - /* Skip if no vertices were exported. */ - size_t numverts = mesh->get_verts().size(); - if (numverts == 0) { - return; - } - - /* Skip objects without deforming modifiers. this is not totally reliable, - * would need a more extensive check to see which objects are animated. */ - BL::Mesh b_mesh(PointerRNA_NULL); - if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) { - /* get derived mesh */ - b_mesh = object_to_mesh(b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE); - } - - const std::string ob_name = b_ob_info.real_object.name(); - - /* TODO(sergey): Perform preliminary check for number of vertices. */ - if (b_mesh) { - /* Export deformed coordinates. */ - /* Find attributes. */ - Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); - Attribute *attr_N = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL); - bool new_attribute = false; - /* Add new attributes if they don't exist already. */ - if (!attr_mP) { - attr_mP = mesh->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); - if (attr_N) - attr_mN = mesh->attributes.add(ATTR_STD_MOTION_VERTEX_NORMAL); - - new_attribute = true; - } - /* Load vertex data from mesh. 
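The motion_scale computed in sync_mesh() above divides the shutter time (expressed in frames) by the scene frame rate, which works out to the shutter open interval in seconds; presumably attr_create_generic() uses it to turn per-second velocity attributes into per-shutter displacements. A small sketch of that conversion (the interpretation of the downstream use is the assumption here, not shown in this file):

/* Sketch only: a shutter expressed in frames divided by frames-per-second gives seconds. */
static float motion_scale_seconds(float shutter_in_frames, float fps, float fps_base)
{
  const float frames_per_second = fps / fps_base; /* b_scene.render().fps() / fps_base(). */
  return shutter_in_frames / frames_per_second;
}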
*/ - float3 *mP = attr_mP->data_float3() + motion_step * numverts; - float3 *mN = (attr_mN) ? attr_mN->data_float3() + motion_step * numverts : NULL; - /* NOTE: We don't copy more that existing amount of vertices to prevent - * possible memory corruption. - */ - BL::Mesh::vertices_iterator v; - int i = 0; - for (b_mesh.vertices.begin(v); v != b_mesh.vertices.end() && i < numverts; ++v, ++i) { - mP[i] = get_float3(v->co()); - if (mN) - mN[i] = get_float3(v->normal()); - } - if (new_attribute) { - /* In case of new attribute, we verify if there really was any motion. */ - if (b_mesh.vertices.length() != numverts || - memcmp(mP, &mesh->get_verts()[0], sizeof(float3) * numverts) == 0) { - /* no motion, remove attributes again */ - if (b_mesh.vertices.length() != numverts) { - VLOG(1) << "Topology differs, disabling motion blur for object " << ob_name; - } - else { - VLOG(1) << "No actual deformation motion for object " << ob_name; - } - mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION); - if (attr_mN) - mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_NORMAL); - } - else if (motion_step > 0) { - VLOG(1) << "Filling deformation motion for object " << ob_name; - /* motion, fill up previous steps that we might have skipped because - * they had no motion, but we need them anyway now */ - float3 *P = &mesh->get_verts()[0]; - float3 *N = (attr_N) ? attr_N->data_float3() : NULL; - for (int step = 0; step < motion_step; step++) { - memcpy(attr_mP->data_float3() + step * numverts, P, sizeof(float3) * numverts); - if (attr_mN) - memcpy(attr_mN->data_float3() + step * numverts, N, sizeof(float3) * numverts); - } - } - } - else { - if (b_mesh.vertices.length() != numverts) { - VLOG(1) << "Topology differs, discarding motion blur for object " << ob_name << " at time " - << motion_step; - memcpy(mP, &mesh->get_verts()[0], sizeof(float3) * numverts); - if (mN != NULL) { - memcpy(mN, attr_N->data_float3(), sizeof(float3) * numverts); - } - } - } - - free_object_to_mesh(b_data, b_ob_info, b_mesh); - return; - } - - /* No deformation on this frame, copy coordinates if other frames did have it. */ - mesh->copy_center_to_motion_step(motion_step); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp deleted file mode 100644 index 75311805fd8..00000000000 --- a/intern/cycles/blender/blender_object.cpp +++ /dev/null @@ -1,769 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
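The deformation motion export above treats the motion position attribute as one flat array of motion_steps consecutive blocks of numverts vertices, so writing a step (or back-filling skipped earlier steps) is an offset copy. A standalone sketch of that addressing with a plain vector:

/* Sketch only: same addressing as attr_mP->data_float3() + motion_step * numverts. */
#include <cstring>
#include <vector>

struct Float3 {
  float x, y, z;
};

static void write_motion_step(std::vector<Float3> &motion_verts,         /* motion_steps * numverts */
                              const std::vector<Float3> &step_positions, /* numverts */
                              size_t motion_step)
{
  const size_t numverts = step_positions.size();
  std::memcpy(motion_verts.data() + motion_step * numverts,
              step_positions.data(),
              sizeof(Float3) * numverts);
}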
- */ - -#include "scene/alembic.h" -#include "scene/camera.h" -#include "scene/integrator.h" -#include "scene/light.h" -#include "scene/mesh.h" -#include "scene/object.h" -#include "scene/particles.h" -#include "scene/scene.h" -#include "scene/shader.h" -#include "scene/shader_graph.h" -#include "scene/shader_nodes.h" - -#include "blender/blender_object_cull.h" -#include "blender/blender_sync.h" -#include "blender/blender_util.h" - -#include "util/util_foreach.h" -#include "util/util_hash.h" -#include "util/util_logging.h" -#include "util/util_task.h" - -CCL_NAMESPACE_BEGIN - -/* Utilities */ - -bool BlenderSync::BKE_object_is_modified(BL::Object &b_ob) -{ - /* test if we can instance or if the object is modified */ - if (b_ob.type() == BL::Object::type_META) { - /* multi-user and dupli metaballs are fused, can't instance */ - return true; - } - else if (ccl::BKE_object_is_modified(b_ob, b_scene, preview)) { - /* modifiers */ - return true; - } - else { - /* object level material links */ - for (BL::MaterialSlot &b_slot : b_ob.material_slots) { - if (b_slot.link() == BL::MaterialSlot::link_OBJECT) { - return true; - } - } - } - - return false; -} - -bool BlenderSync::object_is_geometry(BL::Object &b_ob) -{ - BL::ID b_ob_data = b_ob.data(); - - if (!b_ob_data) { - return false; - } - - BL::Object::type_enum type = b_ob.type(); - - if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) { - /* Will be exported attached to mesh. */ - return true; - } - else if (type == BL::Object::type_CURVE) { - /* Skip exporting curves without faces, overhead can be - * significant if there are many for path animation. */ - BL::Curve b_curve(b_ob_data); - - return (b_curve.bevel_object() || b_curve.extrude() != 0.0f || b_curve.bevel_depth() != 0.0f || - b_curve.dimensions() == BL::Curve::dimensions_2D || b_ob.modifiers.length()); - } - else { - return (b_ob_data.is_a(&RNA_Mesh) || b_ob_data.is_a(&RNA_Curve) || - b_ob_data.is_a(&RNA_MetaBall)); - } -} - -bool BlenderSync::object_is_light(BL::Object &b_ob) -{ - BL::ID b_ob_data = b_ob.data(); - - return (b_ob_data && b_ob_data.is_a(&RNA_Light)); -} - -void BlenderSync::sync_object_motion_init(BL::Object &b_parent, BL::Object &b_ob, Object *object) -{ - /* Initialize motion blur for object, detecting if it's enabled and creating motion - * steps array if so. 
*/ - array motion; - object->set_motion(motion); - - Geometry *geom = object->get_geometry(); - if (!geom) { - return; - } - - int motion_steps = 0; - bool use_motion_blur = false; - - Scene::MotionType need_motion = scene->need_motion(); - if (need_motion == Scene::MOTION_BLUR) { - motion_steps = object_motion_steps(b_parent, b_ob, Object::MAX_MOTION_STEPS); - if (motion_steps && object_use_deform_motion(b_parent, b_ob)) { - use_motion_blur = true; - } - } - else if (need_motion != Scene::MOTION_NONE) { - motion_steps = 3; - } - - geom->set_use_motion_blur(use_motion_blur); - geom->set_motion_steps(motion_steps); - - motion.resize(motion_steps, transform_empty()); - - if (motion_steps) { - motion[motion_steps / 2] = object->get_tfm(); - - /* update motion socket before trying to access object->motion_time */ - object->set_motion(motion); - - for (size_t step = 0; step < motion_steps; step++) { - motion_times.insert(object->motion_time(step)); - } - } -} - -Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph, - BL::ViewLayer &b_view_layer, - BL::DepsgraphObjectInstance &b_instance, - float motion_time, - bool use_particle_hair, - bool show_lights, - BlenderObjectCulling &culling, - bool *use_portal, - TaskPool *geom_task_pool) -{ - const bool is_instance = b_instance.is_instance(); - BL::Object b_ob = b_instance.object(); - BL::Object b_parent = is_instance ? b_instance.parent() : b_instance.object(); - BObjectInfo b_ob_info{b_ob, is_instance ? b_instance.instance_object() : b_ob, b_ob.data()}; - const bool motion = motion_time != 0.0f; - /*const*/ Transform tfm = get_transform(b_ob.matrix_world()); - int *persistent_id = NULL; - BL::Array persistent_id_array; - if (is_instance) { - persistent_id_array = b_instance.persistent_id(); - persistent_id = persistent_id_array.data; - } - - /* light is handled separately */ - if (!motion && object_is_light(b_ob)) { - if (!show_lights) { - return NULL; - } - - /* TODO: don't use lights for excluded layers used as mask layer, - * when dynamic overrides are back. */ -#if 0 - if (!((layer_flag & view_layer.holdout_layer) && (layer_flag & view_layer.exclude_layer))) -#endif - { - sync_light(b_parent, - persistent_id, - b_ob_info, - is_instance ? b_instance.random_id() : 0, - tfm, - use_portal); - } - - return NULL; - } - - /* only interested in object that we can create meshes from */ - if (!object_is_geometry(b_ob)) { - return NULL; - } - - /* Perform object culling. */ - if (culling.test(scene, b_ob, tfm)) { - return NULL; - } - - /* Visibility flags for both parent and child. */ - PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); - bool use_holdout = b_parent.holdout_get(PointerRNA_NULL, b_view_layer); - uint visibility = object_ray_visibility(b_ob) & PATH_RAY_ALL_VISIBILITY; - - if (b_parent.ptr.data != b_ob.ptr.data) { - visibility &= object_ray_visibility(b_parent); - } - - /* TODO: make holdout objects on excluded layer invisible for non-camera rays. */ -#if 0 - if (use_holdout && (layer_flag & view_layer.exclude_layer)) { - visibility &= ~(PATH_RAY_ALL_VISIBILITY - PATH_RAY_CAMERA); - } -#endif - - /* Clear camera visibility for indirect only objects. */ - bool use_indirect_only = !use_holdout && - b_parent.indirect_only_get(PointerRNA_NULL, b_view_layer); - if (use_indirect_only) { - visibility &= ~PATH_RAY_CAMERA; - } - - /* Don't export completely invisible objects. */ - if (visibility == 0) { - return NULL; - } - - /* Use task pool only for non-instances, since sync_dupli_particle accesses - * geometry. 
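sync_object_motion_init() above sizes the motion array to motion_steps entries, stores the current transform in the middle slot (motion_steps / 2), and collects the per-step motion times. Assuming Object::motion_time() spreads the steps linearly over [-1, 1] (an assumption; that function is not part of this file), the relative times would look like this sketch:

/* Sketch only: relative motion times under an assumed linear mapping; the center step
 * (motion_steps / 2) corresponds to 0.0, i.e. the current frame. */
#include <vector>

static std::vector<float> relative_motion_times(int motion_steps)
{
  std::vector<float> times;
  for (int step = 0; step < motion_steps; step++) {
    times.push_back((motion_steps > 1) ? 2.0f * step / (motion_steps - 1) - 1.0f : 0.0f);
  }
  return times;
}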
This restriction should be removed for better performance. */ - TaskPool *object_geom_task_pool = (is_instance) ? NULL : geom_task_pool; - - /* key to lookup object */ - ObjectKey key(b_parent, persistent_id, b_ob_info.real_object, use_particle_hair); - Object *object; - - /* motion vector case */ - if (motion) { - object = object_map.find(key); - - if (object && object->use_motion()) { - /* Set transform at matching motion time step. */ - int time_index = object->motion_step(motion_time); - if (time_index >= 0) { - array motion = object->get_motion(); - motion[time_index] = tfm; - object->set_motion(motion); - } - - /* mesh deformation */ - if (object->get_geometry()) - sync_geometry_motion( - b_depsgraph, b_ob_info, object, motion_time, use_particle_hair, object_geom_task_pool); - } - - return object; - } - - /* test if we need to sync */ - bool object_updated = object_map.add_or_update(&object, b_ob, b_parent, key) || - (tfm != object->get_tfm()); - - /* mesh sync */ - Geometry *geometry = sync_geometry( - b_depsgraph, b_ob_info, object_updated, use_particle_hair, object_geom_task_pool); - object->set_geometry(geometry); - - /* special case not tracked by object update flags */ - - if (sync_object_attributes(b_instance, object)) { - object_updated = true; - } - - /* holdout */ - object->set_use_holdout(use_holdout); - - object->set_visibility(visibility); - - object->set_is_shadow_catcher(b_ob.is_shadow_catcher()); - - float shadow_terminator_shading_offset = get_float(cobject, "shadow_terminator_offset"); - object->set_shadow_terminator_shading_offset(shadow_terminator_shading_offset); - - float shadow_terminator_geometry_offset = get_float(cobject, - "shadow_terminator_geometry_offset"); - object->set_shadow_terminator_geometry_offset(shadow_terminator_geometry_offset); - - float ao_distance = get_float(cobject, "ao_distance"); - if (ao_distance == 0.0f && b_parent.ptr.data != b_ob.ptr.data) { - PointerRNA cparent = RNA_pointer_get(&b_parent.ptr, "cycles"); - ao_distance = get_float(cparent, "ao_distance"); - } - object->set_ao_distance(ao_distance); - - /* sync the asset name for Cryptomatte */ - BL::Object parent = b_ob.parent(); - ustring parent_name; - if (parent) { - while (parent.parent()) { - parent = parent.parent(); - } - parent_name = parent.name(); - } - else { - parent_name = b_ob.name(); - } - object->set_asset_name(parent_name); - - /* object sync - * transform comparison should not be needed, but duplis don't work perfect - * in the depsgraph and may not signal changes, so this is a workaround */ - if (object->is_modified() || object_updated || - (object->get_geometry() && object->get_geometry()->is_modified())) { - object->name = b_ob.name().c_str(); - object->set_pass_id(b_ob.pass_index()); - object->set_color(get_float3(b_ob.color())); - object->set_tfm(tfm); - - /* dupli texture coordinates and random_id */ - if (is_instance) { - object->set_dupli_generated(0.5f * get_float3(b_instance.orco()) - - make_float3(0.5f, 0.5f, 0.5f)); - object->set_dupli_uv(get_float2(b_instance.uv())); - object->set_random_id(b_instance.random_id()); - } - else { - object->set_dupli_generated(zero_float3()); - object->set_dupli_uv(zero_float2()); - object->set_random_id(hash_uint2(hash_string(object->name.c_str()), 0)); - } - - object->tag_update(scene); - } - - sync_object_motion_init(b_parent, b_ob, object); - - if (is_instance) { - /* Sync possible particle data. 
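The visibility handling above is plain bit masking: the object's ray visibility is intersected with its parent's when the parent is a different object, and indirect-only objects have the camera bit cleared; a zero mask means the object is skipped. A self-contained sketch with placeholder flag values (not Cycles' real PATH_RAY_* constants):

/* Sketch only: combining ray-visibility masks as sync_object() does above. */
enum : unsigned int {
  RAY_CAMERA = 1u << 0,
  RAY_DIFFUSE = 1u << 1,
  RAY_GLOSSY = 1u << 2,
  RAY_ALL = RAY_CAMERA | RAY_DIFFUSE | RAY_GLOSSY,
};

static unsigned int combined_visibility(unsigned int object_vis,
                                        unsigned int parent_vis,
                                        bool parent_differs,
                                        bool indirect_only)
{
  unsigned int visibility = object_vis & RAY_ALL;
  if (parent_differs) {
    visibility &= parent_vis; /* Parent restrictions also apply. */
  }
  if (indirect_only) {
    visibility &= ~RAY_CAMERA; /* Hide indirect-only objects from camera rays. */
  }
  return visibility; /* 0 means the object is not exported at all. */
}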
*/ - sync_dupli_particle(b_parent, b_instance, object); - } - - return object; -} - -/* This function mirrors drw_uniform_property_lookup in draw_instance_data.cpp */ -static bool lookup_property(BL::ID b_id, const string &name, float4 *r_value) -{ - PointerRNA ptr; - PropertyRNA *prop; - - if (!RNA_path_resolve(&b_id.ptr, name.c_str(), &ptr, &prop)) { - return false; - } - - if (prop == NULL) { - return false; - } - - PropertyType type = RNA_property_type(prop); - int arraylen = RNA_property_array_length(&ptr, prop); - - if (arraylen == 0) { - float value; - - if (type == PROP_FLOAT) - value = RNA_property_float_get(&ptr, prop); - else if (type == PROP_INT) - value = static_cast(RNA_property_int_get(&ptr, prop)); - else - return false; - - *r_value = make_float4(value, value, value, 1.0f); - return true; - } - else if (type == PROP_FLOAT && arraylen <= 4) { - *r_value = make_float4(0.0f, 0.0f, 0.0f, 1.0f); - RNA_property_float_get_array(&ptr, prop, &r_value->x); - return true; - } - - return false; -} - -/* This function mirrors drw_uniform_attribute_lookup in draw_instance_data.cpp */ -static float4 lookup_instance_property(BL::DepsgraphObjectInstance &b_instance, - const string &name, - bool use_instancer) -{ - string idprop_name = string_printf("[\"%s\"]", name.c_str()); - float4 value; - - /* If requesting instance data, check the parent particle system and object. */ - if (use_instancer && b_instance.is_instance()) { - BL::ParticleSystem b_psys = b_instance.particle_system(); - - if (b_psys) { - if (lookup_property(b_psys.settings(), idprop_name, &value) || - lookup_property(b_psys.settings(), name, &value)) { - return value; - } - } - if (lookup_property(b_instance.parent(), idprop_name, &value) || - lookup_property(b_instance.parent(), name, &value)) { - return value; - } - } - - /* Check the object and mesh. */ - BL::Object b_ob = b_instance.object(); - BL::ID b_data = b_ob.data(); - - if (lookup_property(b_ob, idprop_name, &value) || lookup_property(b_ob, name, &value) || - lookup_property(b_data, idprop_name, &value) || lookup_property(b_data, name, &value)) { - return value; - } - - return make_float4(0.0f); -} - -bool BlenderSync::sync_object_attributes(BL::DepsgraphObjectInstance &b_instance, Object *object) -{ - /* Find which attributes are needed. */ - AttributeRequestSet requests = object->get_geometry()->needed_attributes(); - - /* Delete attributes that became unnecessary. */ - vector &attributes = object->attributes; - bool changed = false; - - for (int i = attributes.size() - 1; i >= 0; i--) { - if (!requests.find(attributes[i].name())) { - attributes.erase(attributes.begin() + i); - changed = true; - } - } - - /* Update attribute values. */ - foreach (AttributeRequest &req, requests.requests) { - ustring name = req.name; - - std::string real_name; - BlenderAttributeType type = blender_attribute_name_split_type(name, &real_name); - - if (type != BL::ShaderNodeAttribute::attribute_type_GEOMETRY) { - bool use_instancer = (type == BL::ShaderNodeAttribute::attribute_type_INSTANCER); - float4 value = lookup_instance_property(b_instance, real_name, use_instancer); - - /* Try finding the existing attribute value. */ - ParamValue *param = NULL; - - for (size_t i = 0; i < attributes.size(); i++) { - if (attributes[i].name() == name) { - param = &attributes[i]; - break; - } - } - - /* Replace or add the value. 
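lookup_property() above always reduces an RNA property to a float4: scalars are broadcast to (v, v, v, 1), while float arrays of up to four elements fill the leading components and leave w at its 1.0 default. A hedged standalone sketch of that packing rule:

/* Sketch only: packing a scalar or short float array into a float4 defaulting to (0, 0, 0, 1). */
#include <algorithm>
#include <array>
#include <cstddef>

static std::array<float, 4> pack_attribute_value(bool is_scalar, const float *values, std::size_t len)
{
  std::array<float, 4> result = {0.0f, 0.0f, 0.0f, 1.0f};
  if (is_scalar) {
    result = {values[0], values[0], values[0], 1.0f}; /* Broadcast, as for PROP_FLOAT/PROP_INT. */
  }
  else {
    std::copy_n(values, std::min<std::size_t>(len, 4), result.begin()); /* Array case. */
  }
  return result;
}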
*/ - ParamValue new_param(name, TypeDesc::TypeFloat4, 1, &value); - assert(new_param.datasize() == sizeof(value)); - - if (!param) { - changed = true; - attributes.push_back(new_param); - } - else if (memcmp(param->data(), &value, sizeof(value)) != 0) { - changed = true; - *param = new_param; - } - } - } - - return changed; -} - -/* Object Loop */ - -void BlenderSync::sync_procedural(BL::Object &b_ob, - BL::MeshSequenceCacheModifier &b_mesh_cache, - bool has_subdivision_modifier) -{ -#ifdef WITH_ALEMBIC - BL::CacheFile cache_file = b_mesh_cache.cache_file(); - void *cache_file_key = cache_file.ptr.data; - - AlembicProcedural *procedural = static_cast( - procedural_map.find(cache_file_key)); - - if (procedural == nullptr) { - procedural = scene->create_node(); - procedural_map.add(cache_file_key, procedural); - } - else { - procedural_map.used(procedural); - } - - float current_frame = static_cast(b_scene.frame_current()); - if (cache_file.override_frame()) { - current_frame = cache_file.frame(); - } - - if (!cache_file.override_frame()) { - procedural->set_start_frame(static_cast(b_scene.frame_start())); - procedural->set_end_frame(static_cast(b_scene.frame_end())); - } - - procedural->set_frame(current_frame); - procedural->set_frame_rate(b_scene.render().fps() / b_scene.render().fps_base()); - procedural->set_frame_offset(cache_file.frame_offset()); - - string absolute_path = blender_absolute_path(b_data, b_ob, b_mesh_cache.cache_file().filepath()); - procedural->set_filepath(ustring(absolute_path)); - - procedural->set_scale(cache_file.scale()); - - procedural->set_use_prefetch(cache_file.use_prefetch()); - procedural->set_prefetch_cache_size(cache_file.prefetch_cache_size()); - - /* create or update existing AlembicObjects */ - ustring object_path = ustring(b_mesh_cache.object_path()); - - AlembicObject *abc_object = procedural->get_or_create_object(object_path); - - array used_shaders = find_used_shaders(b_ob); - abc_object->set_used_shaders(used_shaders); - - PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles"); - const float subd_dicing_rate = max(0.1f, RNA_float_get(&cobj, "dicing_rate") * dicing_rate); - abc_object->set_subd_dicing_rate(subd_dicing_rate); - abc_object->set_subd_max_level(max_subdivisions); - - abc_object->set_ignore_subdivision(!has_subdivision_modifier); - - if (abc_object->is_modified() || procedural->is_modified()) { - procedural->tag_update(scene); - } -#else - (void)b_ob; - (void)b_mesh_cache; - (void)has_subdivision_modifier; -#endif -} - -void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, - BL::SpaceView3D &b_v3d, - float motion_time) -{ - /* Task pool for multithreaded geometry sync. 
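sync_procedural() above looks the Alembic procedural up by the cache file pointer, creates it on first use, and otherwise marks it as used so stale entries can be reclaimed after sync. The same find-or-create-or-mark-used idea with a plain std::unordered_map and a toy type (in the real code the node is owned by the scene, not by the map):

/* Sketch only: find-or-create keyed on the cache file pointer, mirroring procedural_map. */
#include <unordered_map>
#include <unordered_set>

struct Procedural { /* Stand-in for AlembicProcedural. */ };

static Procedural *find_or_create(std::unordered_map<const void *, Procedural *> &map,
                                  std::unordered_set<Procedural *> &used_this_sync,
                                  const void *cache_file_key)
{
  auto it = map.find(cache_file_key);
  if (it == map.end()) {
    Procedural *procedural = new Procedural(); /* scene->create_node<...>() in the real code. */
    it = map.emplace(cache_file_key, procedural).first;
  }
  used_this_sync.insert(it->second); /* procedural_map.used(...) keeps it alive this sync. */
  return it->second;
}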
*/ - TaskPool geom_task_pool; - - /* layer data */ - bool motion = motion_time != 0.0f; - - if (!motion) { - /* prepare for sync */ - light_map.pre_sync(); - geometry_map.pre_sync(); - object_map.pre_sync(); - procedural_map.pre_sync(); - particle_system_map.pre_sync(); - motion_times.clear(); - } - else { - geometry_motion_synced.clear(); - } - - /* initialize culling */ - BlenderObjectCulling culling(scene, b_scene); - - /* object loop */ - bool cancel = false; - bool use_portal = false; - const bool show_lights = BlenderViewportParameters(b_v3d, use_developer_ui).use_scene_lights; - - BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval(); - BL::Depsgraph::object_instances_iterator b_instance_iter; - - for (b_depsgraph.object_instances.begin(b_instance_iter); - b_instance_iter != b_depsgraph.object_instances.end() && !cancel; - ++b_instance_iter) { - BL::DepsgraphObjectInstance b_instance = *b_instance_iter; - BL::Object b_ob = b_instance.object(); - - /* Viewport visibility. */ - const bool show_in_viewport = !b_v3d || b_ob.visible_in_viewport_get(b_v3d); - if (show_in_viewport == false) { - continue; - } - - /* Load per-object culling data. */ - culling.init_object(scene, b_ob); - - /* Ensure the object geom supporting the hair is processed before adding - * the hair processing task to the task pool, calling .to_mesh() on the - * same object in parallel does not work. */ - const bool sync_hair = b_instance.show_particles() && object_has_particle_hair(b_ob); - - /* Object itself. */ - if (b_instance.show_self()) { -#ifdef WITH_ALEMBIC - bool use_procedural = false; - bool has_subdivision_modifier = false; - BL::MeshSequenceCacheModifier b_mesh_cache(PointerRNA_NULL); - - /* Experimental as Blender does not have good support for procedurals at the moment, also - * only available in preview renders since currently do not have a good cache policy, the - * data being loaded at once for all the frames. */ - if (experimental && b_v3d) { - b_mesh_cache = object_mesh_cache_find(b_ob, &has_subdivision_modifier); - use_procedural = b_mesh_cache && b_mesh_cache.cache_file().use_render_procedural(); - } - - if (use_procedural) { - /* Skip in the motion case, as generating motion blur data will be handled in the - * procedural. */ - if (!motion) { - sync_procedural(b_ob, b_mesh_cache, has_subdivision_modifier); - } - } - else -#endif - { - sync_object(b_depsgraph, - b_view_layer, - b_instance, - motion_time, - false, - show_lights, - culling, - &use_portal, - sync_hair ? NULL : &geom_task_pool); - } - } - - /* Particle hair as separate object. */ - if (sync_hair) { - sync_object(b_depsgraph, - b_view_layer, - b_instance, - motion_time, - true, - show_lights, - culling, - &use_portal, - &geom_task_pool); - } - - cancel = progress.get_cancel(); - } - - geom_task_pool.wait_work(); - - progress.set_sync_status(""); - - if (!cancel && !motion) { - sync_background_light(b_v3d, use_portal); - - /* Handle removed data and modified pointers, as this may free memory, delete Nodes in the - * right order to ensure that dependent data is freed after their users. Objects should be - * freed before particle systems and geometries. 
*/ - light_map.post_sync(); - object_map.post_sync(); - geometry_map.post_sync(); - particle_system_map.post_sync(); - procedural_map.post_sync(); - } - - if (motion) - geometry_motion_synced.clear(); -} - -void BlenderSync::sync_motion(BL::RenderSettings &b_render, - BL::Depsgraph &b_depsgraph, - BL::SpaceView3D &b_v3d, - BL::Object &b_override, - int width, - int height, - void **python_thread_state) -{ - if (scene->need_motion() == Scene::MOTION_NONE) - return; - - /* get camera object here to deal with camera switch */ - BL::Object b_cam = b_scene.camera(); - if (b_override) - b_cam = b_override; - - int frame_center = b_scene.frame_current(); - float subframe_center = b_scene.frame_subframe(); - float frame_center_delta = 0.0f; - - if (scene->need_motion() != Scene::MOTION_PASS && - scene->camera->get_motion_position() != Camera::MOTION_POSITION_CENTER) { - float shuttertime = scene->camera->get_shuttertime(); - if (scene->camera->get_motion_position() == Camera::MOTION_POSITION_END) { - frame_center_delta = -shuttertime * 0.5f; - } - else { - assert(scene->camera->get_motion_position() == Camera::MOTION_POSITION_START); - frame_center_delta = shuttertime * 0.5f; - } - - float time = frame_center + subframe_center + frame_center_delta; - int frame = (int)floorf(time); - float subframe = time - frame; - python_thread_state_restore(python_thread_state); - b_engine.frame_set(frame, subframe); - python_thread_state_save(python_thread_state); - if (b_cam) { - sync_camera_motion(b_render, b_cam, width, height, 0.0f); - } - sync_objects(b_depsgraph, b_v3d); - } - - /* Insert motion times from camera. Motion times from other objects - * have already been added in a sync_objects call. */ - if (b_cam) { - uint camera_motion_steps = object_motion_steps(b_cam, b_cam); - for (size_t step = 0; step < camera_motion_steps; step++) { - motion_times.insert(scene->camera->motion_time(step)); - } - } - - /* Check which geometry already has motion blur so it can be skipped. */ - geometry_motion_attribute_synced.clear(); - for (Geometry *geom : scene->geometry) { - if (geom->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { - geometry_motion_attribute_synced.insert(geom); - } - } - - /* note iteration over motion_times set happens in sorted order */ - foreach (float relative_time, motion_times) { - /* center time is already handled. */ - if (relative_time == 0.0f) { - continue; - } - - VLOG(1) << "Synchronizing motion for the relative time " << relative_time << "."; - - /* fixed shutter time to get previous and next frame for motion pass */ - float shuttertime = scene->motion_shutter_time(); - - /* compute frame and subframe time */ - float time = frame_center + subframe_center + frame_center_delta + - relative_time * shuttertime * 0.5f; - int frame = (int)floorf(time); - float subframe = time - frame; - - /* change frame */ - python_thread_state_restore(python_thread_state); - b_engine.frame_set(frame, subframe); - python_thread_state_save(python_thread_state); - - /* Syncs camera motion if relative_time is one of the camera's motion times. 
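sync_motion() above maps each relative motion time in [-1, 1] to an absolute frame/subframe pair: the current frame plus any motion-position offset, shifted by half the shutter time per unit of relative time, then split into integer and fractional parts for b_engine.frame_set(). A standalone sketch of that computation:

/* Sketch only: same arithmetic as the time/frame/subframe computation in sync_motion(). */
#include <cmath>

struct FrameSample {
  int frame;
  float subframe; /* Fractional part passed to frame_set(). */
};

static FrameSample motion_sample(int frame_center,
                                 float subframe_center,
                                 float frame_center_delta,
                                 float relative_time,
                                 float shuttertime)
{
  const float time = frame_center + subframe_center + frame_center_delta +
                     relative_time * shuttertime * 0.5f;
  const int frame = (int)std::floor(time);
  return {frame, time - frame};
}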
*/ - sync_camera_motion(b_render, b_cam, width, height, relative_time); - - /* sync object */ - sync_objects(b_depsgraph, b_v3d, relative_time); - } - - geometry_motion_attribute_synced.clear(); - - /* we need to set the python thread state again because this - * function assumes it is being executed from python and will - * try to save the thread state */ - python_thread_state_restore(python_thread_state); - b_engine.frame_set(frame_center, subframe_center); - python_thread_state_save(python_thread_state); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_object_cull.cpp b/intern/cycles/blender/blender_object_cull.cpp deleted file mode 100644 index 34cceb5a6e4..00000000000 --- a/intern/cycles/blender/blender_object_cull.cpp +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "scene/camera.h" - -#include "blender/blender_object_cull.h" -#include "blender/blender_util.h" - -CCL_NAMESPACE_BEGIN - -BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene &b_scene) - : use_scene_camera_cull_(false), - use_camera_cull_(false), - camera_cull_margin_(0.0f), - use_scene_distance_cull_(false), - use_distance_cull_(false), - distance_cull_margin_(0.0f) -{ - if (b_scene.render().use_simplify()) { - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - - use_scene_camera_cull_ = scene->camera->get_camera_type() != CAMERA_PANORAMA && - !b_scene.render().use_multiview() && - get_boolean(cscene, "use_camera_cull"); - use_scene_distance_cull_ = scene->camera->get_camera_type() != CAMERA_PANORAMA && - !b_scene.render().use_multiview() && - get_boolean(cscene, "use_distance_cull"); - - camera_cull_margin_ = get_float(cscene, "camera_cull_margin"); - distance_cull_margin_ = get_float(cscene, "distance_cull_margin"); - - if (distance_cull_margin_ == 0.0f) { - use_scene_distance_cull_ = false; - } - } -} - -void BlenderObjectCulling::init_object(Scene *scene, BL::Object &b_ob) -{ - if (!use_scene_camera_cull_ && !use_scene_distance_cull_) { - return; - } - - PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); - - use_camera_cull_ = use_scene_camera_cull_ && get_boolean(cobject, "use_camera_cull"); - use_distance_cull_ = use_scene_distance_cull_ && get_boolean(cobject, "use_distance_cull"); - - if (use_camera_cull_ || use_distance_cull_) { - /* Need to have proper projection matrix. */ - scene->camera->update(scene); - } -} - -bool BlenderObjectCulling::test(Scene *scene, BL::Object &b_ob, Transform &tfm) -{ - if (!use_camera_cull_ && !use_distance_cull_) { - return false; - } - - /* Compute world space bounding box corners. 
*/ - float3 bb[8]; - BL::Array boundbox = b_ob.bound_box(); - for (int i = 0; i < 8; ++i) { - float3 p = make_float3(boundbox[3 * i + 0], boundbox[3 * i + 1], boundbox[3 * i + 2]); - bb[i] = transform_point(&tfm, p); - } - - bool camera_culled = use_camera_cull_ && test_camera(scene, bb); - bool distance_culled = use_distance_cull_ && test_distance(scene, bb); - - return ((camera_culled && distance_culled) || (camera_culled && !use_distance_cull_) || - (distance_culled && !use_camera_cull_)); -} - -/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order - * to reduce number of objects which are wrongly considered visible. - */ -bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8]) -{ - Camera *cam = scene->camera; - const ProjectionTransform &worldtondc = cam->worldtondc; - float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX), - bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX); - bool all_behind = true; - for (int i = 0; i < 8; ++i) { - float3 p = bb[i]; - float4 b = make_float4(p.x, p.y, p.z, 1.0f); - float4 c = make_float4( - dot(worldtondc.x, b), dot(worldtondc.y, b), dot(worldtondc.z, b), dot(worldtondc.w, b)); - p = float4_to_float3(c / c.w); - if (c.z < 0.0f) { - p.x = 1.0f - p.x; - p.y = 1.0f - p.y; - } - if (c.z >= -camera_cull_margin_) { - all_behind = false; - } - bb_min = min(bb_min, p); - bb_max = max(bb_max, p); - } - if (all_behind) { - return true; - } - return (bb_min.x >= 1.0f + camera_cull_margin_ || bb_min.y >= 1.0f + camera_cull_margin_ || - bb_max.x <= -camera_cull_margin_ || bb_max.y <= -camera_cull_margin_); -} - -bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8]) -{ - float3 camera_position = transform_get_column(&scene->camera->get_matrix(), 3); - float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX), - bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX); - - /* Find min & max points for x & y & z on bounding box */ - for (int i = 0; i < 8; ++i) { - float3 p = bb[i]; - bb_min = min(bb_min, p); - bb_max = max(bb_max, p); - } - - float3 closest_point = max(min(bb_max, camera_position), bb_min); - return (len_squared(camera_position - closest_point) > - distance_cull_margin_ * distance_cull_margin_); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_object_cull.h b/intern/cycles/blender/blender_object_cull.h deleted file mode 100644 index 0879db4f802..00000000000 --- a/intern/cycles/blender/blender_object_cull.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
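test_distance() above culls an object when the closest point of its world-space bounding box lies farther from the camera than the distance-cull margin; clamping the camera position to the box, max(min(bb_max, camera), bb_min), yields that closest point per component. A standalone sketch of the test:

/* Sketch only: distance culling against an axis-aligned bounding box, as in test_distance(). */
#include <algorithm>

static bool distance_culled(const float bb_min[3],
                            const float bb_max[3],
                            const float camera[3],
                            float margin)
{
  float dist_sq = 0.0f;
  for (int i = 0; i < 3; i++) {
    const float closest = std::max(std::min(bb_max[i], camera[i]), bb_min[i]);
    const float d = camera[i] - closest;
    dist_sq += d * d;
  }
  return dist_sq > margin * margin; /* Beyond the margin: cull the object. */
}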
- */ - -#ifndef __BLENDER_OBJECT_CULL_H__ -#define __BLENDER_OBJECT_CULL_H__ - -#include "blender/blender_sync.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -class Scene; - -class BlenderObjectCulling { - public: - BlenderObjectCulling(Scene *scene, BL::Scene &b_scene); - - void init_object(Scene *scene, BL::Object &b_ob); - bool test(Scene *scene, BL::Object &b_ob, Transform &tfm); - - private: - bool test_camera(Scene *scene, float3 bb[8]); - bool test_distance(Scene *scene, float3 bb[8]); - - bool use_scene_camera_cull_; - bool use_camera_cull_; - float camera_cull_margin_; - bool use_scene_distance_cull_; - bool use_distance_cull_; - float distance_cull_margin_; -}; - -CCL_NAMESPACE_END - -#endif /* __BLENDER_OBJECT_CULL_H__ */ diff --git a/intern/cycles/blender/blender_output_driver.cpp b/intern/cycles/blender/blender_output_driver.cpp deleted file mode 100644 index 2f2844d4820..00000000000 --- a/intern/cycles/blender/blender_output_driver.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright 2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "blender/blender_output_driver.h" - -CCL_NAMESPACE_BEGIN - -BlenderOutputDriver::BlenderOutputDriver(BL::RenderEngine &b_engine) : b_engine_(b_engine) -{ -} - -BlenderOutputDriver::~BlenderOutputDriver() -{ -} - -bool BlenderOutputDriver::read_render_tile(const Tile &tile) -{ - /* Get render result. */ - BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x, - tile.offset.y, - tile.size.x, - tile.size.y, - tile.layer.c_str(), - tile.view.c_str()); - - /* Can happen if the intersected rectangle gives 0 width or height. */ - if (b_rr.ptr.data == NULL) { - return false; - } - - BL::RenderResult::layers_iterator b_single_rlay; - b_rr.layers.begin(b_single_rlay); - - /* layer will be missing if it was disabled in the UI */ - if (b_single_rlay == b_rr.layers.end()) { - return false; - } - - BL::RenderLayer b_rlay = *b_single_rlay; - - vector pixels(tile.size.x * tile.size.y * 4); - - /* Copy each pass. - * TODO:copy only the required ones for better performance? */ - for (BL::RenderPass &b_pass : b_rlay.passes) { - tile.set_pass_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect()); - } - - b_engine_.end_result(b_rr, false, false, false); - - return true; -} - -bool BlenderOutputDriver::update_render_tile(const Tile &tile) -{ - /* Use final write for preview renders, otherwise render result wouldn't be be updated - * quickly on Blender side. For all other cases we use the display driver. */ - if (b_engine_.is_preview()) { - write_render_tile(tile); - return true; - } - - /* Don't highlight full-frame tile. */ - if (!(tile.size == tile.full_size)) { - b_engine_.tile_highlight_clear_all(); - b_engine_.tile_highlight_set(tile.offset.x, tile.offset.y, tile.size.x, tile.size.y, true); - } - - return false; -} - -void BlenderOutputDriver::write_render_tile(const Tile &tile) -{ - b_engine_.tile_highlight_clear_all(); - - /* Get render result. 
*/ - BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x, - tile.offset.y, - tile.size.x, - tile.size.y, - tile.layer.c_str(), - tile.view.c_str()); - - /* Can happen if the intersected rectangle gives 0 width or height. */ - if (b_rr.ptr.data == NULL) { - return; - } - - BL::RenderResult::layers_iterator b_single_rlay; - b_rr.layers.begin(b_single_rlay); - - /* Layer will be missing if it was disabled in the UI. */ - if (b_single_rlay == b_rr.layers.end()) { - return; - } - - BL::RenderLayer b_rlay = *b_single_rlay; - - vector pixels(tile.size.x * tile.size.y * 4); - - /* Copy each pass. */ - for (BL::RenderPass &b_pass : b_rlay.passes) { - if (!tile.get_pass_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) { - memset(&pixels[0], 0, pixels.size() * sizeof(float)); - } - - b_pass.rect(&pixels[0]); - } - - b_engine_.end_result(b_rr, true, false, true); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_output_driver.h b/intern/cycles/blender/blender_output_driver.h deleted file mode 100644 index 1d016f8bcb9..00000000000 --- a/intern/cycles/blender/blender_output_driver.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "MEM_guardedalloc.h" - -#include "RNA_blender_cpp.h" - -#include "session/output_driver.h" - -CCL_NAMESPACE_BEGIN - -class BlenderOutputDriver : public OutputDriver { - public: - explicit BlenderOutputDriver(BL::RenderEngine &b_engine); - ~BlenderOutputDriver(); - - virtual void write_render_tile(const Tile &tile) override; - virtual bool update_render_tile(const Tile &tile) override; - virtual bool read_render_tile(const Tile &tile) override; - - protected: - BL::RenderEngine b_engine_; -}; - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_particles.cpp b/intern/cycles/blender/blender_particles.cpp deleted file mode 100644 index f654998af62..00000000000 --- a/intern/cycles/blender/blender_particles.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "scene/mesh.h" -#include "scene/object.h" -#include "scene/particles.h" - -#include "blender/blender_sync.h" -#include "blender/blender_util.h" - -#include "util/util_foreach.h" - -CCL_NAMESPACE_BEGIN - -/* Utilities */ - -bool BlenderSync::sync_dupli_particle(BL::Object &b_ob, - BL::DepsgraphObjectInstance &b_instance, - Object *object) -{ - /* Test if this dupli was generated from a particle system. */ - BL::ParticleSystem b_psys = b_instance.particle_system(); - if (!b_psys) - return false; - - object->set_hide_on_missing_motion(true); - - /* test if we need particle data */ - if (!object->get_geometry()->need_attribute(scene, ATTR_STD_PARTICLE)) - return false; - - /* don't handle child particles yet */ - BL::Array persistent_id = b_instance.persistent_id(); - - if (persistent_id[0] >= b_psys.particles.length()) - return false; - - /* find particle system */ - ParticleSystemKey key(b_ob, persistent_id); - ParticleSystem *psys; - - bool first_use = !particle_system_map.is_used(key); - bool need_update = particle_system_map.add_or_update(&psys, b_ob, b_instance.object(), key); - - /* no update needed? */ - if (!need_update && !object->get_geometry()->is_modified() && - !scene->object_manager->need_update()) - return true; - - /* first time used in this sync loop? clear and tag update */ - if (first_use) { - psys->particles.clear(); - psys->tag_update(scene); - } - - /* add particle */ - BL::Particle b_pa = b_psys.particles[persistent_id[0]]; - Particle pa; - - pa.index = persistent_id[0]; - pa.age = b_scene.frame_current_final() - b_pa.birth_time(); - pa.lifetime = b_pa.lifetime(); - pa.location = get_float3(b_pa.location()); - pa.rotation = get_float4(b_pa.rotation()); - pa.size = b_pa.size(); - pa.velocity = get_float3(b_pa.velocity()); - pa.angular_velocity = get_float3(b_pa.angular_velocity()); - - psys->particles.push_back_slow(pa); - - object->set_particle_system(psys); - object->set_particle_index(psys->particles.size() - 1); - - if (object->particle_index_is_modified()) - scene->object_manager->tag_update(scene, ObjectManager::PARTICLE_MODIFIED); - - /* return that this object has particle data */ - return true; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp deleted file mode 100644 index 45e5394cf34..00000000000 --- a/intern/cycles/blender/blender_python.cpp +++ /dev/null @@ -1,1063 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include "blender/CCL_api.h" - -#include "blender/blender_device.h" -#include "blender/blender_session.h" -#include "blender/blender_sync.h" -#include "blender/blender_util.h" - -#include "session/denoising.h" -#include "session/merge.h" - -#include "util/util_debug.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_md5.h" -#include "util/util_opengl.h" -#include "util/util_openimagedenoise.h" -#include "util/util_path.h" -#include "util/util_string.h" -#include "util/util_task.h" -#include "util/util_tbb.h" -#include "util/util_types.h" - -#ifdef WITH_OSL -# include "scene/osl.h" - -# include -# include -#endif - -CCL_NAMESPACE_BEGIN - -namespace { - -/* Flag describing whether debug flags were synchronized from scene. */ -bool debug_flags_set = false; - -void *pylong_as_voidptr_typesafe(PyObject *object) -{ - if (object == Py_None) - return NULL; - return PyLong_AsVoidPtr(object); -} - -PyObject *pyunicode_from_string(const char *str) -{ - /* Ignore errors if device API returns invalid UTF-8 strings. */ - return PyUnicode_DecodeUTF8(str, strlen(str), "ignore"); -} - -/* Synchronize debug flags from a given Blender scene. - * Return truth when device list needs invalidation. - */ -static void debug_flags_sync_from_scene(BL::Scene b_scene) -{ - DebugFlagsRef flags = DebugFlags(); - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - /* Synchronize shared flags. */ - flags.viewport_static_bvh = get_enum(cscene, "debug_bvh_type"); - /* Synchronize CPU flags. */ - flags.cpu.avx2 = get_boolean(cscene, "debug_use_cpu_avx2"); - flags.cpu.avx = get_boolean(cscene, "debug_use_cpu_avx"); - flags.cpu.sse41 = get_boolean(cscene, "debug_use_cpu_sse41"); - flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3"); - flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2"); - flags.cpu.bvh_layout = (BVHLayout)get_enum(cscene, "debug_bvh_layout"); - /* Synchronize CUDA flags. */ - flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile"); - /* Synchronize OptiX flags. */ - flags.optix.use_debug = get_boolean(cscene, "debug_use_optix_debug"); -} - -/* Reset debug flags to default values. - * Return truth when device list needs invalidation. - */ -static void debug_flags_reset() -{ - DebugFlagsRef flags = DebugFlags(); - flags.reset(); -} - -} /* namespace */ - -void python_thread_state_save(void **python_thread_state) -{ - *python_thread_state = (void *)PyEval_SaveThread(); -} - -void python_thread_state_restore(void **python_thread_state) -{ - PyEval_RestoreThread((PyThreadState *)*python_thread_state); - *python_thread_state = NULL; -} - -static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce) -{ - const char *result = PyUnicode_AsUTF8(py_str); - if (result) { - /* 99% of the time this is enough but we better support non unicode - * chars since blender doesn't limit this. 
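python_thread_state_save() and python_thread_state_restore() above are thin wrappers over PyEval_SaveThread()/PyEval_RestoreThread(), used to release Python's GIL around long-running session calls. The original code manages the raw thread state by hand; purely to illustrate the same pattern, an RAII guard could look like this (not part of the source):

/* Sketch only: release the GIL for the lifetime of the guard, re-acquire it on scope exit. */
#include <Python.h>

class GilReleaseGuard {
 public:
  GilReleaseGuard() : state_(PyEval_SaveThread()) {}
  ~GilReleaseGuard() { PyEval_RestoreThread(state_); }

  GilReleaseGuard(const GilReleaseGuard &) = delete;
  GilReleaseGuard &operator=(const GilReleaseGuard &) = delete;

 private:
  PyThreadState *state_;
};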
- */ - return result; - } - else { - PyErr_Clear(); - if (PyBytes_Check(py_str)) { - return PyBytes_AS_STRING(py_str); - } - else if ((*coerce = PyUnicode_EncodeFSDefault(py_str))) { - return PyBytes_AS_STRING(*coerce); - } - else { - /* Clear the error, so Cycles can be at least used without - * GPU and OSL support, - */ - PyErr_Clear(); - return ""; - } - } -} - -static PyObject *init_func(PyObject * /*self*/, PyObject *args) -{ - PyObject *path, *user_path, *temp_path; - int headless; - - if (!PyArg_ParseTuple(args, "OOOi", &path, &user_path, &temp_path, &headless)) { - return nullptr; - } - - PyObject *path_coerce = nullptr, *user_path_coerce = nullptr, *temp_path_coerce = nullptr; - path_init(PyC_UnicodeAsByte(path, &path_coerce), - PyC_UnicodeAsByte(user_path, &user_path_coerce), - PyC_UnicodeAsByte(temp_path, &temp_path_coerce)); - Py_XDECREF(path_coerce); - Py_XDECREF(user_path_coerce); - Py_XDECREF(temp_path_coerce); - - BlenderSession::headless = headless; - - DebugFlags().running_inside_blender = true; - - VLOG(2) << "Debug flags initialized to:\n" << DebugFlags(); - - Py_RETURN_NONE; -} - -static PyObject *exit_func(PyObject * /*self*/, PyObject * /*args*/) -{ - ShaderManager::free_memory(); - TaskScheduler::free_memory(); - Device::free_memory(); - Py_RETURN_NONE; -} - -static PyObject *create_func(PyObject * /*self*/, PyObject *args) -{ - PyObject *pyengine, *pypreferences, *pydata, *pyscreen, *pyregion, *pyv3d, *pyrv3d; - int preview_osl; - - if (!PyArg_ParseTuple(args, - "OOOOOOOi", - &pyengine, - &pypreferences, - &pydata, - &pyscreen, - &pyregion, - &pyv3d, - &pyrv3d, - &preview_osl)) { - return NULL; - } - - /* RNA */ - ID *bScreen = (ID *)PyLong_AsVoidPtr(pyscreen); - - PointerRNA engineptr; - RNA_pointer_create(NULL, &RNA_RenderEngine, (void *)PyLong_AsVoidPtr(pyengine), &engineptr); - BL::RenderEngine engine(engineptr); - - PointerRNA preferencesptr; - RNA_pointer_create( - NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences), &preferencesptr); - BL::Preferences preferences(preferencesptr); - - PointerRNA dataptr; - RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr); - BL::BlendData data(dataptr); - - PointerRNA regionptr; - RNA_pointer_create(bScreen, &RNA_Region, pylong_as_voidptr_typesafe(pyregion), ®ionptr); - BL::Region region(regionptr); - - PointerRNA v3dptr; - RNA_pointer_create(bScreen, &RNA_SpaceView3D, pylong_as_voidptr_typesafe(pyv3d), &v3dptr); - BL::SpaceView3D v3d(v3dptr); - - PointerRNA rv3dptr; - RNA_pointer_create(bScreen, &RNA_RegionView3D, pylong_as_voidptr_typesafe(pyrv3d), &rv3dptr); - BL::RegionView3D rv3d(rv3dptr); - - /* create session */ - BlenderSession *session; - - if (rv3d) { - /* interactive viewport session */ - int width = region.width(); - int height = region.height(); - - session = new BlenderSession(engine, preferences, data, v3d, rv3d, width, height); - } - else { - /* offline session or preview render */ - session = new BlenderSession(engine, preferences, data, preview_osl); - } - - return PyLong_FromVoidPtr(session); -} - -static PyObject *free_func(PyObject * /*self*/, PyObject *value) -{ - delete (BlenderSession *)PyLong_AsVoidPtr(value); - - Py_RETURN_NONE; -} - -static PyObject *render_func(PyObject * /*self*/, PyObject *args) -{ - PyObject *pysession, *pydepsgraph; - - if (!PyArg_ParseTuple(args, "OO", &pysession, &pydepsgraph)) - return NULL; - - BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); - - PointerRNA depsgraphptr; - RNA_pointer_create(NULL, &RNA_Depsgraph, (ID 
*)PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr); - BL::Depsgraph b_depsgraph(depsgraphptr); - - /* Allow Blender to execute other Python scripts. */ - python_thread_state_save(&session->python_thread_state); - - session->render(b_depsgraph); - - python_thread_state_restore(&session->python_thread_state); - - Py_RETURN_NONE; -} - -static PyObject *render_frame_finish_func(PyObject * /*self*/, PyObject *args) -{ - PyObject *pysession; - - if (!PyArg_ParseTuple(args, "O", &pysession)) { - return nullptr; - } - - BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); - - /* Allow Blender to execute other Python scripts. */ - python_thread_state_save(&session->python_thread_state); - - session->render_frame_finish(); - - python_thread_state_restore(&session->python_thread_state); - - Py_RETURN_NONE; -} - -static PyObject *draw_func(PyObject * /*self*/, PyObject *args) -{ - PyObject *py_session, *py_graph, *py_screen, *py_space_image; - - if (!PyArg_ParseTuple(args, "OOOO", &py_session, &py_graph, &py_screen, &py_space_image)) { - return nullptr; - } - - BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(py_session); - - ID *b_screen = (ID *)PyLong_AsVoidPtr(py_screen); - - PointerRNA b_space_image_ptr; - RNA_pointer_create(b_screen, - &RNA_SpaceImageEditor, - pylong_as_voidptr_typesafe(py_space_image), - &b_space_image_ptr); - BL::SpaceImageEditor b_space_image(b_space_image_ptr); - - session->draw(b_space_image); - - Py_RETURN_NONE; -} - -/* pixel_array and result passed as pointers */ -static PyObject *bake_func(PyObject * /*self*/, PyObject *args) -{ - PyObject *pysession, *pydepsgraph, *pyobject; - const char *pass_type; - int pass_filter, width, height; - - if (!PyArg_ParseTuple(args, - "OOOsiii", - &pysession, - &pydepsgraph, - &pyobject, - &pass_type, - &pass_filter, - &width, - &height)) - return NULL; - - BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); - - PointerRNA depsgraphptr; - RNA_pointer_create(NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr); - BL::Depsgraph b_depsgraph(depsgraphptr); - - PointerRNA objectptr; - RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyobject), &objectptr); - BL::Object b_object(objectptr); - - python_thread_state_save(&session->python_thread_state); - - session->bake(b_depsgraph, b_object, pass_type, pass_filter, width, height); - - python_thread_state_restore(&session->python_thread_state); - - Py_RETURN_NONE; -} - -static PyObject *view_draw_func(PyObject * /*self*/, PyObject *args) -{ - PyObject *pysession, *pygraph, *pyv3d, *pyrv3d; - - if (!PyArg_ParseTuple(args, "OOOO", &pysession, &pygraph, &pyv3d, &pyrv3d)) - return NULL; - - BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); - - if (PyLong_AsVoidPtr(pyrv3d)) { - /* 3d view drawing */ - int viewport[4]; - glGetIntegerv(GL_VIEWPORT, viewport); - - session->view_draw(viewport[2], viewport[3]); - } - - Py_RETURN_NONE; -} - -static PyObject *reset_func(PyObject * /*self*/, PyObject *args) -{ - PyObject *pysession, *pydata, *pydepsgraph; - - if (!PyArg_ParseTuple(args, "OOO", &pysession, &pydata, &pydepsgraph)) - return NULL; - - BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); - - PointerRNA dataptr; - RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr); - BL::BlendData b_data(dataptr); - - PointerRNA depsgraphptr; - RNA_pointer_create(NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr); - BL::Depsgraph b_depsgraph(depsgraphptr); - - 
python_thread_state_save(&session->python_thread_state); - - session->reset_session(b_data, b_depsgraph); - - python_thread_state_restore(&session->python_thread_state); - - Py_RETURN_NONE; -} - -static PyObject *sync_func(PyObject * /*self*/, PyObject *args) -{ - PyObject *pysession, *pydepsgraph; - - if (!PyArg_ParseTuple(args, "OO", &pysession, &pydepsgraph)) - return NULL; - - BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); - - PointerRNA depsgraphptr; - RNA_pointer_create(NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr); - BL::Depsgraph b_depsgraph(depsgraphptr); - - python_thread_state_save(&session->python_thread_state); - - session->synchronize(b_depsgraph); - - python_thread_state_restore(&session->python_thread_state); - - Py_RETURN_NONE; -} - -static PyObject *available_devices_func(PyObject * /*self*/, PyObject *args) -{ - const char *type_name; - if (!PyArg_ParseTuple(args, "s", &type_name)) { - return NULL; - } - - DeviceType type = Device::type_from_string(type_name); - /* "NONE" is defined by the add-on, see: `CyclesPreferences.get_device_types`. */ - if ((type == DEVICE_NONE) && (strcmp(type_name, "NONE") != 0)) { - PyErr_Format(PyExc_ValueError, "Device \"%s\" not known.", type_name); - return NULL; - } - - uint mask = (type == DEVICE_NONE) ? DEVICE_MASK_ALL : DEVICE_MASK(type); - mask |= DEVICE_MASK_CPU; - - vector devices = Device::available_devices(mask); - PyObject *ret = PyTuple_New(devices.size()); - - for (size_t i = 0; i < devices.size(); i++) { - DeviceInfo &device = devices[i]; - string type_name = Device::string_from_type(device.type); - PyObject *device_tuple = PyTuple_New(4); - PyTuple_SET_ITEM(device_tuple, 0, pyunicode_from_string(device.description.c_str())); - PyTuple_SET_ITEM(device_tuple, 1, pyunicode_from_string(type_name.c_str())); - PyTuple_SET_ITEM(device_tuple, 2, pyunicode_from_string(device.id.c_str())); - PyTuple_SET_ITEM(device_tuple, 3, PyBool_FromLong(device.has_peer_memory)); - PyTuple_SET_ITEM(ret, i, device_tuple); - } - - return ret; -} - -#ifdef WITH_OSL - -static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args) -{ - PyObject *pydata, *pynodegroup, *pynode; - const char *filepath = NULL; - - if (!PyArg_ParseTuple(args, "OOOs", &pydata, &pynodegroup, &pynode, &filepath)) - return NULL; - - /* RNA */ - PointerRNA dataptr; - RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr); - BL::BlendData b_data(dataptr); - - PointerRNA nodeptr; - RNA_pointer_create((ID *)PyLong_AsVoidPtr(pynodegroup), - &RNA_ShaderNodeScript, - (void *)PyLong_AsVoidPtr(pynode), - &nodeptr); - BL::ShaderNodeScript b_node(nodeptr); - - /* update bytecode hash */ - string bytecode = b_node.bytecode(); - - if (!bytecode.empty()) { - MD5Hash md5; - md5.append((const uint8_t *)bytecode.c_str(), bytecode.size()); - b_node.bytecode_hash(md5.get_hex().c_str()); - } - else - b_node.bytecode_hash(""); - - /* query from file path */ - OSL::OSLQuery query; - - if (!OSLShaderManager::osl_query(query, filepath)) - Py_RETURN_FALSE; - - /* add new sockets from parameters */ - set used_sockets; - - for (int i = 0; i < query.nparams(); i++) { - const OSL::OSLQuery::Parameter *param = query.getparam(i); - - /* skip unsupported types */ - if (param->varlenarray || param->isstruct || param->type.arraylen > 1) - continue; - - /* Read metadata. 
*/ - bool is_bool_param = false; - ustring param_label = param->name; - - for (const OSL::OSLQuery::Parameter &metadata : param->metadata) { - if (metadata.type == TypeDesc::STRING) { - if (metadata.name == "widget") { - /* Boolean socket. */ - if (metadata.sdefault[0] == "boolean" || metadata.sdefault[0] == "checkBox") { - is_bool_param = true; - } - } - else if (metadata.name == "label") { - /* Socket label. */ - param_label = metadata.sdefault[0]; - } - } - } - /* determine socket type */ - string socket_type; - BL::NodeSocket::type_enum data_type = BL::NodeSocket::type_VALUE; - float4 default_float4 = make_float4(0.0f, 0.0f, 0.0f, 1.0f); - float default_float = 0.0f; - int default_int = 0; - string default_string = ""; - bool default_boolean = false; - - if (param->isclosure) { - socket_type = "NodeSocketShader"; - data_type = BL::NodeSocket::type_SHADER; - } - else if (param->type.vecsemantics == TypeDesc::COLOR) { - socket_type = "NodeSocketColor"; - data_type = BL::NodeSocket::type_RGBA; - - if (param->validdefault) { - default_float4[0] = param->fdefault[0]; - default_float4[1] = param->fdefault[1]; - default_float4[2] = param->fdefault[2]; - } - } - else if (param->type.vecsemantics == TypeDesc::POINT || - param->type.vecsemantics == TypeDesc::VECTOR || - param->type.vecsemantics == TypeDesc::NORMAL) { - socket_type = "NodeSocketVector"; - data_type = BL::NodeSocket::type_VECTOR; - - if (param->validdefault) { - default_float4[0] = param->fdefault[0]; - default_float4[1] = param->fdefault[1]; - default_float4[2] = param->fdefault[2]; - } - } - else if (param->type.aggregate == TypeDesc::SCALAR) { - if (param->type.basetype == TypeDesc::INT) { - if (is_bool_param) { - socket_type = "NodeSocketBool"; - data_type = BL::NodeSocket::type_BOOLEAN; - if (param->validdefault) { - default_boolean = (bool)param->idefault[0]; - } - } - else { - socket_type = "NodeSocketInt"; - data_type = BL::NodeSocket::type_INT; - if (param->validdefault) - default_int = param->idefault[0]; - } - } - else if (param->type.basetype == TypeDesc::FLOAT) { - socket_type = "NodeSocketFloat"; - data_type = BL::NodeSocket::type_VALUE; - if (param->validdefault) - default_float = param->fdefault[0]; - } - else if (param->type.basetype == TypeDesc::STRING) { - socket_type = "NodeSocketString"; - data_type = BL::NodeSocket::type_STRING; - if (param->validdefault) - default_string = param->sdefault[0].string(); - } - else - continue; - } - else - continue; - - /* Update existing socket. */ - bool found_existing = false; - if (param->isoutput) { - for (BL::NodeSocket &b_sock : b_node.outputs) { - if (b_sock.identifier() == param->name) { - if (b_sock.bl_idname() != socket_type) { - /* Remove if type no longer matches. */ - b_node.outputs.remove(b_data, b_sock); - } - else { - /* Reuse and update label. */ - if (b_sock.name() != param_label) { - b_sock.name(param_label.string()); - } - used_sockets.insert(b_sock.ptr.data); - found_existing = true; - } - break; - } - } - } - else { - for (BL::NodeSocket &b_sock : b_node.inputs) { - if (b_sock.identifier() == param->name) { - if (b_sock.bl_idname() != socket_type) { - /* Remove if type no longer matches. */ - b_node.inputs.remove(b_data, b_sock); - } - else { - /* Reuse and update label. */ - if (b_sock.name() != param_label) { - b_sock.name(param_label.string()); - } - used_sockets.insert(b_sock.ptr.data); - found_existing = true; - } - break; - } - } - } - - if (!found_existing) { - /* Create new socket. */ - BL::NodeSocket b_sock = (param->isoutput) ? 
b_node.outputs.create(b_data, - socket_type.c_str(), - param_label.c_str(), - param->name.c_str()) : - b_node.inputs.create(b_data, - socket_type.c_str(), - param_label.c_str(), - param->name.c_str()); - - /* set default value */ - if (data_type == BL::NodeSocket::type_VALUE) { - set_float(b_sock.ptr, "default_value", default_float); - } - else if (data_type == BL::NodeSocket::type_INT) { - set_int(b_sock.ptr, "default_value", default_int); - } - else if (data_type == BL::NodeSocket::type_RGBA) { - set_float4(b_sock.ptr, "default_value", default_float4); - } - else if (data_type == BL::NodeSocket::type_VECTOR) { - set_float3(b_sock.ptr, "default_value", float4_to_float3(default_float4)); - } - else if (data_type == BL::NodeSocket::type_STRING) { - set_string(b_sock.ptr, "default_value", default_string); - } - else if (data_type == BL::NodeSocket::type_BOOLEAN) { - set_boolean(b_sock.ptr, "default_value", default_boolean); - } - - used_sockets.insert(b_sock.ptr.data); - } - } - - /* remove unused parameters */ - bool removed; - - do { - removed = false; - - for (BL::NodeSocket &b_input : b_node.inputs) { - if (used_sockets.find(b_input.ptr.data) == used_sockets.end()) { - b_node.inputs.remove(b_data, b_input); - removed = true; - break; - } - } - - for (BL::NodeSocket &b_output : b_node.outputs) { - if (used_sockets.find(b_output.ptr.data) == used_sockets.end()) { - b_node.outputs.remove(b_data, b_output); - removed = true; - break; - } - } - } while (removed); - - Py_RETURN_TRUE; -} - -static PyObject *osl_compile_func(PyObject * /*self*/, PyObject *args) -{ - const char *inputfile = NULL, *outputfile = NULL; - - if (!PyArg_ParseTuple(args, "ss", &inputfile, &outputfile)) - return NULL; - - /* return */ - if (!OSLShaderManager::osl_compile(inputfile, outputfile)) - Py_RETURN_FALSE; - - Py_RETURN_TRUE; -} -#endif - -static PyObject *system_info_func(PyObject * /*self*/, PyObject * /*value*/) -{ - string system_info = Device::device_capabilities(); - return pyunicode_from_string(system_info.c_str()); -} - -static bool image_parse_filepaths(PyObject *pyfilepaths, vector &filepaths) -{ - if (PyUnicode_Check(pyfilepaths)) { - const char *filepath = PyUnicode_AsUTF8(pyfilepaths); - filepaths.push_back(filepath); - return true; - } - - PyObject *sequence = PySequence_Fast(pyfilepaths, - "File paths must be a string or sequence of strings"); - if (sequence == NULL) { - return false; - } - - for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) { - PyObject *item = PySequence_Fast_GET_ITEM(sequence, i); - const char *filepath = PyUnicode_AsUTF8(item); - if (filepath == NULL) { - PyErr_SetString(PyExc_ValueError, "File paths must be a string or sequence of strings."); - Py_DECREF(sequence); - return false; - } - filepaths.push_back(filepath); - } - Py_DECREF(sequence); - - return true; -} - -static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *keywords) -{ -#if 1 - (void)args; - (void)keywords; -#else - static const char *keyword_list[] = { - "preferences", "scene", "view_layer", "input", "output", "tile_size", "samples", NULL}; - PyObject *pypreferences, *pyscene, *pyviewlayer; - PyObject *pyinput, *pyoutput = NULL; - int tile_size = 0, samples = 0; - - if (!PyArg_ParseTupleAndKeywords(args, - keywords, - "OOOO|Oii", - (char **)keyword_list, - &pypreferences, - &pyscene, - &pyviewlayer, - &pyinput, - &pyoutput, - &tile_size, - &samples)) { - return NULL; - } - - /* Get device specification from preferences and scene. 
*/ - PointerRNA preferencesptr; - RNA_pointer_create( - NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences), &preferencesptr); - BL::Preferences b_preferences(preferencesptr); - - PointerRNA sceneptr; - RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene), &sceneptr); - BL::Scene b_scene(sceneptr); - - DeviceInfo device = blender_device_info(b_preferences, b_scene, true); - - /* Get denoising parameters from view layer. */ - PointerRNA viewlayerptr; - RNA_pointer_create((ID *)PyLong_AsVoidPtr(pyscene), - &RNA_ViewLayer, - PyLong_AsVoidPtr(pyviewlayer), - &viewlayerptr); - PointerRNA cviewlayer = RNA_pointer_get(&viewlayerptr, "cycles"); - - DenoiseParams params; - params.radius = get_int(cviewlayer, "denoising_radius"); - params.strength = get_float(cviewlayer, "denoising_strength"); - params.feature_strength = get_float(cviewlayer, "denoising_feature_strength"); - params.relative_pca = get_boolean(cviewlayer, "denoising_relative_pca"); - params.neighbor_frames = get_int(cviewlayer, "denoising_neighbor_frames"); - - /* Parse file paths list. */ - vector input, output; - - if (!image_parse_filepaths(pyinput, input)) { - return NULL; - } - - if (pyoutput) { - if (!image_parse_filepaths(pyoutput, output)) { - return NULL; - } - } - else { - output = input; - } - - if (input.empty()) { - PyErr_SetString(PyExc_ValueError, "No input file paths specified."); - return NULL; - } - if (input.size() != output.size()) { - PyErr_SetString(PyExc_ValueError, "Number of input and output file paths does not match."); - return NULL; - } - - /* Create denoiser. */ - DenoiserPipeline denoiser(device); - denoiser.params = params; - denoiser.input = input; - denoiser.output = output; - - if (tile_size > 0) { - denoiser.tile_size = make_int2(tile_size, tile_size); - } - if (samples > 0) { - denoiser.samples_override = samples; - } - - /* Run denoiser. */ - if (!denoiser.run()) { - PyErr_SetString(PyExc_ValueError, denoiser.error.c_str()); - return NULL; - } -#endif - - Py_RETURN_NONE; -} - -static PyObject *merge_func(PyObject * /*self*/, PyObject *args, PyObject *keywords) -{ - static const char *keyword_list[] = {"input", "output", NULL}; - PyObject *pyinput, *pyoutput = NULL; - - if (!PyArg_ParseTupleAndKeywords( - args, keywords, "OO", (char **)keyword_list, &pyinput, &pyoutput)) { - return NULL; - } - - /* Parse input list. */ - vector input; - if (!image_parse_filepaths(pyinput, input)) { - return NULL; - } - - /* Parse output string. */ - if (!PyUnicode_Check(pyoutput)) { - PyErr_SetString(PyExc_ValueError, "Output must be a string."); - return NULL; - } - string output = PyUnicode_AsUTF8(pyoutput); - - /* Merge. 
*/ - ImageMerger merger; - merger.input = input; - merger.output = output; - - if (!merger.run()) { - PyErr_SetString(PyExc_ValueError, merger.error.c_str()); - return NULL; - } - - Py_RETURN_NONE; -} - -static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args) -{ - PyObject *pyscene; - if (!PyArg_ParseTuple(args, "O", &pyscene)) { - return NULL; - } - - PointerRNA sceneptr; - RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene), &sceneptr); - BL::Scene b_scene(sceneptr); - - debug_flags_sync_from_scene(b_scene); - - VLOG(2) << "Debug flags set to:\n" << DebugFlags(); - - debug_flags_set = true; - - Py_RETURN_NONE; -} - -static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/) -{ - debug_flags_reset(); - if (debug_flags_set) { - VLOG(2) << "Debug flags reset to:\n" << DebugFlags(); - debug_flags_set = false; - } - Py_RETURN_NONE; -} - -static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*/) -{ - BlenderSession::print_render_stats = true; - Py_RETURN_NONE; -} - -static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/) -{ - vector device_types = Device::available_types(); - bool has_cuda = false, has_optix = false, has_hip = false; - foreach (DeviceType device_type, device_types) { - has_cuda |= (device_type == DEVICE_CUDA); - has_optix |= (device_type == DEVICE_OPTIX); - has_hip |= (device_type == DEVICE_HIP); - } - PyObject *list = PyTuple_New(3); - PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda)); - PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix)); - PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip)); - return list; -} - -static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg) -{ - PyObject *override_string = PyObject_Str(arg); - string override = PyUnicode_AsUTF8(override_string); - Py_DECREF(override_string); - - bool include_cpu = false; - const string cpu_suffix = "+CPU"; - if (string_endswith(override, cpu_suffix)) { - include_cpu = true; - override = override.substr(0, override.length() - cpu_suffix.length()); - } - - if (override == "CPU") { - BlenderSession::device_override = DEVICE_MASK_CPU; - } - else if (override == "CUDA") { - BlenderSession::device_override = DEVICE_MASK_CUDA; - } - else if (override == "OPTIX") { - BlenderSession::device_override = DEVICE_MASK_OPTIX; - } - else if (override == "HIP") { - BlenderSession::device_override = DEVICE_MASK_HIP; - } - else { - printf("\nError: %s is not a valid Cycles device.\n", override.c_str()); - Py_RETURN_FALSE; - } - - if (include_cpu) { - BlenderSession::device_override = (DeviceTypeMask)(BlenderSession::device_override | - DEVICE_MASK_CPU); - } - - Py_RETURN_TRUE; -} - -static PyMethodDef methods[] = { - {"init", init_func, METH_VARARGS, ""}, - {"exit", exit_func, METH_VARARGS, ""}, - {"create", create_func, METH_VARARGS, ""}, - {"free", free_func, METH_O, ""}, - {"render", render_func, METH_VARARGS, ""}, - {"render_frame_finish", render_frame_finish_func, METH_VARARGS, ""}, - {"draw", draw_func, METH_VARARGS, ""}, - {"bake", bake_func, METH_VARARGS, ""}, - {"view_draw", view_draw_func, METH_VARARGS, ""}, - {"sync", sync_func, METH_VARARGS, ""}, - {"reset", reset_func, METH_VARARGS, ""}, -#ifdef WITH_OSL - {"osl_update_node", osl_update_node_func, METH_VARARGS, ""}, - {"osl_compile", osl_compile_func, METH_VARARGS, ""}, -#endif - {"available_devices", available_devices_func, METH_VARARGS, ""}, - {"system_info", system_info_func, METH_NOARGS, ""}, - - /* Standalone denoising */ - {"denoise", 
(PyCFunction)denoise_func, METH_VARARGS | METH_KEYWORDS, ""}, - {"merge", (PyCFunction)merge_func, METH_VARARGS | METH_KEYWORDS, ""}, - - /* Debugging routines */ - {"debug_flags_update", debug_flags_update_func, METH_VARARGS, ""}, - {"debug_flags_reset", debug_flags_reset_func, METH_NOARGS, ""}, - - /* Statistics. */ - {"enable_print_stats", enable_print_stats_func, METH_NOARGS, ""}, - - /* Compute Device selection */ - {"get_device_types", get_device_types_func, METH_VARARGS, ""}, - {"set_device_override", set_device_override_func, METH_O, ""}, - - {NULL, NULL, 0, NULL}, -}; - -static struct PyModuleDef module = { - PyModuleDef_HEAD_INIT, - "_cycles", - "Blender cycles render integration", - -1, - methods, - NULL, - NULL, - NULL, - NULL, -}; - -CCL_NAMESPACE_END - -void *CCL_python_module_init() -{ - PyObject *mod = PyModule_Create(&ccl::module); - -#ifdef WITH_OSL - /* TODO(sergey): This gives us library we've been linking against. - * In theory with dynamic OSL library it might not be - * accurate, but there's nothing in OSL API which we - * might use to get version in runtime. - */ - int curversion = OSL_LIBRARY_VERSION_CODE; - PyModule_AddObject(mod, "with_osl", Py_True); - Py_INCREF(Py_True); - PyModule_AddObject( - mod, - "osl_version", - Py_BuildValue("(iii)", curversion / 10000, (curversion / 100) % 100, curversion % 100)); - PyModule_AddObject( - mod, - "osl_version_string", - PyUnicode_FromFormat( - "%2d, %2d, %2d", curversion / 10000, (curversion / 100) % 100, curversion % 100)); -#else - PyModule_AddObject(mod, "with_osl", Py_False); - Py_INCREF(Py_False); - PyModule_AddStringConstant(mod, "osl_version", "unknown"); - PyModule_AddStringConstant(mod, "osl_version_string", "unknown"); -#endif - -#ifdef WITH_EMBREE - PyModule_AddObject(mod, "with_embree", Py_True); - Py_INCREF(Py_True); -#else /* WITH_EMBREE */ - PyModule_AddObject(mod, "with_embree", Py_False); - Py_INCREF(Py_False); -#endif /* WITH_EMBREE */ - - if (ccl::openimagedenoise_supported()) { - PyModule_AddObject(mod, "with_openimagedenoise", Py_True); - Py_INCREF(Py_True); - } - else { - PyModule_AddObject(mod, "with_openimagedenoise", Py_False); - Py_INCREF(Py_False); - } - - return (void *)mod; -} diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp deleted file mode 100644 index 988a8159864..00000000000 --- a/intern/cycles/blender/blender_session.cpp +++ /dev/null @@ -1,1003 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include "device/device.h" -#include "scene/background.h" -#include "scene/camera.h" -#include "scene/colorspace.h" -#include "scene/film.h" -#include "scene/integrator.h" -#include "scene/light.h" -#include "scene/mesh.h" -#include "scene/object.h" -#include "scene/scene.h" -#include "scene/shader.h" -#include "scene/stats.h" -#include "session/buffers.h" -#include "session/session.h" - -#include "util/util_algorithm.h" -#include "util/util_color.h" -#include "util/util_foreach.h" -#include "util/util_function.h" -#include "util/util_hash.h" -#include "util/util_logging.h" -#include "util/util_murmurhash.h" -#include "util/util_path.h" -#include "util/util_progress.h" -#include "util/util_time.h" - -#include "blender/blender_display_driver.h" -#include "blender/blender_output_driver.h" -#include "blender/blender_session.h" -#include "blender/blender_sync.h" -#include "blender/blender_util.h" - -CCL_NAMESPACE_BEGIN - -DeviceTypeMask BlenderSession::device_override = DEVICE_MASK_ALL; -bool BlenderSession::headless = false; -bool BlenderSession::print_render_stats = false; - -BlenderSession::BlenderSession(BL::RenderEngine &b_engine, - BL::Preferences &b_userpref, - BL::BlendData &b_data, - bool preview_osl) - : session(NULL), - scene(NULL), - sync(NULL), - b_engine(b_engine), - b_userpref(b_userpref), - b_data(b_data), - b_render(b_engine.render()), - b_depsgraph(PointerRNA_NULL), - b_scene(PointerRNA_NULL), - b_v3d(PointerRNA_NULL), - b_rv3d(PointerRNA_NULL), - width(0), - height(0), - preview_osl(preview_osl), - python_thread_state(NULL), - use_developer_ui(false) -{ - /* offline render */ - background = true; - last_redraw_time = 0.0; - start_resize_time = 0.0; - last_status_time = 0.0; -} - -BlenderSession::BlenderSession(BL::RenderEngine &b_engine, - BL::Preferences &b_userpref, - BL::BlendData &b_data, - BL::SpaceView3D &b_v3d, - BL::RegionView3D &b_rv3d, - int width, - int height) - : session(NULL), - scene(NULL), - sync(NULL), - b_engine(b_engine), - b_userpref(b_userpref), - b_data(b_data), - b_render(b_engine.render()), - b_depsgraph(PointerRNA_NULL), - b_scene(PointerRNA_NULL), - b_v3d(b_v3d), - b_rv3d(b_rv3d), - width(width), - height(height), - preview_osl(false), - python_thread_state(NULL), - use_developer_ui(b_userpref.experimental().use_cycles_debug() && - b_userpref.view().show_developer_ui()) -{ - /* 3d view render */ - background = false; - last_redraw_time = 0.0; - start_resize_time = 0.0; - last_status_time = 0.0; -} - -BlenderSession::~BlenderSession() -{ - free_session(); -} - -void BlenderSession::create_session() -{ - const SessionParams session_params = BlenderSync::get_session_params( - b_engine, b_userpref, b_scene, background); - const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); - const bool session_pause = BlenderSync::get_session_pause(b_scene, background); - - /* reset status/progress */ - last_status = ""; - last_error = ""; - last_progress = -1.0f; - start_resize_time = 0.0; - - /* create session */ - session = new Session(session_params, scene_params); - session->progress.set_update_callback(function_bind(&BlenderSession::tag_redraw, this)); - session->progress.set_cancel_callback(function_bind(&BlenderSession::test_cancel, this)); - session->set_pause(session_pause); - - /* create scene */ - scene = session->scene; - scene->name = b_scene.name(); - - /* create sync */ - sync = new BlenderSync( - b_engine, b_data, b_scene, scene, !background, use_developer_ui, session->progress); - BL::Object 
b_camera_override(b_engine.camera_override()); - if (b_v3d) { - sync->sync_view(b_v3d, b_rv3d, width, height); - } - else { - sync->sync_camera(b_render, b_camera_override, width, height, ""); - } - - /* set buffer parameters */ - const BufferParams buffer_params = BlenderSync::get_buffer_params( - b_v3d, b_rv3d, scene->camera, width, height); - session->reset(session_params, buffer_params); - - /* Viewport and preview (as in, material preview) does not do tiled rendering, so can inform - * engine that no tracking of the tiles state is needed. - * The offline rendering will make a decision when tile is being written. The penalty of asking - * the engine to keep track of tiles state is minimal, so there is nothing to worry about here - * about possible single-tiled final render. */ - if (!b_engine.is_preview() && !b_v3d) { - b_engine.use_highlight_tiles(true); - } -} - -void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph) -{ - /* Update data, scene and depsgraph pointers. These can change after undo. */ - this->b_data = b_data; - this->b_depsgraph = b_depsgraph; - this->b_scene = b_depsgraph.scene_eval(); - if (sync) { - sync->reset(this->b_data, this->b_scene); - } - - if (preview_osl) { - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - RNA_boolean_set(&cscene, "shading_system", preview_osl); - } - - if (b_v3d) { - this->b_render = b_scene.render(); - } - else { - this->b_render = b_engine.render(); - width = render_resolution_x(b_render); - height = render_resolution_y(b_render); - } - - bool is_new_session = (session == NULL); - if (is_new_session) { - /* Initialize session and remember it was just created so not to - * re-create it below. - */ - create_session(); - } - - if (b_v3d) { - /* NOTE: We need to create session, but all the code from below - * will make viewport render to stuck on initialization. - */ - return; - } - - const SessionParams session_params = BlenderSync::get_session_params( - b_engine, b_userpref, b_scene, background); - const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); - - if (scene->params.modified(scene_params) || session->params.modified(session_params) || - !this->b_render.use_persistent_data()) { - /* if scene or session parameters changed, it's easier to simply re-create - * them rather than trying to distinguish which settings need to be updated - */ - if (!is_new_session) { - free_session(); - create_session(); - } - return; - } - - session->progress.reset(); - - /* peak memory usage should show current render peak, not peak for all renders - * made by this render session - */ - session->stats.mem_peak = session->stats.mem_used; - - if (is_new_session) { - /* Sync object should be re-created for new scene. */ - delete sync; - sync = new BlenderSync( - b_engine, b_data, b_scene, scene, !background, use_developer_ui, session->progress); - } - else { - /* Sync recalculations to do just the required updates. 
*/ - sync->sync_recalc(b_depsgraph, b_v3d); - } - - BL::Object b_camera_override(b_engine.camera_override()); - sync->sync_camera(b_render, b_camera_override, width, height, ""); - - BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL); - BL::RegionView3D b_null_region_view3d(PointerRNA_NULL); - const BufferParams buffer_params = BlenderSync::get_buffer_params( - b_null_space_view3d, b_null_region_view3d, scene->camera, width, height); - session->reset(session_params, buffer_params); - - /* reset time */ - start_resize_time = 0.0; - - { - thread_scoped_lock lock(draw_state_.mutex); - draw_state_.last_pass_index = -1; - } -} - -void BlenderSession::free_session() -{ - if (session) { - session->cancel(true); - } - - delete sync; - sync = nullptr; - - delete session; - session = nullptr; - - display_driver_ = nullptr; -} - -void BlenderSession::full_buffer_written(string_view filename) -{ - full_buffer_files_.emplace_back(filename); -} - -static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest) -{ - string identifier = string_printf("%08x", util_murmur_hash3(name.c_str(), name.length(), 0)); - string prefix = "cryptomatte/" + identifier.substr(0, 7) + "/"; - - render_add_metadata(b_rr, prefix + "name", name); - render_add_metadata(b_rr, prefix + "hash", "MurmurHash3_32"); - render_add_metadata(b_rr, prefix + "conversion", "uint32_to_float32"); - render_add_metadata(b_rr, prefix + "manifest", manifest); -} - -void BlenderSession::stamp_view_layer_metadata(Scene *scene, const string &view_layer_name) -{ - BL::RenderResult b_rr = b_engine.get_result(); - string prefix = "cycles." + view_layer_name + "."; - - /* Configured number of samples for the view layer. */ - b_rr.stamp_data_add_field((prefix + "samples").c_str(), - to_string(session->params.samples).c_str()); - - /* Store ranged samples information. */ - /* TODO(sergey): Need to bring this information back. */ -#if 0 - if (session->tile_manager.range_num_samples != -1) { - b_rr.stamp_data_add_field((prefix + "range_start_sample").c_str(), - to_string(session->tile_manager.range_start_sample).c_str()); - b_rr.stamp_data_add_field((prefix + "range_num_samples").c_str(), - to_string(session->tile_manager.range_num_samples).c_str()); - } -#endif - - /* Write cryptomatte metadata. */ - if (scene->film->get_cryptomatte_passes() & CRYPT_OBJECT) { - add_cryptomatte_layer(b_rr, - view_layer_name + ".CryptoObject", - scene->object_manager->get_cryptomatte_objects(scene)); - } - if (scene->film->get_cryptomatte_passes() & CRYPT_MATERIAL) { - add_cryptomatte_layer(b_rr, - view_layer_name + ".CryptoMaterial", - scene->shader_manager->get_cryptomatte_materials(scene)); - } - if (scene->film->get_cryptomatte_passes() & CRYPT_ASSET) { - add_cryptomatte_layer(b_rr, - view_layer_name + ".CryptoAsset", - scene->object_manager->get_cryptomatte_assets(scene)); - } - - /* Store synchronization and bare-render times. 
*/ - double total_time, render_time; - session->progress.get_time(total_time, render_time); - b_rr.stamp_data_add_field((prefix + "total_time").c_str(), - time_human_readable_from_seconds(total_time).c_str()); - b_rr.stamp_data_add_field((prefix + "render_time").c_str(), - time_human_readable_from_seconds(render_time).c_str()); - b_rr.stamp_data_add_field((prefix + "synchronization_time").c_str(), - time_human_readable_from_seconds(total_time - render_time).c_str()); -} - -void BlenderSession::render(BL::Depsgraph &b_depsgraph_) -{ - b_depsgraph = b_depsgraph_; - - if (session->progress.get_cancel()) { - update_status_progress(); - return; - } - - /* Create driver to write out render results. */ - ensure_display_driver_if_needed(); - session->set_output_driver(make_unique(b_engine)); - - session->full_buffer_written_cb = [&](string_view filename) { full_buffer_written(filename); }; - - BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval(); - - /* get buffer parameters */ - const SessionParams session_params = BlenderSync::get_session_params( - b_engine, b_userpref, b_scene, background); - BufferParams buffer_params = BlenderSync::get_buffer_params( - b_v3d, b_rv3d, scene->camera, width, height); - - /* temporary render result to find needed passes and views */ - BL::RenderResult b_rr = b_engine.begin_result(0, 0, 1, 1, b_view_layer.name().c_str(), NULL); - BL::RenderResult::layers_iterator b_single_rlay; - b_rr.layers.begin(b_single_rlay); - BL::RenderLayer b_rlay = *b_single_rlay; - - { - thread_scoped_lock lock(draw_state_.mutex); - b_rlay_name = b_view_layer.name(); - - /* Signal that the display pass is to be updated. */ - draw_state_.last_pass_index = -1; - } - - /* Compute render passes and film settings. */ - sync->sync_render_passes(b_rlay, b_view_layer); - - BL::RenderResult::views_iterator b_view_iter; - - int num_views = 0; - for (b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end(); ++b_view_iter) { - num_views++; - } - - int view_index = 0; - for (b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end(); - ++b_view_iter, ++view_index) { - b_rview_name = b_view_iter->name(); - - buffer_params.layer = b_view_layer.name(); - buffer_params.view = b_rview_name; - - /* set the current view */ - b_engine.active_view_set(b_rview_name.c_str()); - - /* update scene */ - BL::Object b_camera_override(b_engine.camera_override()); - sync->sync_camera(b_render, b_camera_override, width, height, b_rview_name.c_str()); - sync->sync_data( - b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state); - builtin_images_load(); - - /* Attempt to free all data which is held by Blender side, since at this - * point we know that we've got everything to render current view layer. - */ - /* At the moment we only free if we are not doing multi-view - * (or if we are rendering the last view). See T58142/D4239 for discussion. - */ - if (view_index == num_views - 1) { - free_blender_memory_if_possible(); - } - - /* Make sure all views have different noise patterns. - hardcoded value just to make it random - */ - if (view_index != 0) { - int seed = scene->integrator->get_seed(); - seed += hash_uint2(seed, hash_uint2(view_index * 0xdeadbeef, 0)); - scene->integrator->set_seed(seed); - } - - /* Update number of samples per layer. 
*/ - const int samples = sync->get_layer_samples(); - const bool bound_samples = sync->get_layer_bound_samples(); - - SessionParams effective_session_params = session_params; - if (samples != 0 && (!bound_samples || (samples < session_params.samples))) { - effective_session_params.samples = samples; - } - - /* Update session itself. */ - session->reset(effective_session_params, buffer_params); - - /* render */ - if (!b_engine.is_preview() && background && print_render_stats) { - scene->enable_update_stats(); - } - - session->start(); - session->wait(); - - if (!b_engine.is_preview() && background && print_render_stats) { - RenderStats stats; - session->collect_statistics(&stats); - printf("Render statistics:\n%s\n", stats.full_report().c_str()); - } - - if (session->progress.get_cancel()) - break; - } - - /* add metadata */ - stamp_view_layer_metadata(scene, b_rlay_name); - - /* free result without merging */ - b_engine.end_result(b_rr, true, false, false); - - /* When tiled rendering is used there will be no "write" done for the tile. Forcefully clear - * highlighted tiles now, so that the highlight will be removed while processing full frame from - * file. */ - b_engine.tile_highlight_clear_all(); - - double total_time, render_time; - session->progress.get_time(total_time, render_time); - VLOG(1) << "Total render time: " << total_time; - VLOG(1) << "Render time (without synchronization): " << render_time; -} - -void BlenderSession::render_frame_finish() -{ - /* Processing of all layers and views is done. Clear the strings so that we can communicate - * progress about reading files and denoising them. */ - b_rlay_name = ""; - b_rview_name = ""; - - if (!b_render.use_persistent_data()) { - /* Free the sync object so that it can properly dereference nodes from the scene graph before - * the graph is freed. */ - delete sync; - sync = nullptr; - - session->device_free(); - } - - for (string_view filename : full_buffer_files_) { - session->process_full_buffer_from_disk(filename); - if (check_and_report_session_error()) { - break; - } - } - - for (string_view filename : full_buffer_files_) { - path_remove(filename); - } - - /* Clear driver. */ - session->set_output_driver(nullptr); - session->full_buffer_written_cb = function_null; - - /* All the files are handled. - * Clear the list so that this session can be re-used by Persistent Data. 
*/ - full_buffer_files_.clear(); -} - -static PassType bake_type_to_pass(const string &bake_type_str, const int bake_filter) -{ - const char *bake_type = bake_type_str.c_str(); - - /* data passes */ - if (strcmp(bake_type, "POSITION") == 0) { - return PASS_POSITION; - } - else if (strcmp(bake_type, "NORMAL") == 0) { - return PASS_NORMAL; - } - else if (strcmp(bake_type, "UV") == 0) { - return PASS_UV; - } - else if (strcmp(bake_type, "ROUGHNESS") == 0) { - return PASS_ROUGHNESS; - } - else if (strcmp(bake_type, "EMIT") == 0) { - return PASS_EMISSION; - } - /* light passes */ - else if (strcmp(bake_type, "AO") == 0) { - return PASS_AO; - } - else if (strcmp(bake_type, "COMBINED") == 0) { - return PASS_COMBINED; - } - else if (strcmp(bake_type, "SHADOW") == 0) { - return PASS_SHADOW; - } - else if (strcmp(bake_type, "DIFFUSE") == 0) { - if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) && - bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { - return PASS_DIFFUSE; - } - else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) { - return PASS_DIFFUSE_DIRECT; - } - else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { - return PASS_DIFFUSE_INDIRECT; - } - else { - return PASS_DIFFUSE_COLOR; - } - } - else if (strcmp(bake_type, "GLOSSY") == 0) { - if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) && - bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { - return PASS_GLOSSY; - } - else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) { - return PASS_GLOSSY_DIRECT; - } - else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { - return PASS_GLOSSY_INDIRECT; - } - else { - return PASS_GLOSSY_COLOR; - } - } - else if (strcmp(bake_type, "TRANSMISSION") == 0) { - if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) && - bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { - return PASS_TRANSMISSION; - } - else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) { - return PASS_TRANSMISSION_DIRECT; - } - else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { - return PASS_TRANSMISSION_INDIRECT; - } - else { - return PASS_TRANSMISSION_COLOR; - } - } - /* extra */ - else if (strcmp(bake_type, "ENVIRONMENT") == 0) { - return PASS_BACKGROUND; - } - - return PASS_COMBINED; -} - -void BlenderSession::bake(BL::Depsgraph &b_depsgraph_, - BL::Object &b_object, - const string &bake_type, - const int bake_filter, - const int bake_width, - const int bake_height) -{ - b_depsgraph = b_depsgraph_; - - /* Initialize bake manager, before we load the baking kernels. */ - scene->bake_manager->set(scene, b_object.name()); - - /* Add render pass that we want to bake, and name it Combined so that it is - * used as that on the Blender side. */ - Pass *pass = scene->create_node(); - pass->set_name(ustring("Combined")); - pass->set_type(bake_type_to_pass(bake_type, bake_filter)); - pass->set_include_albedo((bake_filter & BL::BakeSettings::pass_filter_COLOR)); - - session->set_display_driver(nullptr); - session->set_output_driver(make_unique(b_engine)); - - if (!session->progress.get_cancel()) { - /* Sync scene. */ - BL::Object b_camera_override(b_engine.camera_override()); - sync->sync_camera(b_render, b_camera_override, width, height, ""); - sync->sync_data( - b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state); - builtin_images_load(); - } - - /* Object might have been disabled for rendering or excluded in some - * other way, in that case Blender will report a warning afterwards. 
*/ - bool object_found = false; - foreach (Object *ob, scene->objects) { - if (ob->name == b_object.name()) { - object_found = true; - break; - } - } - - if (object_found && !session->progress.get_cancel()) { - /* Get session and buffer parameters. */ - const SessionParams session_params = BlenderSync::get_session_params( - b_engine, b_userpref, b_scene, background); - - BufferParams buffer_params; - buffer_params.width = bake_width; - buffer_params.height = bake_height; - - /* Update session. */ - session->reset(session_params, buffer_params); - - session->progress.set_update_callback( - function_bind(&BlenderSession::update_bake_progress, this)); - } - - /* Perform bake. Check cancel to avoid crash with incomplete scene data. */ - if (object_found && !session->progress.get_cancel()) { - session->start(); - session->wait(); - } - - session->set_output_driver(nullptr); -} - -void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_) -{ - /* only used for viewport render */ - if (!b_v3d) - return; - - /* on session/scene parameter changes, we recreate session entirely */ - const SessionParams session_params = BlenderSync::get_session_params( - b_engine, b_userpref, b_scene, background); - const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); - const bool session_pause = BlenderSync::get_session_pause(b_scene, background); - - if (session->params.modified(session_params) || scene->params.modified(scene_params)) { - free_session(); - create_session(); - } - - ensure_display_driver_if_needed(); - - /* increase samples and render time, but never decrease */ - session->set_samples(session_params.samples); - session->set_time_limit(session_params.time_limit); - session->set_pause(session_pause); - - /* copy recalc flags, outside of mutex so we can decide to do the real - * synchronization at a later time to not block on running updates */ - sync->sync_recalc(b_depsgraph_, b_v3d); - - /* don't do synchronization if on pause */ - if (session_pause) { - tag_update(); - return; - } - - /* try to acquire mutex. if we don't want to or can't, come back later */ - if (!session->ready_to_reset() || !session->scene->mutex.try_lock()) { - tag_update(); - return; - } - - /* data and camera synchronize */ - b_depsgraph = b_depsgraph_; - - BL::Object b_camera_override(b_engine.camera_override()); - sync->sync_data( - b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state); - - if (b_rv3d) - sync->sync_view(b_v3d, b_rv3d, width, height); - else - sync->sync_camera(b_render, b_camera_override, width, height, ""); - - /* get buffer parameters */ - const BufferParams buffer_params = BlenderSync::get_buffer_params( - b_v3d, b_rv3d, scene->camera, width, height); - - /* reset if needed */ - if (scene->need_reset()) { - session->reset(session_params, buffer_params); - - /* After session reset, so device is not accessing image data anymore. */ - builtin_images_load(); - - /* reset time */ - start_resize_time = 0.0; - } - - /* unlock */ - session->scene->mutex.unlock(); - - /* Start rendering thread, if it's not running already. Do this - * after all scene data has been synced at least once. */ - session->start(); -} - -void BlenderSession::draw(BL::SpaceImageEditor &space_image) -{ - if (!session || !session->scene) { - /* Offline render drawing does not force the render engine update, which means it's possible - * that the Session is not created yet. 
*/ - return; - } - - thread_scoped_lock lock(draw_state_.mutex); - - const int pass_index = space_image.image_user().multilayer_pass(); - if (pass_index != draw_state_.last_pass_index) { - BL::RenderPass b_display_pass(b_engine.pass_by_index_get(b_rlay_name.c_str(), pass_index)); - if (!b_display_pass) { - return; - } - - Scene *scene = session->scene; - - thread_scoped_lock lock(scene->mutex); - - const Pass *pass = Pass::find(scene->passes, b_display_pass.name()); - if (!pass) { - return; - } - - scene->film->set_display_pass(pass->get_type()); - - draw_state_.last_pass_index = pass_index; - } - - if (display_driver_) { - BL::Array zoom = space_image.zoom(); - display_driver_->set_zoom(zoom[0], zoom[1]); - } - - session->draw(); -} - -void BlenderSession::view_draw(int w, int h) -{ - /* pause in redraw in case update is not being called due to final render */ - session->set_pause(BlenderSync::get_session_pause(b_scene, background)); - - /* before drawing, we verify camera and viewport size changes, because - * we do not get update callbacks for those, we must detect them here */ - if (session->ready_to_reset()) { - bool reset = false; - - /* if dimensions changed, reset */ - if (width != w || height != h) { - if (start_resize_time == 0.0) { - /* don't react immediately to resizes to avoid flickery resizing - * of the viewport, and some window managers changing the window - * size temporarily on unminimize */ - start_resize_time = time_dt(); - tag_redraw(); - } - else if (time_dt() - start_resize_time < 0.2) { - tag_redraw(); - } - else { - width = w; - height = h; - reset = true; - } - } - - /* try to acquire mutex. if we can't, come back later */ - if (!session->scene->mutex.try_lock()) { - tag_update(); - } - else { - /* update camera from 3d view */ - - sync->sync_view(b_v3d, b_rv3d, width, height); - - if (scene->camera->is_modified()) - reset = true; - - session->scene->mutex.unlock(); - } - - /* reset if requested */ - if (reset) { - const SessionParams session_params = BlenderSync::get_session_params( - b_engine, b_userpref, b_scene, background); - const BufferParams buffer_params = BlenderSync::get_buffer_params( - b_v3d, b_rv3d, scene->camera, width, height); - const bool session_pause = BlenderSync::get_session_pause(b_scene, background); - - if (session_pause == false) { - session->reset(session_params, buffer_params); - start_resize_time = 0.0; - } - } - } - else { - tag_update(); - } - - /* update status and progress for 3d view draw */ - update_status_progress(); - - /* draw */ - session->draw(); -} - -void BlenderSession::get_status(string &status, string &substatus) -{ - session->progress.get_status(status, substatus); -} - -void BlenderSession::get_progress(float &progress, double &total_time, double &render_time) -{ - session->progress.get_time(total_time, render_time); - progress = session->progress.get_progress(); -} - -void BlenderSession::update_bake_progress() -{ - float progress = session->progress.get_progress(); - - if (progress != last_progress) { - b_engine.update_progress(progress); - last_progress = progress; - } -} - -void BlenderSession::update_status_progress() -{ - string timestatus, status, substatus; - string scene_status = ""; - float progress; - double total_time, remaining_time = 0, render_time; - float mem_used = (float)session->stats.mem_used / 1024.0f / 1024.0f; - float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f; - - get_status(status, substatus); - get_progress(progress, total_time, render_time); - - if (progress > 0) { - 
remaining_time = session->get_estimated_remaining_time(); - } - - if (background) { - if (scene) - scene_status += " | " + scene->name; - if (b_rlay_name != "") - scene_status += ", " + b_rlay_name; - - if (b_rview_name != "") - scene_status += ", " + b_rview_name; - - if (remaining_time > 0) { - timestatus += "Remaining:" + time_human_readable_from_seconds(remaining_time) + " | "; - } - - timestatus += string_printf("Mem:%.2fM, Peak:%.2fM", (double)mem_used, (double)mem_peak); - - if (status.size() > 0) - status = " | " + status; - if (substatus.size() > 0) - status += " | " + substatus; - } - - double current_time = time_dt(); - /* When rendering in a window, redraw the status at least once per second to keep the elapsed - * and remaining time up-to-date. For headless rendering, only report when something - * significant changes to keep the console output readable. */ - if (status != last_status || (!headless && (current_time - last_status_time) > 1.0)) { - b_engine.update_stats("", (timestatus + scene_status + status).c_str()); - b_engine.update_memory_stats(mem_used, mem_peak); - last_status = status; - last_status_time = current_time; - } - if (progress != last_progress) { - b_engine.update_progress(progress); - last_progress = progress; - } - - check_and_report_session_error(); -} - -bool BlenderSession::check_and_report_session_error() -{ - if (!session->progress.get_error()) { - return false; - } - - const string error = session->progress.get_error_message(); - if (error != last_error) { - /* TODO(sergey): Currently C++ RNA API doesn't let us to use mnemonic name for the variable. - * Would be nice to have this figured out. - * - * For until then, 1 << 5 means RPT_ERROR. */ - b_engine.report(1 << 5, error.c_str()); - b_engine.error_set(error.c_str()); - last_error = error; - } - - return true; -} - -void BlenderSession::tag_update() -{ - /* tell blender that we want to get another update callback */ - b_engine.tag_update(); -} - -void BlenderSession::tag_redraw() -{ - if (background) { - /* update stats and progress, only for background here because - * in 3d view we do it in draw for thread safety reasons */ - update_status_progress(); - - /* offline render, redraw if timeout passed */ - if (time_dt() - last_redraw_time > 1.0) { - b_engine.tag_redraw(); - last_redraw_time = time_dt(); - } - } - else { - /* tell blender that we want to redraw */ - b_engine.tag_redraw(); - } -} - -void BlenderSession::test_cancel() -{ - /* test if we need to cancel rendering */ - if (background) - if (b_engine.test_break()) - session->progress.set_cancel("Cancelled"); -} - -void BlenderSession::free_blender_memory_if_possible() -{ - if (!background) { - /* During interactive render we can not free anything: attempts to save - * memory would cause things to be allocated and evaluated for every - * updated sample. - */ - return; - } - b_engine.free_blender_memory(); -} - -void BlenderSession::ensure_display_driver_if_needed() -{ - if (display_driver_) { - /* Driver is already created. */ - return; - } - - if (headless) { - /* No display needed for headless. */ - return; - } - - if (b_engine.is_preview()) { - /* TODO(sergey): Investigate whether DisplayDriver can be used for the preview as well. 
*/ - return; - } - - unique_ptr display_driver = make_unique(b_engine, - b_scene); - display_driver_ = display_driver.get(); - session->set_display_driver(move(display_driver)); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h deleted file mode 100644 index 9bc685ec306..00000000000 --- a/intern/cycles/blender/blender_session.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BLENDER_SESSION_H__ -#define __BLENDER_SESSION_H__ - -#include "RNA_blender_cpp.h" - -#include "device/device.h" - -#include "scene/bake.h" -#include "scene/scene.h" -#include "session/session.h" - -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -class BlenderDisplayDriver; -class BlenderSync; -class ImageMetaData; -class Scene; -class Session; - -class BlenderSession { - public: - BlenderSession(BL::RenderEngine &b_engine, - BL::Preferences &b_userpref, - BL::BlendData &b_data, - bool preview_osl); - - BlenderSession(BL::RenderEngine &b_engine, - BL::Preferences &b_userpref, - BL::BlendData &b_data, - BL::SpaceView3D &b_v3d, - BL::RegionView3D &b_rv3d, - int width, - int height); - - ~BlenderSession(); - - /* session */ - void create_session(); - void free_session(); - - void reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph); - - /* offline render */ - void render(BL::Depsgraph &b_depsgraph); - - void render_frame_finish(); - - void bake(BL::Depsgraph &b_depsgrah, - BL::Object &b_object, - const string &pass_type, - const int custom_flag, - const int bake_width, - const int bake_height); - - void full_buffer_written(string_view filename); - /* interactive updates */ - void synchronize(BL::Depsgraph &b_depsgraph); - - /* drawing */ - void draw(BL::SpaceImageEditor &space_image); - void view_draw(int w, int h); - void tag_redraw(); - void tag_update(); - void get_status(string &status, string &substatus); - void get_progress(float &progress, double &total_time, double &render_time); - void test_cancel(); - void update_status_progress(); - void update_bake_progress(); - - bool background; - Session *session; - Scene *scene; - BlenderSync *sync; - double last_redraw_time; - - BL::RenderEngine b_engine; - BL::Preferences b_userpref; - BL::BlendData b_data; - BL::RenderSettings b_render; - BL::Depsgraph b_depsgraph; - /* NOTE: Blender's scene might become invalid after call - * #free_blender_memory_if_possible(). */ - BL::Scene b_scene; - BL::SpaceView3D b_v3d; - BL::RegionView3D b_rv3d; - string b_rlay_name; - string b_rview_name; - - string last_status; - string last_error; - float last_progress; - double last_status_time; - - int width, height; - bool preview_osl; - double start_resize_time; - - void *python_thread_state; - - bool use_developer_ui; - - /* Global state which is common for all render sessions created from Blender. - * Usually denotes command line arguments. 
- */ - static DeviceTypeMask device_override; - - /* Blender is running from the command line, no windows are shown and some - * extra render optimization is possible (possible to free draw-only data and - * so on. - */ - static bool headless; - - static bool print_render_stats; - - protected: - void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name); - - /* Check whether session error happened. - * If so, it is reported to the render engine and true is returned. - * Otherwise false is returned. */ - bool check_and_report_session_error(); - - void builtin_images_load(); - - /* Is used after each render layer synchronization is done with the goal - * of freeing render engine data which is held from Blender side (for - * example, dependency graph). - */ - void free_blender_memory_if_possible(); - - void ensure_display_driver_if_needed(); - - struct { - thread_mutex mutex; - int last_pass_index = -1; - } draw_state_; - - /* NOTE: The BlenderSession references the display driver. */ - BlenderDisplayDriver *display_driver_ = nullptr; - - vector full_buffer_files_; -}; - -CCL_NAMESPACE_END - -#endif /* __BLENDER_SESSION_H__ */ diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp deleted file mode 100644 index 8d3bbb520c8..00000000000 --- a/intern/cycles/blender/blender_shader.cpp +++ /dev/null @@ -1,1589 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "scene/background.h" -#include "scene/colorspace.h" -#include "scene/integrator.h" -#include "scene/light.h" -#include "scene/osl.h" -#include "scene/scene.h" -#include "scene/shader.h" -#include "scene/shader_graph.h" -#include "scene/shader_nodes.h" - -#include "blender/blender_image.h" -#include "blender/blender_sync.h" -#include "blender/blender_texture.h" -#include "blender/blender_util.h" - -#include "util/util_debug.h" -#include "util/util_foreach.h" -#include "util/util_set.h" -#include "util/util_string.h" -#include "util/util_task.h" - -CCL_NAMESPACE_BEGIN - -typedef map PtrInputMap; -typedef map PtrOutputMap; -typedef map ProxyMap; - -/* Find */ - -void BlenderSync::find_shader(BL::ID &id, array &used_shaders, Shader *default_shader) -{ - Shader *shader = (id) ? 
shader_map.find(id) : default_shader; - - used_shaders.push_back_slow(shader); - shader->tag_used(scene); -} - -/* RNA translation utilities */ - -static VolumeSampling get_volume_sampling(PointerRNA &ptr) -{ - return (VolumeSampling)get_enum( - ptr, "volume_sampling", VOLUME_NUM_SAMPLING, VOLUME_SAMPLING_DISTANCE); -} - -static VolumeInterpolation get_volume_interpolation(PointerRNA &ptr) -{ - return (VolumeInterpolation)get_enum( - ptr, "volume_interpolation", VOLUME_NUM_INTERPOLATION, VOLUME_INTERPOLATION_LINEAR); -} - -static DisplacementMethod get_displacement_method(PointerRNA &ptr) -{ - return (DisplacementMethod)get_enum( - ptr, "displacement_method", DISPLACE_NUM_METHODS, DISPLACE_BUMP); -} - -static int validate_enum_value(int value, int num_values, int default_value) -{ - if (value >= num_values) { - return default_value; - } - return value; -} - -template static InterpolationType get_image_interpolation(NodeType &b_node) -{ - int value = b_node.interpolation(); - return (InterpolationType)validate_enum_value( - value, INTERPOLATION_NUM_TYPES, INTERPOLATION_LINEAR); -} - -template static ExtensionType get_image_extension(NodeType &b_node) -{ - int value = b_node.extension(); - return (ExtensionType)validate_enum_value(value, EXTENSION_NUM_TYPES, EXTENSION_REPEAT); -} - -static ImageAlphaType get_image_alpha_type(BL::Image &b_image) -{ - int value = b_image.alpha_mode(); - return (ImageAlphaType)validate_enum_value(value, IMAGE_ALPHA_NUM_TYPES, IMAGE_ALPHA_AUTO); -} - -/* Attribute name translation utilities */ - -/* Since Eevee needs to know whether the attribute is uniform or varying - * at the time it compiles the shader for the material, Blender had to - * introduce different namespaces (types) in its attribute node. However, - * Cycles already has object attributes that form a uniform namespace with - * the more common varying attributes. Without completely reworking the - * attribute handling in Cycles to introduce separate namespaces (this could - * be especially hard for OSL which directly uses the name string), the - * space identifier has to be added to the attribute name as a prefix. - * - * The prefixes include a control character to ensure the user specified - * name can't accidentally include a special prefix. 
- */ - -static const string_view object_attr_prefix("\x01object:"); -static const string_view instancer_attr_prefix("\x01instancer:"); - -static ustring blender_attribute_name_add_type(const string &name, BlenderAttributeType type) -{ - switch (type) { - case BL::ShaderNodeAttribute::attribute_type_OBJECT: - return ustring::concat(object_attr_prefix, name); - case BL::ShaderNodeAttribute::attribute_type_INSTANCER: - return ustring::concat(instancer_attr_prefix, name); - default: - return ustring(name); - } -} - -BlenderAttributeType blender_attribute_name_split_type(ustring name, string *r_real_name) -{ - string_view sname(name); - - if (sname.substr(0, object_attr_prefix.size()) == object_attr_prefix) { - *r_real_name = sname.substr(object_attr_prefix.size()); - return BL::ShaderNodeAttribute::attribute_type_OBJECT; - } - - if (sname.substr(0, instancer_attr_prefix.size()) == instancer_attr_prefix) { - *r_real_name = sname.substr(instancer_attr_prefix.size()); - return BL::ShaderNodeAttribute::attribute_type_INSTANCER; - } - - return BL::ShaderNodeAttribute::attribute_type_GEOMETRY; -} - -/* Graph */ - -static BL::NodeSocket get_node_output(BL::Node &b_node, const string &name) -{ - for (BL::NodeSocket &b_out : b_node.outputs) { - if (b_out.identifier() == name) { - return b_out; - } - } - assert(0); - return *b_node.outputs.begin(); -} - -static float3 get_node_output_rgba(BL::Node &b_node, const string &name) -{ - BL::NodeSocket b_sock = get_node_output(b_node, name); - float value[4]; - RNA_float_get_array(&b_sock.ptr, "default_value", value); - return make_float3(value[0], value[1], value[2]); -} - -static float get_node_output_value(BL::Node &b_node, const string &name) -{ - BL::NodeSocket b_sock = get_node_output(b_node, name); - return RNA_float_get(&b_sock.ptr, "default_value"); -} - -static float3 get_node_output_vector(BL::Node &b_node, const string &name) -{ - BL::NodeSocket b_sock = get_node_output(b_node, name); - float value[3]; - RNA_float_get_array(&b_sock.ptr, "default_value", value); - return make_float3(value[0], value[1], value[2]); -} - -static SocketType::Type convert_socket_type(BL::NodeSocket &b_socket) -{ - switch (b_socket.type()) { - case BL::NodeSocket::type_VALUE: - return SocketType::FLOAT; - case BL::NodeSocket::type_INT: - return SocketType::INT; - case BL::NodeSocket::type_VECTOR: - return SocketType::VECTOR; - case BL::NodeSocket::type_RGBA: - return SocketType::COLOR; - case BL::NodeSocket::type_STRING: - return SocketType::STRING; - case BL::NodeSocket::type_SHADER: - return SocketType::CLOSURE; - - default: - return SocketType::UNDEFINED; - } -} - -static void set_default_value(ShaderInput *input, - BL::NodeSocket &b_sock, - BL::BlendData &b_data, - BL::ID &b_id) -{ - Node *node = input->parent; - const SocketType &socket = input->socket_type; - - /* copy values for non linked inputs */ - switch (input->type()) { - case SocketType::FLOAT: { - node->set(socket, get_float(b_sock.ptr, "default_value")); - break; - } - case SocketType::INT: { - if (b_sock.type() == BL::NodeSocket::type_BOOLEAN) { - node->set(socket, get_boolean(b_sock.ptr, "default_value")); - } - else { - node->set(socket, get_int(b_sock.ptr, "default_value")); - } - break; - } - case SocketType::COLOR: { - node->set(socket, float4_to_float3(get_float4(b_sock.ptr, "default_value"))); - break; - } - case SocketType::NORMAL: - case SocketType::POINT: - case SocketType::VECTOR: { - node->set(socket, get_float3(b_sock.ptr, "default_value")); - break; - } - case SocketType::STRING: { - 
node->set( - socket, - (ustring)blender_absolute_path(b_data, b_id, get_string(b_sock.ptr, "default_value"))); - break; - } - default: - break; - } -} - -static void get_tex_mapping(TextureNode *mapping, BL::TexMapping &b_mapping) -{ - if (!b_mapping) - return; - - mapping->set_tex_mapping_translation(get_float3(b_mapping.translation())); - mapping->set_tex_mapping_rotation(get_float3(b_mapping.rotation())); - mapping->set_tex_mapping_scale(get_float3(b_mapping.scale())); - mapping->set_tex_mapping_type((TextureMapping::Type)b_mapping.vector_type()); - - mapping->set_tex_mapping_x_mapping((TextureMapping::Mapping)b_mapping.mapping_x()); - mapping->set_tex_mapping_y_mapping((TextureMapping::Mapping)b_mapping.mapping_y()); - mapping->set_tex_mapping_z_mapping((TextureMapping::Mapping)b_mapping.mapping_z()); -} - -static ShaderNode *add_node(Scene *scene, - BL::RenderEngine &b_engine, - BL::BlendData &b_data, - BL::Depsgraph &b_depsgraph, - BL::Scene &b_scene, - ShaderGraph *graph, - BL::ShaderNodeTree &b_ntree, - BL::ShaderNode &b_node) -{ - ShaderNode *node = NULL; - - /* existing blender nodes */ - if (b_node.is_a(&RNA_ShaderNodeRGBCurve)) { - BL::ShaderNodeRGBCurve b_curve_node(b_node); - BL::CurveMapping mapping(b_curve_node.mapping()); - RGBCurvesNode *curves = graph->create_node(); - array curve_mapping_curves; - float min_x, max_x; - curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, true); - curvemapping_minmax(mapping, 4, &min_x, &max_x); - curves->set_min_x(min_x); - curves->set_max_x(max_x); - curves->set_curves(curve_mapping_curves); - node = curves; - } - if (b_node.is_a(&RNA_ShaderNodeVectorCurve)) { - BL::ShaderNodeVectorCurve b_curve_node(b_node); - BL::CurveMapping mapping(b_curve_node.mapping()); - VectorCurvesNode *curves = graph->create_node(); - array curve_mapping_curves; - float min_x, max_x; - curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, false); - curvemapping_minmax(mapping, 3, &min_x, &max_x); - curves->set_min_x(min_x); - curves->set_max_x(max_x); - curves->set_curves(curve_mapping_curves); - node = curves; - } - else if (b_node.is_a(&RNA_ShaderNodeFloatCurve)) { - BL::ShaderNodeFloatCurve b_curve_node(b_node); - BL::CurveMapping mapping(b_curve_node.mapping()); - FloatCurveNode *curve = graph->create_node(); - array curve_mapping_curve; - float min_x, max_x; - curvemapping_float_to_array(mapping, curve_mapping_curve, RAMP_TABLE_SIZE); - curvemapping_minmax(mapping, 1, &min_x, &max_x); - curve->set_min_x(min_x); - curve->set_max_x(max_x); - curve->set_curve(curve_mapping_curve); - node = curve; - } - else if (b_node.is_a(&RNA_ShaderNodeValToRGB)) { - RGBRampNode *ramp = graph->create_node(); - BL::ShaderNodeValToRGB b_ramp_node(b_node); - BL::ColorRamp b_color_ramp(b_ramp_node.color_ramp()); - array ramp_values; - array ramp_alpha; - colorramp_to_array(b_color_ramp, ramp_values, ramp_alpha, RAMP_TABLE_SIZE); - ramp->set_ramp(ramp_values); - ramp->set_ramp_alpha(ramp_alpha); - ramp->set_interpolate(b_color_ramp.interpolation() != BL::ColorRamp::interpolation_CONSTANT); - node = ramp; - } - else if (b_node.is_a(&RNA_ShaderNodeRGB)) { - ColorNode *color = graph->create_node(); - color->set_value(get_node_output_rgba(b_node, "Color")); - node = color; - } - else if (b_node.is_a(&RNA_ShaderNodeValue)) { - ValueNode *value = graph->create_node(); - value->set_value(get_node_output_value(b_node, "Value")); - node = value; - } - else if (b_node.is_a(&RNA_ShaderNodeCameraData)) { - node = graph->create_node(); - } - 
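The curve and ramp branches above bake Blender's CurveMapping into a fixed-size table plus a [min_x, max_x] domain, so evaluation later only needs a remap and a table lookup. A rough single-channel sketch of that evaluation, assuming linear interpolation (C++17 for std::clamp); the exact kernel-side code differs:

#include <algorithm>
#include <cstdio>
#include <vector>

/* Evaluate a baked 1D curve: remap x from [min_x, max_x] to [0, 1],
 * then linearly interpolate between the sampled table entries. */
static float eval_baked_curve(const std::vector<float> &table, float min_x, float max_x, float x)
{
  const float t = std::clamp((x - min_x) / (max_x - min_x), 0.0f, 1.0f);
  const float f = t * (table.size() - 1);
  const int i = std::min((int)f, (int)table.size() - 2);
  const float frac = f - i;
  return table[i] * (1.0f - frac) + table[i + 1] * frac;
}

int main()
{
  /* A tiny 5-entry "identity" curve sampled over [0, 1]. */
  std::vector<float> table = {0.0f, 0.25f, 0.5f, 0.75f, 1.0f};
  printf("%.3f\n", eval_baked_curve(table, 0.0f, 1.0f, 0.6f)); /* prints ~0.600 */
  return 0;
}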
else if (b_node.is_a(&RNA_ShaderNodeInvert)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeGamma)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeBrightContrast)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeMixRGB)) { - BL::ShaderNodeMixRGB b_mix_node(b_node); - MixNode *mix = graph->create_node(); - mix->set_mix_type((NodeMix)b_mix_node.blend_type()); - mix->set_use_clamp(b_mix_node.use_clamp()); - node = mix; - } - else if (b_node.is_a(&RNA_ShaderNodeSeparateRGB)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeCombineRGB)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeSeparateHSV)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeCombineHSV)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeSeparateXYZ)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeCombineXYZ)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeHueSaturation)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeRGBToBW)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeMapRange)) { - BL::ShaderNodeMapRange b_map_range_node(b_node); - MapRangeNode *map_range_node = graph->create_node(); - map_range_node->set_clamp(b_map_range_node.clamp()); - map_range_node->set_range_type((NodeMapRangeType)b_map_range_node.interpolation_type()); - node = map_range_node; - } - else if (b_node.is_a(&RNA_ShaderNodeClamp)) { - BL::ShaderNodeClamp b_clamp_node(b_node); - ClampNode *clamp_node = graph->create_node(); - clamp_node->set_clamp_type((NodeClampType)b_clamp_node.clamp_type()); - node = clamp_node; - } - else if (b_node.is_a(&RNA_ShaderNodeMath)) { - BL::ShaderNodeMath b_math_node(b_node); - MathNode *math_node = graph->create_node(); - math_node->set_math_type((NodeMathType)b_math_node.operation()); - math_node->set_use_clamp(b_math_node.use_clamp()); - node = math_node; - } - else if (b_node.is_a(&RNA_ShaderNodeVectorMath)) { - BL::ShaderNodeVectorMath b_vector_math_node(b_node); - VectorMathNode *vector_math_node = graph->create_node(); - vector_math_node->set_math_type((NodeVectorMathType)b_vector_math_node.operation()); - node = vector_math_node; - } - else if (b_node.is_a(&RNA_ShaderNodeVectorRotate)) { - BL::ShaderNodeVectorRotate b_vector_rotate_node(b_node); - VectorRotateNode *vector_rotate_node = graph->create_node(); - vector_rotate_node->set_rotate_type( - (NodeVectorRotateType)b_vector_rotate_node.rotation_type()); - vector_rotate_node->set_invert(b_vector_rotate_node.invert()); - node = vector_rotate_node; - } - else if (b_node.is_a(&RNA_ShaderNodeVectorTransform)) { - BL::ShaderNodeVectorTransform b_vector_transform_node(b_node); - VectorTransformNode *vtransform = graph->create_node(); - vtransform->set_transform_type((NodeVectorTransformType)b_vector_transform_node.vector_type()); - vtransform->set_convert_from( - (NodeVectorTransformConvertSpace)b_vector_transform_node.convert_from()); - vtransform->set_convert_to( - (NodeVectorTransformConvertSpace)b_vector_transform_node.convert_to()); - node = vtransform; - } - else if (b_node.is_a(&RNA_ShaderNodeNormal)) { - BL::Node::outputs_iterator out_it; - b_node.outputs.begin(out_it); - - NormalNode *norm = graph->create_node(); - norm->set_direction(get_node_output_vector(b_node, "Normal")); - node = norm; - } - else if (b_node.is_a(&RNA_ShaderNodeMapping)) { - 
BL::ShaderNodeMapping b_mapping_node(b_node); - MappingNode *mapping = graph->create_node(); - mapping->set_mapping_type((NodeMappingType)b_mapping_node.vector_type()); - node = mapping; - } - else if (b_node.is_a(&RNA_ShaderNodeFresnel)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeLayerWeight)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeAddShader)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeMixShader)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeAttribute)) { - BL::ShaderNodeAttribute b_attr_node(b_node); - AttributeNode *attr = graph->create_node(); - attr->set_attribute(blender_attribute_name_add_type(b_attr_node.attribute_name(), - b_attr_node.attribute_type())); - node = attr; - } - else if (b_node.is_a(&RNA_ShaderNodeBackground)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeHoldout)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeBsdfAnisotropic)) { - BL::ShaderNodeBsdfAnisotropic b_aniso_node(b_node); - AnisotropicBsdfNode *aniso = graph->create_node(); - - switch (b_aniso_node.distribution()) { - case BL::ShaderNodeBsdfAnisotropic::distribution_BECKMANN: - aniso->set_distribution(CLOSURE_BSDF_MICROFACET_BECKMANN_ID); - break; - case BL::ShaderNodeBsdfAnisotropic::distribution_GGX: - aniso->set_distribution(CLOSURE_BSDF_MICROFACET_GGX_ID); - break; - case BL::ShaderNodeBsdfAnisotropic::distribution_MULTI_GGX: - aniso->set_distribution(CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID); - break; - case BL::ShaderNodeBsdfAnisotropic::distribution_ASHIKHMIN_SHIRLEY: - aniso->set_distribution(CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID); - break; - } - - node = aniso; - } - else if (b_node.is_a(&RNA_ShaderNodeBsdfDiffuse)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeSubsurfaceScattering)) { - BL::ShaderNodeSubsurfaceScattering b_subsurface_node(b_node); - - SubsurfaceScatteringNode *subsurface = graph->create_node(); - - switch (b_subsurface_node.falloff()) { - case BL::ShaderNodeSubsurfaceScattering::falloff_BURLEY: - subsurface->set_method(CLOSURE_BSSRDF_BURLEY_ID); - break; - case BL::ShaderNodeSubsurfaceScattering::falloff_RANDOM_WALK_FIXED_RADIUS: - subsurface->set_method(CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); - break; - case BL::ShaderNodeSubsurfaceScattering::falloff_RANDOM_WALK: - subsurface->set_method(CLOSURE_BSSRDF_RANDOM_WALK_ID); - break; - } - - node = subsurface; - } - else if (b_node.is_a(&RNA_ShaderNodeBsdfGlossy)) { - BL::ShaderNodeBsdfGlossy b_glossy_node(b_node); - GlossyBsdfNode *glossy = graph->create_node(); - - switch (b_glossy_node.distribution()) { - case BL::ShaderNodeBsdfGlossy::distribution_SHARP: - glossy->set_distribution(CLOSURE_BSDF_REFLECTION_ID); - break; - case BL::ShaderNodeBsdfGlossy::distribution_BECKMANN: - glossy->set_distribution(CLOSURE_BSDF_MICROFACET_BECKMANN_ID); - break; - case BL::ShaderNodeBsdfGlossy::distribution_GGX: - glossy->set_distribution(CLOSURE_BSDF_MICROFACET_GGX_ID); - break; - case BL::ShaderNodeBsdfGlossy::distribution_ASHIKHMIN_SHIRLEY: - glossy->set_distribution(CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID); - break; - case BL::ShaderNodeBsdfGlossy::distribution_MULTI_GGX: - glossy->set_distribution(CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID); - break; - } - node = glossy; - } - else if (b_node.is_a(&RNA_ShaderNodeBsdfGlass)) { - BL::ShaderNodeBsdfGlass b_glass_node(b_node); - GlassBsdfNode *glass = graph->create_node(); - switch 
(b_glass_node.distribution()) { - case BL::ShaderNodeBsdfGlass::distribution_SHARP: - glass->set_distribution(CLOSURE_BSDF_SHARP_GLASS_ID); - break; - case BL::ShaderNodeBsdfGlass::distribution_BECKMANN: - glass->set_distribution(CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID); - break; - case BL::ShaderNodeBsdfGlass::distribution_GGX: - glass->set_distribution(CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID); - break; - case BL::ShaderNodeBsdfGlass::distribution_MULTI_GGX: - glass->set_distribution(CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID); - break; - } - node = glass; - } - else if (b_node.is_a(&RNA_ShaderNodeBsdfRefraction)) { - BL::ShaderNodeBsdfRefraction b_refraction_node(b_node); - RefractionBsdfNode *refraction = graph->create_node(); - switch (b_refraction_node.distribution()) { - case BL::ShaderNodeBsdfRefraction::distribution_SHARP: - refraction->set_distribution(CLOSURE_BSDF_REFRACTION_ID); - break; - case BL::ShaderNodeBsdfRefraction::distribution_BECKMANN: - refraction->set_distribution(CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID); - break; - case BL::ShaderNodeBsdfRefraction::distribution_GGX: - refraction->set_distribution(CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID); - break; - } - node = refraction; - } - else if (b_node.is_a(&RNA_ShaderNodeBsdfToon)) { - BL::ShaderNodeBsdfToon b_toon_node(b_node); - ToonBsdfNode *toon = graph->create_node(); - switch (b_toon_node.component()) { - case BL::ShaderNodeBsdfToon::component_DIFFUSE: - toon->set_component(CLOSURE_BSDF_DIFFUSE_TOON_ID); - break; - case BL::ShaderNodeBsdfToon::component_GLOSSY: - toon->set_component(CLOSURE_BSDF_GLOSSY_TOON_ID); - break; - } - node = toon; - } - else if (b_node.is_a(&RNA_ShaderNodeBsdfHair)) { - BL::ShaderNodeBsdfHair b_hair_node(b_node); - HairBsdfNode *hair = graph->create_node(); - switch (b_hair_node.component()) { - case BL::ShaderNodeBsdfHair::component_Reflection: - hair->set_component(CLOSURE_BSDF_HAIR_REFLECTION_ID); - break; - case BL::ShaderNodeBsdfHair::component_Transmission: - hair->set_component(CLOSURE_BSDF_HAIR_TRANSMISSION_ID); - break; - } - node = hair; - } - else if (b_node.is_a(&RNA_ShaderNodeBsdfHairPrincipled)) { - BL::ShaderNodeBsdfHairPrincipled b_principled_hair_node(b_node); - PrincipledHairBsdfNode *principled_hair = graph->create_node(); - principled_hair->set_parametrization( - (NodePrincipledHairParametrization)get_enum(b_principled_hair_node.ptr, - "parametrization", - NODE_PRINCIPLED_HAIR_NUM, - NODE_PRINCIPLED_HAIR_REFLECTANCE)); - node = principled_hair; - } - else if (b_node.is_a(&RNA_ShaderNodeBsdfPrincipled)) { - BL::ShaderNodeBsdfPrincipled b_principled_node(b_node); - PrincipledBsdfNode *principled = graph->create_node(); - switch (b_principled_node.distribution()) { - case BL::ShaderNodeBsdfPrincipled::distribution_GGX: - principled->set_distribution(CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID); - break; - case BL::ShaderNodeBsdfPrincipled::distribution_MULTI_GGX: - principled->set_distribution(CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID); - break; - } - switch (b_principled_node.subsurface_method()) { - case BL::ShaderNodeBsdfPrincipled::subsurface_method_BURLEY: - principled->set_subsurface_method(CLOSURE_BSSRDF_BURLEY_ID); - break; - case BL::ShaderNodeBsdfPrincipled::subsurface_method_RANDOM_WALK_FIXED_RADIUS: - principled->set_subsurface_method(CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); - break; - case BL::ShaderNodeBsdfPrincipled::subsurface_method_RANDOM_WALK: - principled->set_subsurface_method(CLOSURE_BSSRDF_RANDOM_WALK_ID); - break; - } - node = principled; - } - 
else if (b_node.is_a(&RNA_ShaderNodeBsdfTranslucent)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeBsdfTransparent)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeBsdfVelvet)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeEmission)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeAmbientOcclusion)) { - BL::ShaderNodeAmbientOcclusion b_ao_node(b_node); - AmbientOcclusionNode *ao = graph->create_node(); - ao->set_samples(b_ao_node.samples()); - ao->set_inside(b_ao_node.inside()); - ao->set_only_local(b_ao_node.only_local()); - node = ao; - } - else if (b_node.is_a(&RNA_ShaderNodeVolumeScatter)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeVolumeAbsorption)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeVolumePrincipled)) { - PrincipledVolumeNode *principled = graph->create_node(); - node = principled; - } - else if (b_node.is_a(&RNA_ShaderNodeNewGeometry)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeWireframe)) { - BL::ShaderNodeWireframe b_wireframe_node(b_node); - WireframeNode *wire = graph->create_node(); - wire->set_use_pixel_size(b_wireframe_node.use_pixel_size()); - node = wire; - } - else if (b_node.is_a(&RNA_ShaderNodeWavelength)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeBlackbody)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeLightPath)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeLightFalloff)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeObjectInfo)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeParticleInfo)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeHairInfo)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeVolumeInfo)) { - node = graph->create_node(); - } - else if (b_node.is_a(&RNA_ShaderNodeVertexColor)) { - BL::ShaderNodeVertexColor b_vertex_color_node(b_node); - VertexColorNode *vertex_color_node = graph->create_node(); - vertex_color_node->set_layer_name(ustring(b_vertex_color_node.layer_name())); - node = vertex_color_node; - } - else if (b_node.is_a(&RNA_ShaderNodeBump)) { - BL::ShaderNodeBump b_bump_node(b_node); - BumpNode *bump = graph->create_node(); - bump->set_invert(b_bump_node.invert()); - node = bump; - } - else if (b_node.is_a(&RNA_ShaderNodeScript)) { -#ifdef WITH_OSL - if (scene->shader_manager->use_osl()) { - /* create script node */ - BL::ShaderNodeScript b_script_node(b_node); - - ShaderManager *manager = scene->shader_manager; - string bytecode_hash = b_script_node.bytecode_hash(); - - if (!bytecode_hash.empty()) { - node = OSLShaderManager::osl_node( - graph, manager, "", bytecode_hash, b_script_node.bytecode()); - } - else { - string absolute_filepath = blender_absolute_path( - b_data, b_ntree, b_script_node.filepath()); - node = OSLShaderManager::osl_node(graph, manager, absolute_filepath, ""); - } - } -#else - (void)b_data; - (void)b_ntree; -#endif - } - else if (b_node.is_a(&RNA_ShaderNodeTexImage)) { - BL::ShaderNodeTexImage b_image_node(b_node); - BL::Image b_image(b_image_node.image()); - BL::ImageUser b_image_user(b_image_node.image_user()); - ImageTextureNode *image = graph->create_node(); - - image->set_interpolation(get_image_interpolation(b_image_node)); - image->set_extension(get_image_extension(b_image_node)); - 
image->set_projection((NodeImageProjection)b_image_node.projection()); - image->set_projection_blend(b_image_node.projection_blend()); - BL::TexMapping b_texture_mapping(b_image_node.texture_mapping()); - get_tex_mapping(image, b_texture_mapping); - - if (b_image) { - PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr; - image->set_colorspace(ustring(get_enum_identifier(colorspace_ptr, "name"))); - - image->set_animated(b_image_node.image_user().use_auto_refresh()); - image->set_alpha_type(get_image_alpha_type(b_image)); - - array tiles; - for (BL::UDIMTile &b_tile : b_image.tiles) { - tiles.push_back_slow(b_tile.number()); - } - image->set_tiles(tiles); - - /* builtin images will use callback-based reading because - * they could only be loaded correct from blender side - */ - bool is_builtin = b_image.packed_file() || b_image.source() == BL::Image::source_GENERATED || - b_image.source() == BL::Image::source_MOVIE || - (b_engine.is_preview() && b_image.source() != BL::Image::source_SEQUENCE); - - if (is_builtin) { - /* for builtin images we're using image datablock name to find an image to - * read pixels from later - * - * also store frame number as well, so there's no differences in handling - * builtin names for packed images and movies - */ - int scene_frame = b_scene.frame_current(); - int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame); - image->handle = scene->image_manager->add_image( - new BlenderImageLoader(b_image, image_frame), image->image_params()); - } - else { - ustring filename = ustring( - image_user_file_path(b_image_user, b_image, b_scene.frame_current(), true)); - image->set_filename(filename); - } - } - node = image; - } - else if (b_node.is_a(&RNA_ShaderNodeTexEnvironment)) { - BL::ShaderNodeTexEnvironment b_env_node(b_node); - BL::Image b_image(b_env_node.image()); - BL::ImageUser b_image_user(b_env_node.image_user()); - EnvironmentTextureNode *env = graph->create_node(); - - env->set_interpolation(get_image_interpolation(b_env_node)); - env->set_projection((NodeEnvironmentProjection)b_env_node.projection()); - BL::TexMapping b_texture_mapping(b_env_node.texture_mapping()); - get_tex_mapping(env, b_texture_mapping); - - if (b_image) { - PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr; - env->set_colorspace(ustring(get_enum_identifier(colorspace_ptr, "name"))); - - env->set_animated(b_env_node.image_user().use_auto_refresh()); - env->set_alpha_type(get_image_alpha_type(b_image)); - - bool is_builtin = b_image.packed_file() || b_image.source() == BL::Image::source_GENERATED || - b_image.source() == BL::Image::source_MOVIE || - (b_engine.is_preview() && b_image.source() != BL::Image::source_SEQUENCE); - - if (is_builtin) { - int scene_frame = b_scene.frame_current(); - int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame); - env->handle = scene->image_manager->add_image(new BlenderImageLoader(b_image, image_frame), - env->image_params()); - } - else { - env->set_filename( - ustring(image_user_file_path(b_image_user, b_image, b_scene.frame_current(), false))); - } - } - node = env; - } - else if (b_node.is_a(&RNA_ShaderNodeTexGradient)) { - BL::ShaderNodeTexGradient b_gradient_node(b_node); - GradientTextureNode *gradient = graph->create_node(); - gradient->set_gradient_type((NodeGradientType)b_gradient_node.gradient_type()); - BL::TexMapping b_texture_mapping(b_gradient_node.texture_mapping()); - get_tex_mapping(gradient, b_texture_mapping); - node = gradient; - } - else if 
(b_node.is_a(&RNA_ShaderNodeTexVoronoi)) { - BL::ShaderNodeTexVoronoi b_voronoi_node(b_node); - VoronoiTextureNode *voronoi = graph->create_node(); - voronoi->set_dimensions(b_voronoi_node.voronoi_dimensions()); - voronoi->set_feature((NodeVoronoiFeature)b_voronoi_node.feature()); - voronoi->set_metric((NodeVoronoiDistanceMetric)b_voronoi_node.distance()); - BL::TexMapping b_texture_mapping(b_voronoi_node.texture_mapping()); - get_tex_mapping(voronoi, b_texture_mapping); - node = voronoi; - } - else if (b_node.is_a(&RNA_ShaderNodeTexMagic)) { - BL::ShaderNodeTexMagic b_magic_node(b_node); - MagicTextureNode *magic = graph->create_node(); - magic->set_depth(b_magic_node.turbulence_depth()); - BL::TexMapping b_texture_mapping(b_magic_node.texture_mapping()); - get_tex_mapping(magic, b_texture_mapping); - node = magic; - } - else if (b_node.is_a(&RNA_ShaderNodeTexWave)) { - BL::ShaderNodeTexWave b_wave_node(b_node); - WaveTextureNode *wave = graph->create_node(); - wave->set_wave_type((NodeWaveType)b_wave_node.wave_type()); - wave->set_bands_direction((NodeWaveBandsDirection)b_wave_node.bands_direction()); - wave->set_rings_direction((NodeWaveRingsDirection)b_wave_node.rings_direction()); - wave->set_profile((NodeWaveProfile)b_wave_node.wave_profile()); - BL::TexMapping b_texture_mapping(b_wave_node.texture_mapping()); - get_tex_mapping(wave, b_texture_mapping); - node = wave; - } - else if (b_node.is_a(&RNA_ShaderNodeTexChecker)) { - BL::ShaderNodeTexChecker b_checker_node(b_node); - CheckerTextureNode *checker = graph->create_node(); - BL::TexMapping b_texture_mapping(b_checker_node.texture_mapping()); - get_tex_mapping(checker, b_texture_mapping); - node = checker; - } - else if (b_node.is_a(&RNA_ShaderNodeTexBrick)) { - BL::ShaderNodeTexBrick b_brick_node(b_node); - BrickTextureNode *brick = graph->create_node(); - brick->set_offset(b_brick_node.offset()); - brick->set_offset_frequency(b_brick_node.offset_frequency()); - brick->set_squash(b_brick_node.squash()); - brick->set_squash_frequency(b_brick_node.squash_frequency()); - BL::TexMapping b_texture_mapping(b_brick_node.texture_mapping()); - get_tex_mapping(brick, b_texture_mapping); - node = brick; - } - else if (b_node.is_a(&RNA_ShaderNodeTexNoise)) { - BL::ShaderNodeTexNoise b_noise_node(b_node); - NoiseTextureNode *noise = graph->create_node(); - noise->set_dimensions(b_noise_node.noise_dimensions()); - BL::TexMapping b_texture_mapping(b_noise_node.texture_mapping()); - get_tex_mapping(noise, b_texture_mapping); - node = noise; - } - else if (b_node.is_a(&RNA_ShaderNodeTexMusgrave)) { - BL::ShaderNodeTexMusgrave b_musgrave_node(b_node); - MusgraveTextureNode *musgrave_node = graph->create_node(); - musgrave_node->set_musgrave_type((NodeMusgraveType)b_musgrave_node.musgrave_type()); - musgrave_node->set_dimensions(b_musgrave_node.musgrave_dimensions()); - BL::TexMapping b_texture_mapping(b_musgrave_node.texture_mapping()); - get_tex_mapping(musgrave_node, b_texture_mapping); - node = musgrave_node; - } - else if (b_node.is_a(&RNA_ShaderNodeTexCoord)) { - BL::ShaderNodeTexCoord b_tex_coord_node(b_node); - TextureCoordinateNode *tex_coord = graph->create_node(); - tex_coord->set_from_dupli(b_tex_coord_node.from_instancer()); - if (b_tex_coord_node.object()) { - tex_coord->set_use_transform(true); - tex_coord->set_ob_tfm(get_transform(b_tex_coord_node.object().matrix_world())); - } - node = tex_coord; - } - else if (b_node.is_a(&RNA_ShaderNodeTexSky)) { - BL::ShaderNodeTexSky b_sky_node(b_node); - SkyTextureNode *sky = 
graph->create_node(); - sky->set_sky_type((NodeSkyType)b_sky_node.sky_type()); - sky->set_sun_direction(normalize(get_float3(b_sky_node.sun_direction()))); - sky->set_turbidity(b_sky_node.turbidity()); - sky->set_ground_albedo(b_sky_node.ground_albedo()); - sky->set_sun_disc(b_sky_node.sun_disc()); - sky->set_sun_size(b_sky_node.sun_size()); - sky->set_sun_intensity(b_sky_node.sun_intensity()); - sky->set_sun_elevation(b_sky_node.sun_elevation()); - sky->set_sun_rotation(b_sky_node.sun_rotation()); - sky->set_altitude(b_sky_node.altitude()); - sky->set_air_density(b_sky_node.air_density()); - sky->set_dust_density(b_sky_node.dust_density()); - sky->set_ozone_density(b_sky_node.ozone_density()); - BL::TexMapping b_texture_mapping(b_sky_node.texture_mapping()); - get_tex_mapping(sky, b_texture_mapping); - node = sky; - } - else if (b_node.is_a(&RNA_ShaderNodeTexIES)) { - BL::ShaderNodeTexIES b_ies_node(b_node); - IESLightNode *ies = graph->create_node(); - switch (b_ies_node.mode()) { - case BL::ShaderNodeTexIES::mode_EXTERNAL: - ies->set_filename(ustring(blender_absolute_path(b_data, b_ntree, b_ies_node.filepath()))); - break; - case BL::ShaderNodeTexIES::mode_INTERNAL: - ustring ies_content = ustring(get_text_datablock_content(b_ies_node.ies().ptr)); - if (ies_content.empty()) { - ies_content = "\n"; - } - ies->set_ies(ies_content); - break; - } - node = ies; - } - else if (b_node.is_a(&RNA_ShaderNodeTexWhiteNoise)) { - BL::ShaderNodeTexWhiteNoise b_tex_white_noise_node(b_node); - WhiteNoiseTextureNode *white_noise_node = graph->create_node(); - white_noise_node->set_dimensions(b_tex_white_noise_node.noise_dimensions()); - node = white_noise_node; - } - else if (b_node.is_a(&RNA_ShaderNodeNormalMap)) { - BL::ShaderNodeNormalMap b_normal_map_node(b_node); - NormalMapNode *nmap = graph->create_node(); - nmap->set_space((NodeNormalMapSpace)b_normal_map_node.space()); - nmap->set_attribute(ustring(b_normal_map_node.uv_map())); - node = nmap; - } - else if (b_node.is_a(&RNA_ShaderNodeTangent)) { - BL::ShaderNodeTangent b_tangent_node(b_node); - TangentNode *tangent = graph->create_node(); - tangent->set_direction_type((NodeTangentDirectionType)b_tangent_node.direction_type()); - tangent->set_axis((NodeTangentAxis)b_tangent_node.axis()); - tangent->set_attribute(ustring(b_tangent_node.uv_map())); - node = tangent; - } - else if (b_node.is_a(&RNA_ShaderNodeUVMap)) { - BL::ShaderNodeUVMap b_uvmap_node(b_node); - UVMapNode *uvm = graph->create_node(); - uvm->set_attribute(ustring(b_uvmap_node.uv_map())); - uvm->set_from_dupli(b_uvmap_node.from_instancer()); - node = uvm; - } - else if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) { - BL::ShaderNodeTexPointDensity b_point_density_node(b_node); - PointDensityTextureNode *point_density = graph->create_node(); - point_density->set_space((NodeTexVoxelSpace)b_point_density_node.space()); - point_density->set_interpolation(get_image_interpolation(b_point_density_node)); - point_density->handle = scene->image_manager->add_image( - new BlenderPointDensityLoader(b_depsgraph, b_point_density_node), - point_density->image_params()); - - b_point_density_node.cache_point_density(b_depsgraph); - node = point_density; - - /* Transformation form world space to texture space. - * - * NOTE: Do this after the texture is cached, this is because getting - * min/max will need to access this cache. 
- */ - BL::Object b_ob(b_point_density_node.object()); - if (b_ob) { - float3 loc, size; - point_density_texture_space(b_depsgraph, b_point_density_node, loc, size); - point_density->set_tfm(transform_translate(-loc) * transform_scale(size) * - transform_inverse(get_transform(b_ob.matrix_world()))); - } - } - else if (b_node.is_a(&RNA_ShaderNodeBevel)) { - BL::ShaderNodeBevel b_bevel_node(b_node); - BevelNode *bevel = graph->create_node(); - bevel->set_samples(b_bevel_node.samples()); - node = bevel; - } - else if (b_node.is_a(&RNA_ShaderNodeDisplacement)) { - BL::ShaderNodeDisplacement b_disp_node(b_node); - DisplacementNode *disp = graph->create_node(); - disp->set_space((NodeNormalMapSpace)b_disp_node.space()); - node = disp; - } - else if (b_node.is_a(&RNA_ShaderNodeVectorDisplacement)) { - BL::ShaderNodeVectorDisplacement b_disp_node(b_node); - VectorDisplacementNode *disp = graph->create_node(); - disp->set_space((NodeNormalMapSpace)b_disp_node.space()); - disp->set_attribute(ustring("")); - node = disp; - } - else if (b_node.is_a(&RNA_ShaderNodeOutputAOV)) { - BL::ShaderNodeOutputAOV b_aov_node(b_node); - OutputAOVNode *aov = graph->create_node(); - aov->set_name(ustring(b_aov_node.name())); - node = aov; - } - - if (node) { - node->name = b_node.name(); - graph->add(node); - } - - return node; -} - -static bool node_use_modified_socket_name(ShaderNode *node) -{ - if (node->special_type == SHADER_SPECIAL_TYPE_OSL) - return false; - - return true; -} - -static ShaderInput *node_find_input_by_name(ShaderNode *node, BL::NodeSocket &b_socket) -{ - string name = b_socket.identifier(); - ShaderInput *input = node->input(name.c_str()); - - if (!input && node_use_modified_socket_name(node)) { - /* Different internal name for shader. */ - if (string_startswith(name, "Shader")) { - string_replace(name, "Shader", "Closure"); - } - input = node->input(name.c_str()); - - if (!input) { - /* Different internal numbering of two sockets with same name. - * Note that the Blender convention for unique socket names changed - * from . to _ at some point, so we check both to handle old files. */ - if (string_endswith(name, "_001")) { - string_replace(name, "_001", "2"); - } - else if (string_endswith(name, ".001")) { - string_replace(name, ".001", "2"); - } - else if (string_endswith(name, "_002")) { - string_replace(name, "_002", "3"); - } - else if (string_endswith(name, ".002")) { - string_replace(name, ".002", "3"); - } - else { - name += "1"; - } - - input = node->input(name.c_str()); - } - } - - return input; -} - -static ShaderOutput *node_find_output_by_name(ShaderNode *node, BL::NodeSocket &b_socket) -{ - string name = b_socket.identifier(); - ShaderOutput *output = node->output(name.c_str()); - - if (!output && node_use_modified_socket_name(node)) { - /* Different internal name for shader. 
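The renaming rules in node_find_input_by_name above can be hard to follow inline. A simplified standalone restatement with a few worked identifiers; it ignores the lookup-dependent fallback (the trailing "1" is only appended in the real code after direct lookups fail), and the function name is illustrative:

#include <cassert>
#include <string>

/* Simplified restatement of the fixups above:
 * - a leading "Shader" becomes "Closure" (different internal name),
 * - duplicate-socket suffixes "_001"/".001" become "2", "_002"/".002" become "3". */
static std::string cycles_input_name(std::string name)
{
  if (name.rfind("Shader", 0) == 0) {
    name.replace(0, 6, "Closure");
  }
  auto ends_with = [&](const std::string &suffix) {
    return name.size() >= suffix.size() &&
           name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0;
  };
  auto replace_tail = [&](size_t n, const std::string &to) {
    name.replace(name.size() - n, n, to);
  };
  if (ends_with("_001") || ends_with(".001")) {
    replace_tail(4, "2");
  }
  else if (ends_with("_002") || ends_with(".002")) {
    replace_tail(4, "3");
  }
  return name;
}

int main()
{
  assert(cycles_input_name("Shader_001") == "Closure2"); /* e.g. a second shader input */
  assert(cycles_input_name("Color.001") == "Color2");    /* old '.' naming convention */
  assert(cycles_input_name("Vector_002") == "Vector3");
  return 0;
}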
*/ - if (name == "Shader") { - name = "Closure"; - output = node->output(name.c_str()); - } - } - - return output; -} - -static void add_nodes(Scene *scene, - BL::RenderEngine &b_engine, - BL::BlendData &b_data, - BL::Depsgraph &b_depsgraph, - BL::Scene &b_scene, - ShaderGraph *graph, - BL::ShaderNodeTree &b_ntree, - const ProxyMap &proxy_input_map, - const ProxyMap &proxy_output_map) -{ - /* add nodes */ - PtrInputMap input_map; - PtrOutputMap output_map; - - /* find the node to use for output if there are multiple */ - BL::ShaderNode output_node = b_ntree.get_output_node( - BL::ShaderNodeOutputMaterial::target_CYCLES); - - /* add nodes */ - for (BL::Node &b_node : b_ntree.nodes) { - if (b_node.mute() || b_node.is_a(&RNA_NodeReroute)) { - /* replace muted node with internal links */ - for (BL::NodeLink &b_link : b_node.internal_links) { - BL::NodeSocket to_socket(b_link.to_socket()); - SocketType::Type to_socket_type = convert_socket_type(to_socket); - if (to_socket_type == SocketType::UNDEFINED) { - continue; - } - - ConvertNode *proxy = graph->create_node(to_socket_type, to_socket_type, true); - - input_map[b_link.from_socket().ptr.data] = proxy->inputs[0]; - output_map[b_link.to_socket().ptr.data] = proxy->outputs[0]; - - graph->add(proxy); - } - } - else if (b_node.is_a(&RNA_ShaderNodeGroup) || b_node.is_a(&RNA_NodeCustomGroup) || - b_node.is_a(&RNA_ShaderNodeCustomGroup)) { - - BL::ShaderNodeTree b_group_ntree(PointerRNA_NULL); - if (b_node.is_a(&RNA_ShaderNodeGroup)) - b_group_ntree = BL::ShaderNodeTree(((BL::NodeGroup)(b_node)).node_tree()); - else if (b_node.is_a(&RNA_NodeCustomGroup)) - b_group_ntree = BL::ShaderNodeTree(((BL::NodeCustomGroup)(b_node)).node_tree()); - else - b_group_ntree = BL::ShaderNodeTree(((BL::ShaderNodeCustomGroup)(b_node)).node_tree()); - - ProxyMap group_proxy_input_map, group_proxy_output_map; - - /* Add a proxy node for each socket - * Do this even if the node group has no internal tree, - * so that links have something to connect to and assert won't fail. 
- */ - for (BL::NodeSocket &b_input : b_node.inputs) { - SocketType::Type input_type = convert_socket_type(b_input); - if (input_type == SocketType::UNDEFINED) { - continue; - } - - ConvertNode *proxy = graph->create_node(input_type, input_type, true); - graph->add(proxy); - - /* register the proxy node for internal binding */ - group_proxy_input_map[b_input.identifier()] = proxy; - - input_map[b_input.ptr.data] = proxy->inputs[0]; - - set_default_value(proxy->inputs[0], b_input, b_data, b_ntree); - } - for (BL::NodeSocket &b_output : b_node.outputs) { - SocketType::Type output_type = convert_socket_type(b_output); - if (output_type == SocketType::UNDEFINED) { - continue; - } - - ConvertNode *proxy = graph->create_node(output_type, output_type, true); - graph->add(proxy); - - /* register the proxy node for internal binding */ - group_proxy_output_map[b_output.identifier()] = proxy; - - output_map[b_output.ptr.data] = proxy->outputs[0]; - } - - if (b_group_ntree) { - add_nodes(scene, - b_engine, - b_data, - b_depsgraph, - b_scene, - graph, - b_group_ntree, - group_proxy_input_map, - group_proxy_output_map); - } - } - else if (b_node.is_a(&RNA_NodeGroupInput)) { - /* map each socket to a proxy node */ - for (BL::NodeSocket &b_output : b_node.outputs) { - ProxyMap::const_iterator proxy_it = proxy_input_map.find(b_output.identifier()); - if (proxy_it != proxy_input_map.end()) { - ConvertNode *proxy = proxy_it->second; - - output_map[b_output.ptr.data] = proxy->outputs[0]; - } - } - } - else if (b_node.is_a(&RNA_NodeGroupOutput)) { - BL::NodeGroupOutput b_output_node(b_node); - /* only the active group output is used */ - if (b_output_node.is_active_output()) { - /* map each socket to a proxy node */ - for (BL::NodeSocket &b_input : b_node.inputs) { - ProxyMap::const_iterator proxy_it = proxy_output_map.find(b_input.identifier()); - if (proxy_it != proxy_output_map.end()) { - ConvertNode *proxy = proxy_it->second; - - input_map[b_input.ptr.data] = proxy->inputs[0]; - - set_default_value(proxy->inputs[0], b_input, b_data, b_ntree); - } - } - } - } - else { - ShaderNode *node = NULL; - - if (b_node.ptr.data == output_node.ptr.data) { - node = graph->output(); - } - else { - BL::ShaderNode b_shader_node(b_node); - node = add_node( - scene, b_engine, b_data, b_depsgraph, b_scene, graph, b_ntree, b_shader_node); - } - - if (node) { - /* map node sockets for linking */ - for (BL::NodeSocket &b_input : b_node.inputs) { - ShaderInput *input = node_find_input_by_name(node, b_input); - if (!input) { - /* XXX should not happen, report error? */ - continue; - } - input_map[b_input.ptr.data] = input; - - set_default_value(input, b_input, b_data, b_ntree); - } - for (BL::NodeSocket &b_output : b_node.outputs) { - ShaderOutput *output = node_find_output_by_name(node, b_output); - if (!output) { - /* XXX should not happen, report error? */ - continue; - } - output_map[b_output.ptr.data] = output; - } - } - } - } - - /* connect nodes */ - for (BL::NodeLink &b_link : b_ntree.links) { - /* Ignore invalid links to avoid unwanted cycles created in graph. - * Also ignore links with unavailable sockets. 
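Around this point add_nodes follows a two-pass shape: first every exported socket is remembered in input_map / output_map keyed by its Blender RNA pointer, then the link loop below connects only pairs whose ends were both exported. A minimal sketch of that pattern with hypothetical stand-in types:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

/* Hypothetical stand-ins for ShaderInput / ShaderOutput and Blender link data. */
struct In { std::string name; };
struct Out { std::string name; };
struct Link { const void *from_sock, *to_sock; };

int main()
{
  /* Pass 1: while creating nodes, remember which Cycles socket corresponds
   * to which Blender socket (keyed by the RNA pointer, as in add_nodes). */
  Out color_out{"Color"};
  In base_color_in{"Base Color"};
  int sock_a = 0, sock_b = 0; /* dummies standing in for b_socket.ptr.data */

  std::map<const void *, Out *> output_map = {{&sock_a, &color_out}};
  std::map<const void *, In *> input_map = {{&sock_b, &base_color_in}};

  /* Pass 2: walk the Blender links and connect only when both ends were exported. */
  std::vector<Link> links = {{&sock_a, &sock_b}};
  for (const Link &l : links) {
    auto o = output_map.find(l.from_sock);
    auto i = input_map.find(l.to_sock);
    if (o != output_map.end() && i != input_map.end())
      printf("connect %s -> %s\n", o->second->name.c_str(), i->second->name.c_str());
  }
  return 0;
}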
*/ - if (!(b_link.is_valid() && b_link.from_socket().enabled() && b_link.to_socket().enabled()) || - b_link.is_muted()) { - continue; - } - /* get blender link data */ - BL::NodeSocket b_from_sock = b_link.from_socket(); - BL::NodeSocket b_to_sock = b_link.to_socket(); - - ShaderOutput *output = 0; - ShaderInput *input = 0; - - PtrOutputMap::iterator output_it = output_map.find(b_from_sock.ptr.data); - if (output_it != output_map.end()) - output = output_it->second; - PtrInputMap::iterator input_it = input_map.find(b_to_sock.ptr.data); - if (input_it != input_map.end()) - input = input_it->second; - - /* either node may be NULL when the node was not exported, typically - * because the node type is not supported */ - if (output && input) - graph->connect(output, input); - } -} - -static void add_nodes(Scene *scene, - BL::RenderEngine &b_engine, - BL::BlendData &b_data, - BL::Depsgraph &b_depsgraph, - BL::Scene &b_scene, - ShaderGraph *graph, - BL::ShaderNodeTree &b_ntree) -{ - static const ProxyMap empty_proxy_map; - add_nodes(scene, - b_engine, - b_data, - b_depsgraph, - b_scene, - graph, - b_ntree, - empty_proxy_map, - empty_proxy_map); -} - -/* Sync Materials */ - -void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all) -{ - shader_map.set_default(scene->default_surface); - - TaskPool pool; - set updated_shaders; - - for (BL::ID &b_id : b_depsgraph.ids) { - if (!b_id.is_a(&RNA_Material)) { - continue; - } - - BL::Material b_mat(b_id); - Shader *shader; - - /* test if we need to sync */ - if (shader_map.add_or_update(&shader, b_mat) || update_all) { - ShaderGraph *graph = new ShaderGraph(); - - shader->name = b_mat.name().c_str(); - shader->set_pass_id(b_mat.pass_index()); - - /* create nodes */ - if (b_mat.use_nodes() && b_mat.node_tree()) { - BL::ShaderNodeTree b_ntree(b_mat.node_tree()); - - add_nodes(scene, b_engine, b_data, b_depsgraph, b_scene, graph, b_ntree); - } - else { - DiffuseBsdfNode *diffuse = graph->create_node(); - diffuse->set_color(get_float3(b_mat.diffuse_color())); - graph->add(diffuse); - - ShaderNode *out = graph->output(); - graph->connect(diffuse->output("BSDF"), out->input("Surface")); - } - - /* settings */ - PointerRNA cmat = RNA_pointer_get(&b_mat.ptr, "cycles"); - shader->set_use_mis(get_boolean(cmat, "sample_as_light")); - shader->set_use_transparent_shadow(get_boolean(cmat, "use_transparent_shadow")); - shader->set_heterogeneous_volume(!get_boolean(cmat, "homogeneous_volume")); - shader->set_volume_sampling_method(get_volume_sampling(cmat)); - shader->set_volume_interpolation_method(get_volume_interpolation(cmat)); - shader->set_volume_step_rate(get_float(cmat, "volume_step_rate")); - shader->set_displacement_method(get_displacement_method(cmat)); - - shader->set_graph(graph); - - /* By simplifying the shader graph as soon as possible, some - * redundant shader nodes might be removed which prevents loading - * unnecessary attributes later. - * - * However, since graph simplification also accounts for e.g. mix - * weight, this would cause frequent expensive resyncs in interactive - * sessions, so for those sessions optimization is only performed - * right before compiling. - */ - if (!preview) { - pool.push(function_bind(&ShaderGraph::simplify, graph, scene)); - /* NOTE: Update shaders out of the threads since those routines - * are accessing and writing to a global context. - */ - updated_shaders.insert(shader); - } - else { - /* NOTE: Update tagging can access links which are being - * optimized out. 
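The threading note above boils down to: run graph simplification concurrently, but defer anything that touches global state until the workers have finished. A stripped-down sketch of that pattern, with std::thread standing in for Cycles' TaskPool and hypothetical Shader/Graph stubs:

#include <set>
#include <thread>
#include <vector>

struct Graph { void simplify() { /* expensive, safe to run per-graph in a worker */ } };
struct Shader { Graph graph; void tag_update() { /* touches shared state: main thread only */ } };

int main()
{
  std::vector<Shader> shaders(4);
  std::vector<std::thread> pool; /* stands in for the TaskPool used above */
  std::set<Shader *> updated;    /* like updated_shaders above */

  for (Shader &s : shaders) {
    pool.emplace_back([&s] { s.graph.simplify(); });
    updated.insert(&s); /* defer the non-thread-safe part */
  }
  for (std::thread &t : pool)
    t.join();           /* like pool.wait_work() */
  for (Shader *s : updated)
    s->tag_update();    /* safe: all workers are done */
  return 0;
}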
- */ - shader->tag_update(scene); - } - } - } - - pool.wait_work(); - - foreach (Shader *shader, updated_shaders) { - shader->tag_update(scene); - } -} - -/* Sync World */ - -void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all) -{ - Background *background = scene->background; - Integrator *integrator = scene->integrator; - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - - BL::World b_world = b_scene.world(); - - BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui); - - if (world_recalc || update_all || b_world.ptr.data != world_map || - viewport_parameters.shader_modified(new_viewport_parameters)) { - Shader *shader = scene->default_background; - ShaderGraph *graph = new ShaderGraph(); - - /* create nodes */ - if (new_viewport_parameters.use_scene_world && b_world && b_world.use_nodes() && - b_world.node_tree()) { - BL::ShaderNodeTree b_ntree(b_world.node_tree()); - - add_nodes(scene, b_engine, b_data, b_depsgraph, b_scene, graph, b_ntree); - - /* volume */ - PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles"); - shader->set_heterogeneous_volume(!get_boolean(cworld, "homogeneous_volume")); - shader->set_volume_sampling_method(get_volume_sampling(cworld)); - shader->set_volume_interpolation_method(get_volume_interpolation(cworld)); - shader->set_volume_step_rate(get_float(cworld, "volume_step_size")); - } - else if (new_viewport_parameters.use_scene_world && b_world) { - BackgroundNode *background = graph->create_node(); - background->set_color(get_float3(b_world.color())); - graph->add(background); - - ShaderNode *out = graph->output(); - graph->connect(background->output("Background"), out->input("Surface")); - } - else if (!new_viewport_parameters.use_scene_world) { - float3 world_color; - if (b_world) { - world_color = get_float3(b_world.color()); - } - else { - world_color = zero_float3(); - } - - BackgroundNode *background = graph->create_node(); - graph->add(background); - - LightPathNode *light_path = graph->create_node(); - graph->add(light_path); - - MixNode *mix_scene_with_background = graph->create_node(); - mix_scene_with_background->set_color2(world_color); - graph->add(mix_scene_with_background); - - EnvironmentTextureNode *texture_environment = graph->create_node(); - texture_environment->set_tex_mapping_type(TextureMapping::VECTOR); - float3 rotation_z = texture_environment->get_tex_mapping_rotation(); - rotation_z[2] = new_viewport_parameters.studiolight_rotate_z; - texture_environment->set_tex_mapping_rotation(rotation_z); - texture_environment->set_filename(new_viewport_parameters.studiolight_path); - graph->add(texture_environment); - - MixNode *mix_intensity = graph->create_node(); - mix_intensity->set_mix_type(NODE_MIX_MUL); - mix_intensity->set_fac(1.0f); - mix_intensity->set_color2(make_float3(new_viewport_parameters.studiolight_intensity, - new_viewport_parameters.studiolight_intensity, - new_viewport_parameters.studiolight_intensity)); - graph->add(mix_intensity); - - TextureCoordinateNode *texture_coordinate = graph->create_node(); - graph->add(texture_coordinate); - - MixNode *mix_background_with_environment = graph->create_node(); - mix_background_with_environment->set_fac( - new_viewport_parameters.studiolight_background_alpha); - mix_background_with_environment->set_color1(world_color); - graph->add(mix_background_with_environment); - - ShaderNode *out = graph->output(); - - graph->connect(texture_coordinate->output("Generated"), - texture_environment->input("Vector")); 
- graph->connect(texture_environment->output("Color"), mix_intensity->input("Color1")); - graph->connect(light_path->output("Is Camera Ray"), mix_scene_with_background->input("Fac")); - graph->connect(mix_intensity->output("Color"), mix_scene_with_background->input("Color1")); - graph->connect(mix_intensity->output("Color"), - mix_background_with_environment->input("Color2")); - graph->connect(mix_background_with_environment->output("Color"), - mix_scene_with_background->input("Color2")); - graph->connect(mix_scene_with_background->output("Color"), background->input("Color")); - graph->connect(background->output("Background"), out->input("Surface")); - } - - /* Visibility */ - if (b_world) { - PointerRNA cvisibility = RNA_pointer_get(&b_world.ptr, "cycles_visibility"); - uint visibility = 0; - - visibility |= get_boolean(cvisibility, "camera") ? PATH_RAY_CAMERA : 0; - visibility |= get_boolean(cvisibility, "diffuse") ? PATH_RAY_DIFFUSE : 0; - visibility |= get_boolean(cvisibility, "glossy") ? PATH_RAY_GLOSSY : 0; - visibility |= get_boolean(cvisibility, "transmission") ? PATH_RAY_TRANSMIT : 0; - visibility |= get_boolean(cvisibility, "scatter") ? PATH_RAY_VOLUME_SCATTER : 0; - - background->set_visibility(visibility); - } - - shader->set_graph(graph); - shader->tag_update(scene); - } - - /* Fast GI */ - if (b_world) { - BL::WorldLighting b_light = b_world.light_settings(); - enum { FAST_GI_METHOD_REPLACE = 0, FAST_GI_METHOD_ADD = 1, FAST_GI_METHOD_NUM }; - - const bool use_fast_gi = get_boolean(cscene, "use_fast_gi"); - if (use_fast_gi) { - const int fast_gi_method = get_enum( - cscene, "fast_gi_method", FAST_GI_METHOD_NUM, FAST_GI_METHOD_REPLACE); - integrator->set_ao_factor((fast_gi_method == FAST_GI_METHOD_REPLACE) ? b_light.ao_factor() : - 0.0f); - integrator->set_ao_additive_factor( - (fast_gi_method == FAST_GI_METHOD_ADD) ? 
b_light.ao_factor() : 0.0f); - } - else { - integrator->set_ao_factor(0.0f); - integrator->set_ao_additive_factor(0.0f); - } - - integrator->set_ao_distance(b_light.distance()); - } - else { - integrator->set_ao_factor(0.0f); - integrator->set_ao_additive_factor(0.0f); - integrator->set_ao_distance(10.0f); - } - - background->set_transparent(b_scene.render().film_transparent()); - - if (background->get_transparent()) { - background->set_transparent_glass(get_boolean(cscene, "film_transparent_glass")); - background->set_transparent_roughness_threshold( - get_float(cscene, "film_transparent_roughness")); - } - else { - background->set_transparent_glass(false); - background->set_transparent_roughness_threshold(0.0f); - } - - background->set_use_shader(view_layer.use_background_shader || - viewport_parameters.use_custom_shader()); - - background->tag_update(scene); -} - -/* Sync Lights */ - -void BlenderSync::sync_lights(BL::Depsgraph &b_depsgraph, bool update_all) -{ - shader_map.set_default(scene->default_light); - - for (BL::ID &b_id : b_depsgraph.ids) { - if (!b_id.is_a(&RNA_Light)) { - continue; - } - - BL::Light b_light(b_id); - Shader *shader; - - /* test if we need to sync */ - if (shader_map.add_or_update(&shader, b_light) || update_all) { - ShaderGraph *graph = new ShaderGraph(); - - /* create nodes */ - if (b_light.use_nodes() && b_light.node_tree()) { - shader->name = b_light.name().c_str(); - - BL::ShaderNodeTree b_ntree(b_light.node_tree()); - - add_nodes(scene, b_engine, b_data, b_depsgraph, b_scene, graph, b_ntree); - } - else { - EmissionNode *emission = graph->create_node(); - emission->set_color(one_float3()); - emission->set_strength(1.0f); - graph->add(emission); - - ShaderNode *out = graph->output(); - graph->connect(emission->output("Emission"), out->input("Surface")); - } - - shader->set_graph(graph); - shader->tag_update(scene); - } - } -} - -void BlenderSync::sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d) -{ - /* for auto refresh images */ - ImageManager *image_manager = scene->image_manager; - const int frame = b_scene.frame_current(); - const bool auto_refresh_update = image_manager->set_animation_frame_update(frame); - - shader_map.pre_sync(); - - sync_world(b_depsgraph, b_v3d, auto_refresh_update); - sync_lights(b_depsgraph, auto_refresh_update); - sync_materials(b_depsgraph, auto_refresh_update); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp deleted file mode 100644 index 1725ea5ec93..00000000000 --- a/intern/cycles/blender/blender_sync.cpp +++ /dev/null @@ -1,949 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "scene/background.h" -#include "scene/camera.h" -#include "scene/curves.h" -#include "scene/film.h" -#include "scene/integrator.h" -#include "scene/light.h" -#include "scene/mesh.h" -#include "scene/object.h" -#include "scene/procedural.h" -#include "scene/scene.h" -#include "scene/shader.h" -#include "scene/shader_graph.h" -#include "scene/shader_nodes.h" - -#include "device/device.h" - -#include "blender/blender_device.h" -#include "blender/blender_session.h" -#include "blender/blender_sync.h" -#include "blender/blender_util.h" - -#include "util/util_debug.h" -#include "util/util_foreach.h" -#include "util/util_hash.h" -#include "util/util_logging.h" -#include "util/util_opengl.h" -#include "util/util_openimagedenoise.h" - -CCL_NAMESPACE_BEGIN - -static const char *cryptomatte_prefix = "Crypto"; - -/* Constructor */ - -BlenderSync::BlenderSync(BL::RenderEngine &b_engine, - BL::BlendData &b_data, - BL::Scene &b_scene, - Scene *scene, - bool preview, - bool use_developer_ui, - Progress &progress) - : b_engine(b_engine), - b_data(b_data), - b_scene(b_scene), - shader_map(scene), - object_map(scene), - procedural_map(scene), - geometry_map(scene), - light_map(scene), - particle_system_map(scene), - world_map(NULL), - world_recalc(false), - scene(scene), - preview(preview), - experimental(false), - use_developer_ui(use_developer_ui), - dicing_rate(1.0f), - max_subdivisions(12), - progress(progress), - has_updates_(true) -{ - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") : - RNA_float_get(&cscene, "dicing_rate"); - max_subdivisions = RNA_int_get(&cscene, "max_subdivisions"); -} - -BlenderSync::~BlenderSync() -{ -} - -void BlenderSync::reset(BL::BlendData &b_data, BL::Scene &b_scene) -{ - /* Update data and scene pointers in case they change in session reset, - * for example after undo. - * Note that we do not modify the `has_updates_` flag here because the sync - * reset is also used during viewport navigation. */ - this->b_data = b_data; - this->b_scene = b_scene; -} - -/* Sync */ - -void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d) -{ - /* Sync recalc flags from blender to cycles. Actual update is done separate, - * so we can do it later on if doing it immediate is not suitable. */ - - if (experimental) { - /* Mark all meshes as needing to be exported again if dicing changed. */ - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - bool dicing_prop_changed = false; - - float updated_dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") : - RNA_float_get(&cscene, "dicing_rate"); - - if (dicing_rate != updated_dicing_rate) { - dicing_rate = updated_dicing_rate; - dicing_prop_changed = true; - } - - int updated_max_subdivisions = RNA_int_get(&cscene, "max_subdivisions"); - - if (max_subdivisions != updated_max_subdivisions) { - max_subdivisions = updated_max_subdivisions; - dicing_prop_changed = true; - } - - if (dicing_prop_changed) { - has_updates_ = true; - - for (const pair &iter : geometry_map.key_to_scene_data()) { - Geometry *geom = iter.second; - if (geom->is_mesh()) { - Mesh *mesh = static_cast(geom); - if (mesh->get_subdivision_type() != Mesh::SUBDIVISION_NONE) { - PointerRNA id_ptr; - RNA_id_pointer_create((::ID *)iter.first.id, &id_ptr); - geometry_map.set_recalc(BL::ID(id_ptr)); - } - } - } - } - } - - /* Iterate over all IDs in this depsgraph. 
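The update loop below tags the various *_map members (shader_map, object_map, geometry_map, light_map, particle_system_map) via set_recalc(); together with add_or_update() and pre_sync()/post_sync() used elsewhere in this file, they share one lifecycle. A rough conceptual sketch, not the real id_map implementation (post_sync and key handling omitted):

#include <cstdio>
#include <map>
#include <set>

template<typename K, typename T> struct SyncMap {
  std::map<K, T> data;
  std::set<K> recalc, used;

  void set_recalc(const K &key) { recalc.insert(key); }
  void pre_sync() { used.clear(); } /* post_sync() would drop entries not in `used` */
  bool add_or_update(T **r_data, const K &key)
  {
    used.insert(key);
    const bool is_new = data.find(key) == data.end();
    const bool needs_sync = is_new || recalc.count(key);
    *r_data = &data[key];
    recalc.erase(key);
    return needs_sync;
  }
};

int main()
{
  SyncMap<int, float> shaders;
  float *sh = nullptr;
  printf("%d\n", shaders.add_or_update(&sh, 42)); /* 1: new entry, needs sync */
  shaders.pre_sync();
  printf("%d\n", shaders.add_or_update(&sh, 42)); /* 0: unchanged since last sync */
  shaders.set_recalc(42);
  printf("%d\n", shaders.add_or_update(&sh, 42)); /* 1: tagged for recalc */
  return 0;
}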
*/ - for (BL::DepsgraphUpdate &b_update : b_depsgraph.updates) { - /* TODO(sergey): Can do more selective filter here. For example, ignore changes made to - * screen datablock. Note that sync_data() needs to be called after object deletion, and - * currently this is ensured by the scene ID tagged for update, which sets the `has_updates_` - * flag. */ - has_updates_ = true; - - BL::ID b_id(b_update.id()); - - /* Material */ - if (b_id.is_a(&RNA_Material)) { - BL::Material b_mat(b_id); - shader_map.set_recalc(b_mat); - } - /* Light */ - else if (b_id.is_a(&RNA_Light)) { - BL::Light b_light(b_id); - shader_map.set_recalc(b_light); - } - /* Object */ - else if (b_id.is_a(&RNA_Object)) { - BL::Object b_ob(b_id); - const bool is_geometry = object_is_geometry(b_ob); - const bool is_light = !is_geometry && object_is_light(b_ob); - - if (b_ob.is_instancer() && b_update.is_updated_shading()) { - /* Needed for e.g. object color updates on instancer. */ - object_map.set_recalc(b_ob); - } - - if (is_geometry || is_light) { - const bool updated_geometry = b_update.is_updated_geometry(); - - /* Geometry (mesh, hair, volume). */ - if (is_geometry) { - if (b_update.is_updated_transform() || b_update.is_updated_shading()) { - object_map.set_recalc(b_ob); - } - - if (updated_geometry || - (object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) { - BL::ID key = BKE_object_is_modified(b_ob) ? b_ob : b_ob.data(); - geometry_map.set_recalc(key); - } - - if (updated_geometry) { - BL::Object::particle_systems_iterator b_psys; - for (b_ob.particle_systems.begin(b_psys); b_psys != b_ob.particle_systems.end(); - ++b_psys) { - particle_system_map.set_recalc(b_ob); - } - } - } - /* Light */ - else if (is_light) { - if (b_update.is_updated_transform() || b_update.is_updated_shading()) { - object_map.set_recalc(b_ob); - light_map.set_recalc(b_ob); - } - - if (updated_geometry) { - light_map.set_recalc(b_ob); - } - } - } - } - /* Mesh */ - else if (b_id.is_a(&RNA_Mesh)) { - BL::Mesh b_mesh(b_id); - geometry_map.set_recalc(b_mesh); - } - /* World */ - else if (b_id.is_a(&RNA_World)) { - BL::World b_world(b_id); - if (world_map == b_world.ptr.data) { - world_recalc = true; - } - } - /* Volume */ - else if (b_id.is_a(&RNA_Volume)) { - BL::Volume b_volume(b_id); - geometry_map.set_recalc(b_volume); - } - } - - if (b_v3d) { - BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui); - - if (viewport_parameters.shader_modified(new_viewport_parameters)) { - world_recalc = true; - has_updates_ = true; - } - - has_updates_ |= viewport_parameters.modified(new_viewport_parameters); - } -} - -void BlenderSync::sync_data(BL::RenderSettings &b_render, - BL::Depsgraph &b_depsgraph, - BL::SpaceView3D &b_v3d, - BL::Object &b_override, - int width, - int height, - void **python_thread_state) -{ - if (!has_updates_) { - return; - } - - scoped_timer timer; - - BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval(); - - /* TODO(sergey): This feels weak to pass view layer to the integrator, and even weaker to have an - * implicit check on whether it is a background render or not. What is the nicer thing here? 
*/ - const bool background = !b_v3d; - - sync_view_layer(b_view_layer); - sync_integrator(b_view_layer, background); - sync_film(b_view_layer, b_v3d); - sync_shaders(b_depsgraph, b_v3d); - sync_images(); - - geometry_synced.clear(); /* use for objects and motion sync */ - - if (scene->need_motion() == Scene::MOTION_PASS || scene->need_motion() == Scene::MOTION_NONE || - scene->camera->get_motion_position() == Camera::MOTION_POSITION_CENTER) { - sync_objects(b_depsgraph, b_v3d); - } - sync_motion(b_render, b_depsgraph, b_v3d, b_override, width, height, python_thread_state); - - geometry_synced.clear(); - - /* Shader sync done at the end, since object sync uses it. - * false = don't delete unused shaders, not supported. */ - shader_map.post_sync(false); - - free_data_after_sync(b_depsgraph); - - VLOG(1) << "Total time spent synchronizing data: " << timer.get_time(); - - has_updates_ = false; -} - -/* Integrator */ - -void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background) -{ - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - - experimental = (get_enum(cscene, "feature_set") != 0); - - Integrator *integrator = scene->integrator; - - integrator->set_min_bounce(get_int(cscene, "min_light_bounces")); - integrator->set_max_bounce(get_int(cscene, "max_bounces")); - - integrator->set_max_diffuse_bounce(get_int(cscene, "diffuse_bounces")); - integrator->set_max_glossy_bounce(get_int(cscene, "glossy_bounces")); - integrator->set_max_transmission_bounce(get_int(cscene, "transmission_bounces")); - integrator->set_max_volume_bounce(get_int(cscene, "volume_bounces")); - - integrator->set_transparent_min_bounce(get_int(cscene, "min_transparent_bounces")); - integrator->set_transparent_max_bounce(get_int(cscene, "transparent_max_bounces")); - - integrator->set_volume_max_steps(get_int(cscene, "volume_max_steps")); - float volume_step_rate = (preview) ? get_float(cscene, "volume_preview_step_rate") : - get_float(cscene, "volume_step_rate"); - integrator->set_volume_step_rate(volume_step_rate); - - integrator->set_caustics_reflective(get_boolean(cscene, "caustics_reflective")); - integrator->set_caustics_refractive(get_boolean(cscene, "caustics_refractive")); - integrator->set_filter_glossy(get_float(cscene, "blur_glossy")); - - int seed = get_int(cscene, "seed"); - if (get_boolean(cscene, "use_animated_seed")) { - seed = hash_uint2(b_scene.frame_current(), get_int(cscene, "seed")); - if (b_scene.frame_subframe() != 0.0f) { - /* TODO(sergey): Ideally should be some sort of hash_merge, - * but this is good enough for now. 
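The animated seed here only needs a deterministic integer hash of (frame, user seed), with the subframe folded in the same way. A small sketch with a generic integer hash standing in for Cycles' hash_uint2() (the mixing constants are arbitrary, not the real hash):

#include <climits>
#include <cstdint>
#include <cstdio>

/* Stand-in 2-to-1 integer hash: the property used above is only that the result
 * changes per frame but is stable for a given (frame, user seed) pair. */
static uint32_t hash2(uint32_t a, uint32_t b)
{
  uint32_t h = a * 73856093u ^ b * 19349663u;
  h ^= h >> 16;
  h *= 0x7feb352du;
  h ^= h >> 15;
  return h;
}

static uint32_t animated_seed(int frame, float subframe, uint32_t user_seed)
{
  uint32_t seed = hash2((uint32_t)frame, user_seed);
  if (subframe != 0.0f) {
    /* Same idea as above: fold the subframe in as an integer. */
    seed += hash2((uint32_t)(subframe * (float)INT_MAX), user_seed);
  }
  return seed;
}

int main()
{
  printf("frame 1: %u\nframe 2: %u\n", animated_seed(1, 0.0f, 0), animated_seed(2, 0.0f, 0));
  return 0;
}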
- */ - seed += hash_uint2((int)(b_scene.frame_subframe() * (float)INT_MAX), - get_int(cscene, "seed")); - } - } - - integrator->set_seed(seed); - - integrator->set_sample_clamp_direct(get_float(cscene, "sample_clamp_direct")); - integrator->set_sample_clamp_indirect(get_float(cscene, "sample_clamp_indirect")); - if (!preview) { - integrator->set_motion_blur(view_layer.use_motion_blur); - } - - integrator->set_light_sampling_threshold(get_float(cscene, "light_sampling_threshold")); - - SamplingPattern sampling_pattern = (SamplingPattern)get_enum( - cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_SOBOL); - integrator->set_sampling_pattern(sampling_pattern); - - if (preview) { - integrator->set_use_adaptive_sampling( - RNA_boolean_get(&cscene, "use_preview_adaptive_sampling")); - integrator->set_adaptive_threshold(get_float(cscene, "preview_adaptive_threshold")); - integrator->set_adaptive_min_samples(get_int(cscene, "preview_adaptive_min_samples")); - } - else { - integrator->set_use_adaptive_sampling(RNA_boolean_get(&cscene, "use_adaptive_sampling")); - integrator->set_adaptive_threshold(get_float(cscene, "adaptive_threshold")); - integrator->set_adaptive_min_samples(get_int(cscene, "adaptive_min_samples")); - } - - if (get_boolean(cscene, "use_fast_gi")) { - if (preview) { - integrator->set_ao_bounces(get_int(cscene, "ao_bounces")); - } - else { - integrator->set_ao_bounces(get_int(cscene, "ao_bounces_render")); - } - } - else { - integrator->set_ao_bounces(0); - } - - const DenoiseParams denoise_params = get_denoise_params(b_scene, b_view_layer, background); - integrator->set_use_denoise(denoise_params.use); - - /* Only update denoiser parameters if the denoiser is actually used. This allows to tweak - * denoiser parameters before enabling it without render resetting on every change. The downside - * is that the interface and the integrator are technically out of sync. */ - if (denoise_params.use) { - integrator->set_denoiser_type(denoise_params.type); - integrator->set_denoise_start_sample(denoise_params.start_sample); - integrator->set_use_denoise_pass_albedo(denoise_params.use_pass_albedo); - integrator->set_use_denoise_pass_normal(denoise_params.use_pass_normal); - integrator->set_denoiser_prefilter(denoise_params.prefilter); - } - - /* UPDATE_NONE as we don't want to tag the integrator as modified (this was done by the - * set calls above), but we need to make sure that the dependent things are tagged. */ - integrator->tag_update(scene, Integrator::UPDATE_NONE); -} - -/* Film */ - -void BlenderSync::sync_film(BL::ViewLayer &b_view_layer, BL::SpaceView3D &b_v3d) -{ - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles"); - - Film *film = scene->film; - - if (b_v3d) { - const BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui); - film->set_display_pass(new_viewport_parameters.display_pass); - film->set_show_active_pixels(new_viewport_parameters.show_active_pixels); - } - - film->set_exposure(get_float(cscene, "film_exposure")); - film->set_filter_type( - (FilterType)get_enum(cscene, "pixel_filter_type", FILTER_NUM_TYPES, FILTER_BLACKMAN_HARRIS)); - float filter_width = (film->get_filter_type() == FILTER_BOX) ? 
1.0f : - get_float(cscene, "filter_width"); - film->set_filter_width(filter_width); - - if (b_scene.world()) { - BL::WorldMistSettings b_mist = b_scene.world().mist_settings(); - - film->set_mist_start(b_mist.start()); - film->set_mist_depth(b_mist.depth()); - - switch (b_mist.falloff()) { - case BL::WorldMistSettings::falloff_QUADRATIC: - film->set_mist_falloff(2.0f); - break; - case BL::WorldMistSettings::falloff_LINEAR: - film->set_mist_falloff(1.0f); - break; - case BL::WorldMistSettings::falloff_INVERSE_QUADRATIC: - film->set_mist_falloff(0.5f); - break; - } - } - - /* Blender viewport does not support proper shadow catcher compositing, so force an approximate - * mode to improve visual feedback. */ - if (b_v3d) { - film->set_use_approximate_shadow_catcher(true); - } - else { - film->set_use_approximate_shadow_catcher(!get_boolean(crl, "use_pass_shadow_catcher")); - } -} - -/* Render Layer */ - -void BlenderSync::sync_view_layer(BL::ViewLayer &b_view_layer) -{ - view_layer.name = b_view_layer.name(); - - /* Filter. */ - view_layer.use_background_shader = b_view_layer.use_sky(); - /* Always enable surfaces for baking, otherwise there is nothing to bake to. */ - view_layer.use_surfaces = b_view_layer.use_solid() || scene->bake_manager->get_baking(); - view_layer.use_hair = b_view_layer.use_strand(); - view_layer.use_volumes = b_view_layer.use_volumes(); - view_layer.use_motion_blur = b_view_layer.use_motion_blur() && - b_scene.render().use_motion_blur(); - - /* Material override. */ - view_layer.material_override = b_view_layer.material_override(); - - /* Sample override. */ - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - int use_layer_samples = get_enum(cscene, "use_layer_samples"); - - view_layer.bound_samples = (use_layer_samples == 1); - view_layer.samples = 0; - - if (use_layer_samples != 2) { - int samples = b_view_layer.samples(); - view_layer.samples = samples; - } -} - -/* Images */ -void BlenderSync::sync_images() -{ - /* Sync is a convention for this API, but currently it frees unused buffers. */ - - const bool is_interface_locked = b_engine.render() && b_engine.render().use_lock_interface(); - if (is_interface_locked == false && BlenderSession::headless == false) { - /* If interface is not locked, it's possible image is needed for - * the display. - */ - return; - } - /* Free buffers used by images which are not needed for render. */ - for (BL::Image &b_image : b_data.images) { - /* TODO(sergey): Consider making it an utility function to check - * whether image is considered builtin. - */ - const bool is_builtin = b_image.packed_file() || - b_image.source() == BL::Image::source_GENERATED || - b_image.source() == BL::Image::source_MOVIE || b_engine.is_preview(); - if (is_builtin == false) { - b_image.buffers_free(); - } - /* TODO(sergey): Free builtin images not used by any shader. 
*/ - } -} - -/* Passes */ - -static PassType get_blender_pass_type(BL::RenderPass &b_pass) -{ - string name = b_pass.name(); -#define MAP_PASS(passname, passtype) \ - if (name == passname) { \ - return passtype; \ - } \ - ((void)0) - - /* NOTE: Keep in sync with defined names from DNA_scene_types.h */ - - MAP_PASS("Combined", PASS_COMBINED); - MAP_PASS("Noisy Image", PASS_COMBINED); - - MAP_PASS("Depth", PASS_DEPTH); - MAP_PASS("Mist", PASS_MIST); - MAP_PASS("Position", PASS_POSITION); - MAP_PASS("Normal", PASS_NORMAL); - MAP_PASS("IndexOB", PASS_OBJECT_ID); - MAP_PASS("UV", PASS_UV); - MAP_PASS("Vector", PASS_MOTION); - MAP_PASS("IndexMA", PASS_MATERIAL_ID); - - MAP_PASS("DiffDir", PASS_DIFFUSE_DIRECT); - MAP_PASS("GlossDir", PASS_GLOSSY_DIRECT); - MAP_PASS("TransDir", PASS_TRANSMISSION_DIRECT); - MAP_PASS("VolumeDir", PASS_VOLUME_DIRECT); - - MAP_PASS("DiffInd", PASS_DIFFUSE_INDIRECT); - MAP_PASS("GlossInd", PASS_GLOSSY_INDIRECT); - MAP_PASS("TransInd", PASS_TRANSMISSION_INDIRECT); - MAP_PASS("VolumeInd", PASS_VOLUME_INDIRECT); - - MAP_PASS("DiffCol", PASS_DIFFUSE_COLOR); - MAP_PASS("GlossCol", PASS_GLOSSY_COLOR); - MAP_PASS("TransCol", PASS_TRANSMISSION_COLOR); - - MAP_PASS("Emit", PASS_EMISSION); - MAP_PASS("Env", PASS_BACKGROUND); - MAP_PASS("AO", PASS_AO); - MAP_PASS("Shadow", PASS_SHADOW); - - MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE); - MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL); - - MAP_PASS("Denoising Normal", PASS_DENOISING_NORMAL); - MAP_PASS("Denoising Albedo", PASS_DENOISING_ALBEDO); - MAP_PASS("Denoising Depth", PASS_DENOISING_DEPTH); - - MAP_PASS("Shadow Catcher", PASS_SHADOW_CATCHER); - MAP_PASS("Noisy Shadow Catcher", PASS_SHADOW_CATCHER); - - MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER); - MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT); - - if (string_startswith(name, cryptomatte_prefix)) { - return PASS_CRYPTOMATTE; - } - -#undef MAP_PASS - - return PASS_NONE; -} - -static Pass *pass_add(Scene *scene, - PassType type, - const char *name, - PassMode mode = PassMode::DENOISED) -{ - Pass *pass = scene->create_node(); - - pass->set_type(type); - pass->set_name(ustring(name)); - pass->set_mode(mode); - - return pass; -} - -void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer) -{ - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - - /* Delete all existing passes. */ - set clear_passes(scene->passes.begin(), scene->passes.end()); - scene->delete_nodes(clear_passes); - - /* Always add combined pass. */ - pass_add(scene, PASS_COMBINED, "Combined"); - - /* Blender built-in data and light passes. */ - for (BL::RenderPass &b_pass : b_rlay.passes) { - const PassType pass_type = get_blender_pass_type(b_pass); - - if (pass_type == PASS_NONE) { - LOG(ERROR) << "Unknown pass " << b_pass.name(); - continue; - } - - if (pass_type == PASS_MOTION && - (b_view_layer.use_motion_blur() && b_scene.render().use_motion_blur())) { - continue; - } - - pass_add(scene, pass_type, b_pass.name().c_str()); - } - - PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles"); - - /* Debug passes. */ - if (get_boolean(crl, "pass_debug_sample_count")) { - b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str()); - pass_add(scene, PASS_SAMPLE_COUNT, "Debug Sample Count"); - } - - /* Cycles specific passes. 
*/ - if (get_boolean(crl, "use_pass_volume_direct")) { - b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str()); - pass_add(scene, PASS_VOLUME_DIRECT, "VolumeDir"); - } - if (get_boolean(crl, "use_pass_volume_indirect")) { - b_engine.add_pass("VolumeInd", 3, "RGB", b_view_layer.name().c_str()); - pass_add(scene, PASS_VOLUME_INDIRECT, "VolumeInd"); - } - if (get_boolean(crl, "use_pass_shadow_catcher")) { - b_engine.add_pass("Shadow Catcher", 3, "RGB", b_view_layer.name().c_str()); - pass_add(scene, PASS_SHADOW_CATCHER, "Shadow Catcher"); - } - - /* Cryptomatte stores two ID/weight pairs per RGBA layer. - * User facing parameter is the number of pairs. */ - int crypto_depth = divide_up(min(16, b_view_layer.pass_cryptomatte_depth()), 2); - scene->film->set_cryptomatte_depth(crypto_depth); - CryptomatteType cryptomatte_passes = CRYPT_NONE; - if (b_view_layer.use_pass_cryptomatte_object()) { - for (int i = 0; i < crypto_depth; i++) { - string passname = cryptomatte_prefix + string_printf("Object%02d", i); - b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); - pass_add(scene, PASS_CRYPTOMATTE, passname.c_str()); - } - cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_OBJECT); - } - if (b_view_layer.use_pass_cryptomatte_material()) { - for (int i = 0; i < crypto_depth; i++) { - string passname = cryptomatte_prefix + string_printf("Material%02d", i); - b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); - pass_add(scene, PASS_CRYPTOMATTE, passname.c_str()); - } - cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_MATERIAL); - } - if (b_view_layer.use_pass_cryptomatte_asset()) { - for (int i = 0; i < crypto_depth; i++) { - string passname = cryptomatte_prefix + string_printf("Asset%02d", i); - b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); - pass_add(scene, PASS_CRYPTOMATTE, passname.c_str()); - } - cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_ASSET); - } - scene->film->set_cryptomatte_passes(cryptomatte_passes); - - /* Denoising passes. */ - const bool use_denoising = get_boolean(cscene, "use_denoising") && - get_boolean(crl, "use_denoising"); - const bool store_denoising_passes = get_boolean(crl, "denoising_store_passes"); - if (use_denoising) { - b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str()); - pass_add(scene, PASS_COMBINED, "Noisy Image", PassMode::NOISY); - if (get_boolean(crl, "use_pass_shadow_catcher")) { - b_engine.add_pass("Noisy Shadow Catcher", 3, "RGB", b_view_layer.name().c_str()); - pass_add(scene, PASS_SHADOW_CATCHER, "Noisy Shadow Catcher", PassMode::NOISY); - } - } - if (store_denoising_passes) { - b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str()); - pass_add(scene, PASS_DENOISING_NORMAL, "Denoising Normal", PassMode::NOISY); - - b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str()); - pass_add(scene, PASS_DENOISING_ALBEDO, "Denoising Albedo", PassMode::NOISY); - - b_engine.add_pass("Denoising Depth", 1, "Z", b_view_layer.name().c_str()); - pass_add(scene, PASS_DENOISING_DEPTH, "Denoising Depth", PassMode::NOISY); - } - - /* Custom AOV passes. 
*/ - BL::ViewLayer::aovs_iterator b_aov_iter; - for (b_view_layer.aovs.begin(b_aov_iter); b_aov_iter != b_view_layer.aovs.end(); ++b_aov_iter) { - BL::AOV b_aov(*b_aov_iter); - if (!b_aov.is_valid()) { - continue; - } - - string name = b_aov.name(); - bool is_color = b_aov.type() == BL::AOV::type_COLOR; - - if (is_color) { - b_engine.add_pass(name.c_str(), 4, "RGBA", b_view_layer.name().c_str()); - pass_add(scene, PASS_AOV_COLOR, name.c_str()); - } - else { - b_engine.add_pass(name.c_str(), 1, "X", b_view_layer.name().c_str()); - pass_add(scene, PASS_AOV_VALUE, name.c_str()); - } - } - - scene->film->set_pass_alpha_threshold(b_view_layer.pass_alpha_threshold()); -} - -void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph) -{ - /* When viewport display is not needed during render we can force some - * caches to be releases from blender side in order to reduce peak memory - * footprint during synchronization process. - */ - - const bool is_interface_locked = b_engine.render() && b_engine.render().use_lock_interface(); - const bool is_persistent_data = b_engine.render() && b_engine.render().use_persistent_data(); - const bool can_free_caches = - (BlenderSession::headless || is_interface_locked) && - /* Baking re-uses the depsgraph multiple times, clearing crashes - * reading un-evaluated mesh data which isn't aligned with the - * geometry we're baking, see T71012. */ - !scene->bake_manager->get_baking() && - /* Persistent data must main caches for performance and correctness. */ - !is_persistent_data; - - if (!can_free_caches) { - return; - } - /* TODO(sergey): We can actually remove the whole dependency graph, - * but that will need some API support first. - */ - for (BL::Object &b_ob : b_depsgraph.objects) { - b_ob.cache_release(); - } -} - -/* Scene Parameters */ - -SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background) -{ - SceneParams params; - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - const bool shadingsystem = RNA_boolean_get(&cscene, "shading_system"); - - if (shadingsystem == 0) - params.shadingsystem = SHADINGSYSTEM_SVM; - else if (shadingsystem == 1) - params.shadingsystem = SHADINGSYSTEM_OSL; - - if (background || DebugFlags().viewport_static_bvh) - params.bvh_type = BVH_TYPE_STATIC; - else - params.bvh_type = BVH_TYPE_DYNAMIC; - - params.use_bvh_spatial_split = RNA_boolean_get(&cscene, "debug_use_spatial_splits"); - params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh"); - params.num_bvh_time_steps = RNA_int_get(&cscene, "debug_bvh_time_steps"); - - PointerRNA csscene = RNA_pointer_get(&b_scene.ptr, "cycles_curves"); - params.hair_subdivisions = get_int(csscene, "subdivisions"); - params.hair_shape = (CurveShapeType)get_enum( - csscene, "shape", CURVE_NUM_SHAPE_TYPES, CURVE_THICK); - - int texture_limit; - if (background) { - texture_limit = RNA_enum_get(&cscene, "texture_limit_render"); - } - else { - texture_limit = RNA_enum_get(&cscene, "texture_limit"); - } - if (texture_limit > 0 && b_scene.render().use_simplify()) { - params.texture_limit = 1 << (texture_limit + 6); - } - else { - params.texture_limit = 0; - } - - params.bvh_layout = DebugFlags().cpu.bvh_layout; - - params.background = background; - - return params; -} - -/* Session Parameters */ - -bool BlenderSync::get_session_pause(BL::Scene &b_scene, bool background) -{ - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - return (background) ? 
false : get_boolean(cscene, "preview_pause"); -} - -SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine, - BL::Preferences &b_preferences, - BL::Scene &b_scene, - bool background) -{ - SessionParams params; - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - - /* feature set */ - params.experimental = (get_enum(cscene, "feature_set") != 0); - - /* Headless and background rendering. */ - params.headless = BlenderSession::headless; - params.background = background; - - /* Device */ - params.threads = blender_device_threads(b_scene); - params.device = blender_device_info(b_preferences, b_scene, params.background); - - /* samples */ - int samples = get_int(cscene, "samples"); - int preview_samples = get_int(cscene, "preview_samples"); - - if (background) { - params.samples = samples; - } - else { - params.samples = preview_samples; - if (params.samples == 0) - params.samples = INT_MAX; - } - - /* Clamp samples. */ - params.samples = min(params.samples, Integrator::MAX_SAMPLES); - - /* Viewport Performance */ - params.pixel_size = b_engine.get_preview_pixel_size(b_scene); - - if (background) { - params.pixel_size = 1; - } - - /* shading system - scene level needs full refresh */ - const bool shadingsystem = RNA_boolean_get(&cscene, "shading_system"); - - if (shadingsystem == 0) - params.shadingsystem = SHADINGSYSTEM_SVM; - else if (shadingsystem == 1) - params.shadingsystem = SHADINGSYSTEM_OSL; - - /* Time limit. */ - if (background) { - params.time_limit = get_float(cscene, "time_limit"); - } - else { - /* For the viewport it kind of makes more sense to think in terms of the noise floor, which is - * usually higher than acceptable level for the final frame. */ - /* TODO: It might be useful to support time limit in the viewport as well, but needs some - * extra thoughts and input. */ - params.time_limit = 0.0; - } - - /* Profiling. 
*/ - params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background && - BlenderSession::print_render_stats; - - if (background) { - params.use_auto_tile = RNA_boolean_get(&cscene, "use_auto_tile"); - params.tile_size = max(get_int(cscene, "tile_size"), 8); - } - else { - params.use_auto_tile = false; - } - - return params; -} - -DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene, - BL::ViewLayer &b_view_layer, - bool background) -{ - enum DenoiserInput { - DENOISER_INPUT_RGB = 1, - DENOISER_INPUT_RGB_ALBEDO = 2, - DENOISER_INPUT_RGB_ALBEDO_NORMAL = 3, - - DENOISER_INPUT_NUM, - }; - - DenoiseParams denoising; - PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); - - int input_passes = -1; - - if (background) { - /* Final Render Denoising */ - denoising.use = get_boolean(cscene, "use_denoising"); - denoising.type = (DenoiserType)get_enum(cscene, "denoiser", DENOISER_NUM, DENOISER_NONE); - denoising.prefilter = (DenoiserPrefilter)get_enum( - cscene, "denoising_prefilter", DENOISER_PREFILTER_NUM, DENOISER_PREFILTER_NONE); - - input_passes = (DenoiserInput)get_enum( - cscene, "denoising_input_passes", DENOISER_INPUT_NUM, DENOISER_INPUT_RGB_ALBEDO_NORMAL); - - if (b_view_layer) { - PointerRNA clayer = RNA_pointer_get(&b_view_layer.ptr, "cycles"); - if (!get_boolean(clayer, "use_denoising")) { - denoising.use = false; - } - } - } - else { - /* Viewport Denoising */ - denoising.use = get_boolean(cscene, "use_preview_denoising"); - denoising.type = (DenoiserType)get_enum( - cscene, "preview_denoiser", DENOISER_NUM, DENOISER_NONE); - denoising.prefilter = (DenoiserPrefilter)get_enum( - cscene, "preview_denoising_prefilter", DENOISER_PREFILTER_NUM, DENOISER_PREFILTER_FAST); - denoising.start_sample = get_int(cscene, "preview_denoising_start_sample"); - - input_passes = (DenoiserInput)get_enum( - cscene, "preview_denoising_input_passes", DENOISER_INPUT_NUM, DENOISER_INPUT_RGB_ALBEDO); - - /* Auto select fastest denoiser. */ - if (denoising.type == DENOISER_NONE) { - if (!Device::available_devices(DEVICE_MASK_OPTIX).empty()) { - denoising.type = DENOISER_OPTIX; - } - else if (openimagedenoise_supported()) { - denoising.type = DENOISER_OPENIMAGEDENOISE; - } - else { - denoising.use = false; - } - } - } - - switch (input_passes) { - case DENOISER_INPUT_RGB: - denoising.use_pass_albedo = false; - denoising.use_pass_normal = false; - break; - - case DENOISER_INPUT_RGB_ALBEDO: - denoising.use_pass_albedo = true; - denoising.use_pass_normal = false; - break; - - case DENOISER_INPUT_RGB_ALBEDO_NORMAL: - denoising.use_pass_albedo = true; - denoising.use_pass_normal = true; - break; - - default: - LOG(ERROR) << "Unhandled input passes enum " << input_passes; - break; - } - - return denoising; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h deleted file mode 100644 index 99e3f0bf02b..00000000000 --- a/intern/cycles/blender/blender_sync.h +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BLENDER_SYNC_H__ -#define __BLENDER_SYNC_H__ - -#include "MEM_guardedalloc.h" -#include "RNA_access.h" -#include "RNA_blender_cpp.h" -#include "RNA_types.h" - -#include "blender/blender_id_map.h" -#include "blender/blender_util.h" -#include "blender/blender_viewport.h" - -#include "scene/scene.h" -#include "session/session.h" - -#include "util/util_map.h" -#include "util/util_set.h" -#include "util/util_transform.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -class Background; -class BlenderObjectCulling; -class BlenderViewportParameters; -class Camera; -class Film; -class Hair; -class Light; -class Mesh; -class Object; -class ParticleSystem; -class Scene; -class ViewLayer; -class Shader; -class ShaderGraph; -class ShaderNode; -class TaskPool; - -class BlenderSync { - public: - BlenderSync(BL::RenderEngine &b_engine, - BL::BlendData &b_data, - BL::Scene &b_scene, - Scene *scene, - bool preview, - bool use_developer_ui, - Progress &progress); - ~BlenderSync(); - - void reset(BL::BlendData &b_data, BL::Scene &b_scene); - - /* sync */ - void sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d); - void sync_data(BL::RenderSettings &b_render, - BL::Depsgraph &b_depsgraph, - BL::SpaceView3D &b_v3d, - BL::Object &b_override, - int width, - int height, - void **python_thread_state); - void sync_view_layer(BL::ViewLayer &b_view_layer); - void sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer); - void sync_integrator(BL::ViewLayer &b_view_layer, bool background); - void sync_camera(BL::RenderSettings &b_render, - BL::Object &b_override, - int width, - int height, - const char *viewname); - void sync_view(BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, int width, int height); - inline int get_layer_samples() - { - return view_layer.samples; - } - inline int get_layer_bound_samples() - { - return view_layer.bound_samples; - } - - /* get parameters */ - static SceneParams get_scene_params(BL::Scene &b_scene, bool background); - static SessionParams get_session_params(BL::RenderEngine &b_engine, - BL::Preferences &b_userpref, - BL::Scene &b_scene, - bool background); - static bool get_session_pause(BL::Scene &b_scene, bool background); - static BufferParams get_buffer_params( - BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height); - - private: - static DenoiseParams get_denoise_params(BL::Scene &b_scene, - BL::ViewLayer &b_view_layer, - bool background); - - /* sync */ - void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all); - void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all); - void sync_objects(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, float motion_time = 0.0f); - void sync_motion(BL::RenderSettings &b_render, - BL::Depsgraph &b_depsgraph, - BL::SpaceView3D &b_v3d, - BL::Object &b_override, - int width, - int height, - void **python_thread_state); - void sync_film(BL::ViewLayer &b_view_layer, BL::SpaceView3D &b_v3d); - void sync_view(); - - /* Shader */ - array find_used_shaders(BL::Object &b_ob); - void sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all); - void sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d); - void sync_nodes(Shader *shader, BL::ShaderNodeTree &b_ntree); - - /* Object */ - Object *sync_object(BL::Depsgraph &b_depsgraph, - BL::ViewLayer &b_view_layer, - BL::DepsgraphObjectInstance &b_instance, - 
float motion_time, - bool use_particle_hair, - bool show_lights, - BlenderObjectCulling &culling, - bool *use_portal, - TaskPool *geom_task_pool); - void sync_object_motion_init(BL::Object &b_parent, BL::Object &b_ob, Object *object); - - void sync_procedural(BL::Object &b_ob, - BL::MeshSequenceCacheModifier &b_mesh_cache, - bool has_subdivision); - - bool sync_object_attributes(BL::DepsgraphObjectInstance &b_instance, Object *object); - - /* Volume */ - void sync_volume(BObjectInfo &b_ob_info, Volume *volume); - - /* Mesh */ - void sync_mesh(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Mesh *mesh); - void sync_mesh_motion(BL::Depsgraph b_depsgraph, - BObjectInfo &b_ob_info, - Mesh *mesh, - int motion_step); - - /* Hair */ - void sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Hair *hair); - void sync_hair_motion(BL::Depsgraph b_depsgraph, - BObjectInfo &b_ob_info, - Hair *hair, - int motion_step); - void sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step = 0); - void sync_particle_hair( - Hair *hair, BL::Mesh &b_mesh, BObjectInfo &b_ob_info, bool motion, int motion_step = 0); - bool object_has_particle_hair(BL::Object b_ob); - - /* Camera */ - void sync_camera_motion( - BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time); - - /* Geometry */ - Geometry *sync_geometry(BL::Depsgraph &b_depsgrpah, - BObjectInfo &b_ob_info, - bool object_updated, - bool use_particle_hair, - TaskPool *task_pool); - - void sync_geometry_motion(BL::Depsgraph &b_depsgraph, - BObjectInfo &b_ob_info, - Object *object, - float motion_time, - bool use_particle_hair, - TaskPool *task_pool); - - /* Light */ - void sync_light(BL::Object &b_parent, - int persistent_id[OBJECT_PERSISTENT_ID_SIZE], - BObjectInfo &b_ob_info, - int random_id, - Transform &tfm, - bool *use_portal); - void sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal); - - /* Particles */ - bool sync_dupli_particle(BL::Object &b_ob, - BL::DepsgraphObjectInstance &b_instance, - Object *object); - - /* Images. */ - void sync_images(); - - /* Early data free. */ - void free_data_after_sync(BL::Depsgraph &b_depsgraph); - - /* util */ - void find_shader(BL::ID &id, array &used_shaders, Shader *default_shader); - bool BKE_object_is_modified(BL::Object &b_ob); - bool object_is_geometry(BL::Object &b_ob); - bool object_is_light(BL::Object &b_ob); - - /* variables */ - BL::RenderEngine b_engine; - BL::BlendData b_data; - BL::Scene b_scene; - - id_map shader_map; - id_map object_map; - id_map procedural_map; - id_map geometry_map; - id_map light_map; - id_map particle_system_map; - set geometry_synced; - set geometry_motion_synced; - set geometry_motion_attribute_synced; - set motion_times; - void *world_map; - bool world_recalc; - BlenderViewportParameters viewport_parameters; - - Scene *scene; - bool preview; - bool experimental; - bool use_developer_ui; - - float dicing_rate; - int max_subdivisions; - - struct RenderLayerInfo { - RenderLayerInfo() - : material_override(PointerRNA_NULL), - use_background_shader(true), - use_surfaces(true), - use_hair(true), - use_volumes(true), - use_motion_blur(true), - samples(0), - bound_samples(false) - { - } - - string name; - BL::Material material_override; - bool use_background_shader; - bool use_surfaces; - bool use_hair; - bool use_volumes; - bool use_motion_blur; - int samples; - bool bound_samples; - } view_layer; - - Progress &progress; - - protected: - /* Indicates that `sync_recalc()` detected changes in the scene. 
- * If this flag is false then the data is considered to be up-to-date and will not be - * synchronized at all. */ - bool has_updates_ = true; -}; - -CCL_NAMESPACE_END - -#endif /* __BLENDER_SYNC_H__ */ diff --git a/intern/cycles/blender/blender_texture.cpp b/intern/cycles/blender/blender_texture.cpp deleted file mode 100644 index 0d593f2b385..00000000000 --- a/intern/cycles/blender/blender_texture.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "blender/blender_texture.h" - -CCL_NAMESPACE_BEGIN - -namespace { - -/* Point density helpers. */ - -void density_texture_space_invert(float3 &loc, float3 &size) -{ - if (size.x != 0.0f) - size.x = 0.5f / size.x; - if (size.y != 0.0f) - size.y = 0.5f / size.y; - if (size.z != 0.0f) - size.z = 0.5f / size.z; - - loc = loc * size - make_float3(0.5f, 0.5f, 0.5f); -} - -} /* namespace */ - -void point_density_texture_space(BL::Depsgraph &b_depsgraph, - BL::ShaderNodeTexPointDensity &b_point_density_node, - float3 &loc, - float3 &size) -{ - BL::Object b_ob(b_point_density_node.object()); - if (!b_ob) { - loc = zero_float3(); - size = zero_float3(); - return; - } - float3 min, max; - b_point_density_node.calc_point_density_minmax(b_depsgraph, &min[0], &max[0]); - loc = (min + max) * 0.5f; - size = (max - min) * 0.5f; - density_texture_space_invert(loc, size); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_texture.h b/intern/cycles/blender/blender_texture.h deleted file mode 100644 index 8ab061aaed9..00000000000 --- a/intern/cycles/blender/blender_texture.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BLENDER_TEXTURE_H__ -#define __BLENDER_TEXTURE_H__ - -#include "blender/blender_sync.h" -#include - -CCL_NAMESPACE_BEGIN - -void point_density_texture_space(BL::Depsgraph &b_depsgraph, - BL::ShaderNodeTexPointDensity &b_point_density_node, - float3 &loc, - float3 &size); - -CCL_NAMESPACE_END - -#endif /* __BLENDER_TEXTURE_H__ */ diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h deleted file mode 100644 index a3dd2349525..00000000000 --- a/intern/cycles/blender/blender_util.h +++ /dev/null @@ -1,720 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BLENDER_UTIL_H__ -#define __BLENDER_UTIL_H__ - -#include "scene/mesh.h" - -#include "util/util_algorithm.h" -#include "util/util_array.h" -#include "util/util_map.h" -#include "util/util_path.h" -#include "util/util_set.h" -#include "util/util_transform.h" -#include "util/util_types.h" -#include "util/util_vector.h" - -/* Hacks to hook into Blender API - * todo: clean this up ... */ - -extern "C" { -void BKE_image_user_frame_calc(void *ima, void *iuser, int cfra); -void BKE_image_user_file_path(void *iuser, void *ima, char *path); -unsigned char *BKE_image_get_pixels_for_frame(void *image, int frame, int tile); -float *BKE_image_get_float_pixels_for_frame(void *image, int frame, int tile); -} - -CCL_NAMESPACE_BEGIN - -struct BObjectInfo { - /* Object directly provided by the depsgraph iterator. This object is only valid during one - * iteration and must not be accessed afterwards. Transforms and visibility should be checked on - * this object. */ - BL::Object iter_object; - - /* This object remains alive even after the object iterator is done. It corresponds to one - * original object. It is the object that owns the object data below. */ - BL::Object real_object; - - /* The object-data referenced by the iter object. This is still valid after the depsgraph - * iterator is done. It might have a different type compared to real_object.data(). */ - BL::ID object_data; - - /* True when the current geometry is the data of the referenced object. False when it is a - * geometry instance that does not have a 1-to-1 relationship with an object. */ - bool is_real_object_data() const - { - return const_cast(real_object).data() == object_data; - } -}; - -typedef BL::ShaderNodeAttribute::attribute_type_enum BlenderAttributeType; -BlenderAttributeType blender_attribute_name_split_type(ustring name, string *r_real_name); - -void python_thread_state_save(void **python_thread_state); -void python_thread_state_restore(void **python_thread_state); - -static inline BL::Mesh object_to_mesh(BL::BlendData & /*data*/, - BObjectInfo &b_ob_info, - BL::Depsgraph & /*depsgraph*/, - bool /*calc_undeformed*/, - Mesh::SubdivisionType subdivision_type) -{ - /* TODO: make this work with copy-on-write, modifiers are already evaluated. */ -#if 0 - bool subsurf_mod_show_render = false; - bool subsurf_mod_show_viewport = false; - - if (subdivision_type != Mesh::SUBDIVISION_NONE) { - BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length() - 1]; - - subsurf_mod_show_render = subsurf_mod.show_render(); - subsurf_mod_show_viewport = subsurf_mod.show_viewport(); - - subsurf_mod.show_render(false); - subsurf_mod.show_viewport(false); - } -#endif - - BL::Mesh mesh = (b_ob_info.object_data.is_a(&RNA_Mesh)) ? BL::Mesh(b_ob_info.object_data) : - BL::Mesh(PointerRNA_NULL); - - if (b_ob_info.is_real_object_data()) { - if (mesh) { - /* Make a copy to split faces if we use autosmooth, otherwise not needed. - * Also in edit mode do we need to make a copy, to ensure data layers like - * UV are not empty. 
-       */
-      if (mesh.is_editmode() ||
-          (mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
-        BL::Depsgraph depsgraph(PointerRNA_NULL);
-        mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
-      }
-    }
-    else {
-      BL::Depsgraph depsgraph(PointerRNA_NULL);
-      mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
-    }
-  }
-  else {
-    /* TODO: what to do about non-mesh geometry instances? */
-  }
-
-#if 0
-  if (subdivision_type != Mesh::SUBDIVISION_NONE) {
-    BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length() - 1];
-
-    subsurf_mod.show_render(subsurf_mod_show_render);
-    subsurf_mod.show_viewport(subsurf_mod_show_viewport);
-  }
-#endif
-
-  if ((bool)mesh && subdivision_type == Mesh::SUBDIVISION_NONE) {
-    if (mesh.use_auto_smooth()) {
-      mesh.split_faces(false);
-    }
-
-    mesh.calc_loop_triangles();
-  }
-
-  return mesh;
-}
-
-static inline void free_object_to_mesh(BL::BlendData & /*data*/,
-                                       BObjectInfo &b_ob_info,
-                                       BL::Mesh &mesh)
-{
-  if (!b_ob_info.is_real_object_data()) {
-    return;
-  }
-  /* Free mesh if we didn't just use the existing one. */
-  BL::Object object = b_ob_info.real_object;
-  if (object.data().ptr.data != mesh.ptr.data) {
-    object.to_mesh_clear();
-  }
-}
-
-static inline void colorramp_to_array(BL::ColorRamp &ramp,
-                                      array<float3> &ramp_color,
-                                      array<float> &ramp_alpha,
-                                      int size)
-{
-  ramp_color.resize(size);
-  ramp_alpha.resize(size);
-
-  for (int i = 0; i < size; i++) {
-    float color[4];
-
-    ramp.evaluate((float)i / (float)(size - 1), color);
-    ramp_color[i] = make_float3(color[0], color[1], color[2]);
-    ramp_alpha[i] = color[3];
-  }
-}
-
-static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap &curve, float *min_x, float *max_x)
-{
-  *min_x = min(*min_x, curve.points[0].location()[0]);
-  *max_x = max(*max_x, curve.points[curve.points.length() - 1].location()[0]);
-}
-
-static inline void curvemapping_minmax(/*const*/ BL::CurveMapping &cumap,
-                                       int num_curves,
-                                       float *min_x,
-                                       float *max_x)
-{
-  // const int num_curves = cumap.curves.length(); /* Gives linking error so far. */
-  *min_x = FLT_MAX;
-  *max_x = -FLT_MAX;
-  for (int i = 0; i < num_curves; ++i) {
-    BL::CurveMap map(cumap.curves[i]);
-    curvemap_minmax_curve(map, min_x, max_x);
-  }
-}
-
-static inline void curvemapping_to_array(BL::CurveMapping &cumap, array<float> &data, int size)
-{
-  cumap.update();
-  BL::CurveMap curve = cumap.curves[0];
-  data.resize(size);
-  for (int i = 0; i < size; i++) {
-    float t = (float)i / (float)(size - 1);
-    data[i] = cumap.evaluate(curve, t);
-  }
-}
-
-static inline void curvemapping_float_to_array(BL::CurveMapping &cumap,
-                                               array<float> &data,
-                                               int size)
-{
-  float min = 0.0f, max = 1.0f;
-
-  curvemapping_minmax(cumap, 1, &min, &max);
-
-  const float range = max - min;
-
-  cumap.update();
-
-  BL::CurveMap map = cumap.curves[0];
-
-  data.resize(size);
-
-  for (int i = 0; i < size; i++) {
-    float t = min + (float)i / (float)(size - 1) * range;
-    data[i] = cumap.evaluate(map, t);
-  }
-}
-
-static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
-                                               array<float3> &data,
-                                               int size,
-                                               bool rgb_curve)
-{
-  float min_x = 0.0f, max_x = 1.0f;
-
-  /* TODO(sergey): There is no easy way to automatically guess what is
-   * the range to be used here for the case when mapping is applied on
-   * top of another mapping (i.e. R curve applied on top of common
-   * one).
-   *
-   * Using the largest possible range from all curves works correctly for
-   * cases like vector curves and should be a good enough heuristic for
-   * the color curves as well.
- * - * There might be some better estimations here tho. - */ - const int num_curves = rgb_curve ? 4 : 3; - curvemapping_minmax(cumap, num_curves, &min_x, &max_x); - - const float range_x = max_x - min_x; - - cumap.update(); - - BL::CurveMap mapR = cumap.curves[0]; - BL::CurveMap mapG = cumap.curves[1]; - BL::CurveMap mapB = cumap.curves[2]; - - data.resize(size); - - if (rgb_curve) { - BL::CurveMap mapI = cumap.curves[3]; - for (int i = 0; i < size; i++) { - const float t = min_x + (float)i / (float)(size - 1) * range_x; - data[i] = make_float3(cumap.evaluate(mapR, cumap.evaluate(mapI, t)), - cumap.evaluate(mapG, cumap.evaluate(mapI, t)), - cumap.evaluate(mapB, cumap.evaluate(mapI, t))); - } - } - else { - for (int i = 0; i < size; i++) { - float t = min_x + (float)i / (float)(size - 1) * range_x; - data[i] = make_float3( - cumap.evaluate(mapR, t), cumap.evaluate(mapG, t), cumap.evaluate(mapB, t)); - } - } -} - -static inline bool BKE_object_is_modified(BL::Object &self, BL::Scene &scene, bool preview) -{ - return self.is_modified(scene, (preview) ? (1 << 0) : (1 << 1)) ? true : false; -} - -static inline bool BKE_object_is_deform_modified(BObjectInfo &self, BL::Scene &scene, bool preview) -{ - if (!self.is_real_object_data()) { - return false; - } - return self.real_object.is_deform_modified(scene, (preview) ? (1 << 0) : (1 << 1)) ? true : - false; -} - -static inline int render_resolution_x(BL::RenderSettings &b_render) -{ - return b_render.resolution_x() * b_render.resolution_percentage() / 100; -} - -static inline int render_resolution_y(BL::RenderSettings &b_render) -{ - return b_render.resolution_y() * b_render.resolution_percentage() / 100; -} - -static inline string image_user_file_path(BL::ImageUser &iuser, - BL::Image &ima, - int cfra, - bool load_tiled) -{ - char filepath[1024]; - iuser.tile(0); - BKE_image_user_frame_calc(ima.ptr.data, iuser.ptr.data, cfra); - BKE_image_user_file_path(iuser.ptr.data, ima.ptr.data, filepath); - - string filepath_str = string(filepath); - if (load_tiled && ima.source() == BL::Image::source_TILED) { - string udim; - if (ima.tiles.length() > 0) { - udim = to_string(ima.tiles[0].number()); - } - string_replace(filepath_str, udim, ""); - } - return filepath_str; -} - -static inline int image_user_frame_number(BL::ImageUser &iuser, BL::Image &ima, int cfra) -{ - BKE_image_user_frame_calc(ima.ptr.data, iuser.ptr.data, cfra); - return iuser.frame_current(); -} - -static inline unsigned char *image_get_pixels_for_frame(BL::Image &image, int frame, int tile) -{ - return BKE_image_get_pixels_for_frame(image.ptr.data, frame, tile); -} - -static inline float *image_get_float_pixels_for_frame(BL::Image &image, int frame, int tile) -{ - return BKE_image_get_float_pixels_for_frame(image.ptr.data, frame, tile); -} - -static inline void render_add_metadata(BL::RenderResult &b_rr, string name, string value) -{ - b_rr.stamp_data_add_field(name.c_str(), value.c_str()); -} - -/* Utilities */ - -static inline Transform get_transform(const BL::Array &array) -{ - ProjectionTransform projection; - - /* We assume both types to be just 16 floats, and transpose because blender - * use column major matrix order while we use row major. */ - memcpy((void *)&projection, &array, sizeof(float) * 16); - projection = projection_transpose(projection); - - /* Drop last row, matrix is assumed to be affine transform. 
- */
-  return projection_to_transform(projection);
-}
-
-static inline float2 get_float2(const BL::Array<float, 2> &array)
-{
-  return make_float2(array[0], array[1]);
-}
-
-static inline float3 get_float3(const BL::Array<float, 2> &array)
-{
-  return make_float3(array[0], array[1], 0.0f);
-}
-
-static inline float3 get_float3(const BL::Array<float, 3> &array)
-{
-  return make_float3(array[0], array[1], array[2]);
-}
-
-static inline float3 get_float3(const BL::Array<float, 4> &array)
-{
-  return make_float3(array[0], array[1], array[2]);
-}
-
-static inline float4 get_float4(const BL::Array<float, 4> &array)
-{
-  return make_float4(array[0], array[1], array[2], array[3]);
-}
-
-static inline int3 get_int3(const BL::Array<int, 3> &array)
-{
-  return make_int3(array[0], array[1], array[2]);
-}
-
-static inline int4 get_int4(const BL::Array<int, 4> &array)
-{
-  return make_int4(array[0], array[1], array[2], array[3]);
-}
-
-static inline float3 get_float3(PointerRNA &ptr, const char *name)
-{
-  float3 f;
-  RNA_float_get_array(&ptr, name, &f.x);
-  return f;
-}
-
-static inline void set_float3(PointerRNA &ptr, const char *name, float3 value)
-{
-  RNA_float_set_array(&ptr, name, &value.x);
-}
-
-static inline float4 get_float4(PointerRNA &ptr, const char *name)
-{
-  float4 f;
-  RNA_float_get_array(&ptr, name, &f.x);
-  return f;
-}
-
-static inline void set_float4(PointerRNA &ptr, const char *name, float4 value)
-{
-  RNA_float_set_array(&ptr, name, &value.x);
-}
-
-static inline bool get_boolean(PointerRNA &ptr, const char *name)
-{
-  return RNA_boolean_get(&ptr, name) ? true : false;
-}
-
-static inline void set_boolean(PointerRNA &ptr, const char *name, bool value)
-{
-  RNA_boolean_set(&ptr, name, (int)value);
-}
-
-static inline float get_float(PointerRNA &ptr, const char *name)
-{
-  return RNA_float_get(&ptr, name);
-}
-
-static inline void set_float(PointerRNA &ptr, const char *name, float value)
-{
-  RNA_float_set(&ptr, name, value);
-}
-
-static inline int get_int(PointerRNA &ptr, const char *name)
-{
-  return RNA_int_get(&ptr, name);
-}
-
-static inline void set_int(PointerRNA &ptr, const char *name, int value)
-{
-  RNA_int_set(&ptr, name, value);
-}
-
-/* Get an RNA enum value with sanity check: if the RNA value is above num_values
- * the function will return a fallback default value.
- *
- * NOTE: This function assumes that RNA enum values are a continuous sequence
- * from 0 to num_values-1. Be careful to use it with enums where some values are
- * deprecated!
- */ -static inline int get_enum(PointerRNA &ptr, - const char *name, - int num_values = -1, - int default_value = -1) -{ - int value = RNA_enum_get(&ptr, name); - if (num_values != -1 && value >= num_values) { - assert(default_value != -1); - value = default_value; - } - return value; -} - -static inline string get_enum_identifier(PointerRNA &ptr, const char *name) -{ - PropertyRNA *prop = RNA_struct_find_property(&ptr, name); - const char *identifier = ""; - int value = RNA_property_enum_get(&ptr, prop); - - RNA_property_enum_identifier(NULL, &ptr, prop, value, &identifier); - - return string(identifier); -} - -static inline void set_enum(PointerRNA &ptr, const char *name, int value) -{ - RNA_enum_set(&ptr, name, value); -} - -static inline void set_enum(PointerRNA &ptr, const char *name, const string &identifier) -{ - RNA_enum_set_identifier(NULL, &ptr, name, identifier.c_str()); -} - -static inline string get_string(PointerRNA &ptr, const char *name) -{ - char cstrbuf[1024]; - char *cstr = RNA_string_get_alloc(&ptr, name, cstrbuf, sizeof(cstrbuf), NULL); - string str(cstr); - if (cstr != cstrbuf) - MEM_freeN(cstr); - - return str; -} - -static inline void set_string(PointerRNA &ptr, const char *name, const string &value) -{ - RNA_string_set(&ptr, name, value.c_str()); -} - -/* Relative Paths */ - -static inline string blender_absolute_path(BL::BlendData &b_data, BL::ID &b_id, const string &path) -{ - if (path.size() >= 2 && path[0] == '/' && path[1] == '/') { - string dirname; - - if (b_id.library()) { - BL::ID b_library_id(b_id.library()); - dirname = blender_absolute_path(b_data, b_library_id, b_id.library().filepath()); - } - else - dirname = b_data.filepath(); - - return path_join(path_dirname(dirname), path.substr(2)); - } - - return path; -} - -static inline string get_text_datablock_content(const PointerRNA &ptr) -{ - if (ptr.data == NULL) { - return ""; - } - - string content; - BL::Text::lines_iterator iter; - for (iter.begin(ptr); iter; ++iter) { - content += iter->body() + "\n"; - } - - return content; -} - -/* Texture Space */ - -static inline void mesh_texture_space(BL::Mesh &b_mesh, float3 &loc, float3 &size) -{ - loc = get_float3(b_mesh.texspace_location()); - size = get_float3(b_mesh.texspace_size()); - - if (size.x != 0.0f) - size.x = 0.5f / size.x; - if (size.y != 0.0f) - size.y = 0.5f / size.y; - if (size.z != 0.0f) - size.z = 0.5f / size.z; - - loc = loc * size - make_float3(0.5f, 0.5f, 0.5f); -} - -/* Object motion steps, returns 0 if no motion blur needed. */ -static inline uint object_motion_steps(BL::Object &b_parent, - BL::Object &b_ob, - const int max_steps = INT_MAX) -{ - /* Get motion enabled and steps from object itself. */ - PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); - bool use_motion = get_boolean(cobject, "use_motion_blur"); - if (!use_motion) { - return 0; - } - - int steps = max(1, get_int(cobject, "motion_steps")); - - /* Also check parent object, so motion blur and steps can be - * controlled by dupligroup duplicator for linked groups. 
*/ - if (b_parent.ptr.data != b_ob.ptr.data) { - PointerRNA parent_cobject = RNA_pointer_get(&b_parent.ptr, "cycles"); - use_motion &= get_boolean(parent_cobject, "use_motion_blur"); - - if (!use_motion) { - return 0; - } - - steps = max(steps, get_int(parent_cobject, "motion_steps")); - } - - /* Use uneven number of steps so we get one keyframe at the current frame, - * and use 2^(steps - 1) so objects with more/fewer steps still have samples - * at the same times, to avoid sampling at many different times. */ - return min((2 << (steps - 1)) + 1, max_steps); -} - -/* object uses deformation motion blur */ -static inline bool object_use_deform_motion(BL::Object &b_parent, BL::Object &b_ob) -{ - PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); - bool use_deform_motion = get_boolean(cobject, "use_deform_motion"); - /* If motion blur is enabled for the object we also check - * whether it's enabled for the parent object as well. - * - * This way we can control motion blur from the dupligroup - * duplicator much easier. - */ - if (use_deform_motion && b_parent.ptr.data != b_ob.ptr.data) { - PointerRNA parent_cobject = RNA_pointer_get(&b_parent.ptr, "cycles"); - use_deform_motion &= get_boolean(parent_cobject, "use_deform_motion"); - } - return use_deform_motion; -} - -static inline BL::FluidDomainSettings object_fluid_gas_domain_find(BL::Object &b_ob) -{ - for (BL::Modifier &b_mod : b_ob.modifiers) { - if (b_mod.is_a(&RNA_FluidModifier)) { - BL::FluidModifier b_mmd(b_mod); - - if (b_mmd.fluid_type() == BL::FluidModifier::fluid_type_DOMAIN && - b_mmd.domain_settings().domain_type() == BL::FluidDomainSettings::domain_type_GAS) { - return b_mmd.domain_settings(); - } - } - } - - return BL::FluidDomainSettings(PointerRNA_NULL); -} - -static inline BL::MeshSequenceCacheModifier object_mesh_cache_find(BL::Object &b_ob, - bool *has_subdivision_modifier) -{ - for (int i = b_ob.modifiers.length() - 1; i >= 0; --i) { - BL::Modifier b_mod = b_ob.modifiers[i]; - - if (b_mod.type() == BL::Modifier::type_MESH_SEQUENCE_CACHE) { - BL::MeshSequenceCacheModifier mesh_cache = BL::MeshSequenceCacheModifier(b_mod); - return mesh_cache; - } - - /* Skip possible particles system modifiers as they do not modify the geometry. */ - if (b_mod.type() == BL::Modifier::type_PARTICLE_SYSTEM) { - continue; - } - - if (b_mod.type() == BL::Modifier::type_SUBSURF) { - if (has_subdivision_modifier) { - *has_subdivision_modifier = true; - } - continue; - } - - break; - } - - return BL::MeshSequenceCacheModifier(PointerRNA_NULL); -} - -static inline Mesh::SubdivisionType object_subdivision_type(BL::Object &b_ob, - bool preview, - bool experimental) -{ - PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles"); - - if (cobj.data && b_ob.modifiers.length() > 0 && experimental) { - BL::Modifier mod = b_ob.modifiers[b_ob.modifiers.length() - 1]; - bool enabled = preview ? mod.show_viewport() : mod.show_render(); - - if (enabled && mod.type() == BL::Modifier::type_SUBSURF && - RNA_boolean_get(&cobj, "use_adaptive_subdivision")) { - BL::SubsurfModifier subsurf(mod); - - if (subsurf.subdivision_type() == BL::SubsurfModifier::subdivision_type_CATMULL_CLARK) { - return Mesh::SUBDIVISION_CATMULL_CLARK; - } - else { - return Mesh::SUBDIVISION_LINEAR; - } - } - } - - return Mesh::SUBDIVISION_NONE; -} - -static inline uint object_ray_visibility(BL::Object &b_ob) -{ - uint flag = 0; - - flag |= b_ob.visible_camera() ? PATH_RAY_CAMERA : 0; - flag |= b_ob.visible_diffuse() ? PATH_RAY_DIFFUSE : 0; - flag |= b_ob.visible_glossy() ? 
                                  PATH_RAY_GLOSSY : 0;
-  flag |= b_ob.visible_transmission() ? PATH_RAY_TRANSMIT : 0;
-  flag |= b_ob.visible_shadow() ? PATH_RAY_SHADOW : 0;
-  flag |= b_ob.visible_volume_scatter() ? PATH_RAY_VOLUME_SCATTER : 0;
-
-  return flag;
-}
-
-class EdgeMap {
- public:
-  EdgeMap()
-  {
-  }
-
-  void clear()
-  {
-    edges_.clear();
-  }
-
-  void insert(int v0, int v1)
-  {
-    get_sorted_verts(v0, v1);
-    edges_.insert(std::pair<int, int>(v0, v1));
-  }
-
-  bool exists(int v0, int v1)
-  {
-    get_sorted_verts(v0, v1);
-    return edges_.find(std::pair<int, int>(v0, v1)) != edges_.end();
-  }
-
- protected:
-  void get_sorted_verts(int &v0, int &v1)
-  {
-    if (v0 > v1) {
-      swap(v0, v1);
-    }
-  }
-
-  set<std::pair<int, int>> edges_;
-};
-
-CCL_NAMESPACE_END
-
-#endif /* __BLENDER_UTIL_H__ */
diff --git a/intern/cycles/blender/blender_viewport.cpp b/intern/cycles/blender/blender_viewport.cpp
deleted file mode 100644
index b8deb77b621..00000000000
--- a/intern/cycles/blender/blender_viewport.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright 2019 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "blender_viewport.h"
-
-#include "blender_util.h"
-#include "scene/pass.h"
-#include "util/util_logging.h"
-
-CCL_NAMESPACE_BEGIN
-
-BlenderViewportParameters::BlenderViewportParameters()
-    : use_scene_world(true),
-      use_scene_lights(true),
-      studiolight_rotate_z(0.0f),
-      studiolight_intensity(1.0f),
-      studiolight_background_alpha(1.0f),
-      display_pass(PASS_COMBINED),
-      show_active_pixels(false)
-{
-}
-
-BlenderViewportParameters::BlenderViewportParameters(BL::SpaceView3D &b_v3d, bool use_developer_ui)
-    : BlenderViewportParameters()
-{
-  if (!b_v3d) {
-    return;
-  }
-
-  BL::View3DShading shading = b_v3d.shading();
-  PointerRNA cshading = RNA_pointer_get(&shading.ptr, "cycles");
-
-  /* We only copy the shading parameters if we are in look-dev mode.
-   * Otherwise defaults are being used. These defaults mimic normal render settings. */
-  if (shading.type() == BL::View3DShading::type_RENDERED) {
-    use_scene_world = shading.use_scene_world_render();
-    use_scene_lights = shading.use_scene_lights_render();
-
-    if (!use_scene_world) {
-      studiolight_rotate_z = shading.studiolight_rotate_z();
-      studiolight_intensity = shading.studiolight_intensity();
-      studiolight_background_alpha = shading.studiolight_background_alpha();
-      studiolight_path = shading.selected_studio_light().path();
-    }
-  }
-
-  /* Film. */
-
-  /* Lookup display pass based on the enum identifier.
-   * This is because integer values of the Python enum are not aligned with the pass definitions in
-   * the kernel.
*/ - - display_pass = PASS_COMBINED; - - const string display_pass_identifier = get_enum_identifier(cshading, "render_pass"); - if (!display_pass_identifier.empty()) { - const ustring pass_type_identifier(string_to_lower(display_pass_identifier)); - const NodeEnum *pass_type_enum = Pass::get_type_enum(); - if (pass_type_enum->exists(pass_type_identifier)) { - display_pass = static_cast((*pass_type_enum)[pass_type_identifier]); - } - } - - if (use_developer_ui) { - show_active_pixels = get_boolean(cshading, "show_active_pixels"); - } -} - -bool BlenderViewportParameters::shader_modified(const BlenderViewportParameters &other) const -{ - return use_scene_world != other.use_scene_world || use_scene_lights != other.use_scene_lights || - studiolight_rotate_z != other.studiolight_rotate_z || - studiolight_intensity != other.studiolight_intensity || - studiolight_background_alpha != other.studiolight_background_alpha || - studiolight_path != other.studiolight_path; -} - -bool BlenderViewportParameters::film_modified(const BlenderViewportParameters &other) const -{ - return display_pass != other.display_pass || show_active_pixels != other.show_active_pixels; -} - -bool BlenderViewportParameters::modified(const BlenderViewportParameters &other) const -{ - return shader_modified(other) || film_modified(other); -} - -bool BlenderViewportParameters::use_custom_shader() const -{ - return !(use_scene_world && use_scene_lights); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_viewport.h b/intern/cycles/blender/blender_viewport.h deleted file mode 100644 index a445973f4d2..00000000000 --- a/intern/cycles/blender/blender_viewport.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2019 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BLENDER_VIEWPORT_H__ -#define __BLENDER_VIEWPORT_H__ - -#include "MEM_guardedalloc.h" - -#include "RNA_access.h" -#include "RNA_blender_cpp.h" -#include "RNA_types.h" - -#include "scene/film.h" - -CCL_NAMESPACE_BEGIN - -class BlenderViewportParameters { - public: - /* Shader. */ - bool use_scene_world; - bool use_scene_lights; - float studiolight_rotate_z; - float studiolight_intensity; - float studiolight_background_alpha; - ustring studiolight_path; - - /* Film. */ - PassType display_pass; - bool show_active_pixels; - - BlenderViewportParameters(); - BlenderViewportParameters(BL::SpaceView3D &b_v3d, bool use_developer_ui); - - /* Check whether any of shading related settings are different from the given parameters. */ - bool shader_modified(const BlenderViewportParameters &other) const; - - /* Check whether any of film related settings are different from the given parameters. */ - bool film_modified(const BlenderViewportParameters &other) const; - - /* Check whether any of settings are different from the given parameters. 
*/
- bool modified(const BlenderViewportParameters &other) const;
-
- /* Returns truth when a custom shader defined by the viewport is to be used instead of the
- * regular background shader or scene light. */
- bool use_custom_shader() const;
-};
-
-CCL_NAMESPACE_END
-
-#endif
diff --git a/intern/cycles/blender/blender_volume.cpp b/intern/cycles/blender/blender_volume.cpp
deleted file mode 100644
index 46083cb29dd..00000000000
--- a/intern/cycles/blender/blender_volume.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "scene/colorspace.h"
-#include "scene/image.h"
-#include "scene/image_vdb.h"
-#include "scene/object.h"
-#include "scene/volume.h"
-
-#include "blender/blender_sync.h"
-#include "blender/blender_util.h"
-
-#ifdef WITH_OPENVDB
-# include <openvdb/openvdb.h>
-openvdb::GridBase::ConstPtr BKE_volume_grid_openvdb_for_read(const struct Volume *volume,
- const struct VolumeGrid *grid);
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-/* TODO: verify this is not loading unnecessary attributes. */
-class BlenderSmokeLoader : public ImageLoader {
- public:
- BlenderSmokeLoader(BL::Object &b_ob, AttributeStandard attribute)
- : b_domain(object_fluid_gas_domain_find(b_ob)), attribute(attribute)
- {
- BL::Mesh b_mesh(b_ob.data());
- mesh_texture_space(b_mesh, texspace_loc, texspace_size);
- }
-
- bool load_metadata(const ImageDeviceFeatures &, ImageMetaData &metadata) override
- {
- if (!b_domain) {
- return false;
- }
-
- if (attribute == ATTR_STD_VOLUME_DENSITY || attribute == ATTR_STD_VOLUME_FLAME ||
- attribute == ATTR_STD_VOLUME_HEAT || attribute == ATTR_STD_VOLUME_TEMPERATURE) {
- metadata.type = IMAGE_DATA_TYPE_FLOAT;
- metadata.channels = 1;
- }
- else if (attribute == ATTR_STD_VOLUME_COLOR) {
- metadata.type = IMAGE_DATA_TYPE_FLOAT4;
- metadata.channels = 4;
- }
- else if (attribute == ATTR_STD_VOLUME_VELOCITY) {
- metadata.type = IMAGE_DATA_TYPE_FLOAT4;
- metadata.channels = 3;
- }
- else {
- return false;
- }
-
- int3 resolution = get_int3(b_domain.domain_resolution());
- int amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1;
-
- /* Velocity and heat data is always low-resolution. */
- if (attribute == ATTR_STD_VOLUME_VELOCITY || attribute == ATTR_STD_VOLUME_HEAT) {
- amplify = 1;
- }
-
- metadata.width = resolution.x * amplify;
- metadata.height = resolution.y * amplify;
- metadata.depth = resolution.z * amplify;
-
- /* Create a matrix to transform from object space to mesh texture space.
- * This does not work with deformations but that can probably only be done
- * well with a volume grid mapping of coordinates.
*/ - metadata.transform_3d = transform_translate(-texspace_loc) * transform_scale(texspace_size); - metadata.use_transform_3d = true; - - return true; - } - - bool load_pixels(const ImageMetaData &, void *pixels, const size_t, const bool) override - { - if (!b_domain) { - return false; - } -#ifdef WITH_FLUID - int3 resolution = get_int3(b_domain.domain_resolution()); - int length, amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1; - - /* Velocity and heat data is always low-resolution. */ - if (attribute == ATTR_STD_VOLUME_VELOCITY || attribute == ATTR_STD_VOLUME_HEAT) { - amplify = 1; - } - - const int width = resolution.x * amplify; - const int height = resolution.y * amplify; - const int depth = resolution.z * amplify; - const size_t num_pixels = ((size_t)width) * height * depth; - - float *fpixels = (float *)pixels; - - if (attribute == ATTR_STD_VOLUME_DENSITY) { - FluidDomainSettings_density_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels) { - FluidDomainSettings_density_grid_get(&b_domain.ptr, fpixels); - return true; - } - } - else if (attribute == ATTR_STD_VOLUME_FLAME) { - /* this is in range 0..1, and interpreted by the OpenGL smoke viewer - * as 1500..3000 K with the first part faded to zero density */ - FluidDomainSettings_flame_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels) { - FluidDomainSettings_flame_grid_get(&b_domain.ptr, fpixels); - return true; - } - } - else if (attribute == ATTR_STD_VOLUME_COLOR) { - /* the RGB is "premultiplied" by density for better interpolation results */ - FluidDomainSettings_color_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels * 4) { - FluidDomainSettings_color_grid_get(&b_domain.ptr, fpixels); - return true; - } - } - else if (attribute == ATTR_STD_VOLUME_VELOCITY) { - FluidDomainSettings_velocity_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels * 3) { - FluidDomainSettings_velocity_grid_get(&b_domain.ptr, fpixels); - return true; - } - } - else if (attribute == ATTR_STD_VOLUME_HEAT) { - FluidDomainSettings_heat_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels) { - FluidDomainSettings_heat_grid_get(&b_domain.ptr, fpixels); - return true; - } - } - else if (attribute == ATTR_STD_VOLUME_TEMPERATURE) { - FluidDomainSettings_temperature_grid_get_length(&b_domain.ptr, &length); - if (length == num_pixels) { - FluidDomainSettings_temperature_grid_get(&b_domain.ptr, fpixels); - return true; - } - } - else { - fprintf(stderr, - "Cycles error: unknown volume attribute %s, skipping\n", - Attribute::standard_name(attribute)); - fpixels[0] = 0.0f; - return false; - } -#else - (void)pixels; -#endif - fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n"); - return false; - } - - string name() const override - { - return Attribute::standard_name(attribute); - } - - bool equals(const ImageLoader &other) const override - { - const BlenderSmokeLoader &other_loader = (const BlenderSmokeLoader &)other; - return b_domain == other_loader.b_domain && attribute == other_loader.attribute; - } - - BL::FluidDomainSettings b_domain; - float3 texspace_loc, texspace_size; - AttributeStandard attribute; -}; - -static void sync_smoke_volume(Scene *scene, BObjectInfo &b_ob_info, Volume *volume, float frame) -{ - if (!b_ob_info.is_real_object_data()) { - return; - } - BL::FluidDomainSettings b_domain = object_fluid_gas_domain_find(b_ob_info.real_object); - if (!b_domain) { - return; - } - - AttributeStandard attributes[] = 
{ATTR_STD_VOLUME_DENSITY, - ATTR_STD_VOLUME_COLOR, - ATTR_STD_VOLUME_FLAME, - ATTR_STD_VOLUME_HEAT, - ATTR_STD_VOLUME_TEMPERATURE, - ATTR_STD_VOLUME_VELOCITY, - ATTR_STD_NONE}; - - for (int i = 0; attributes[i] != ATTR_STD_NONE; i++) { - AttributeStandard std = attributes[i]; - if (!volume->need_attribute(scene, std)) { - continue; - } - - volume->set_clipping(b_domain.clipping()); - - Attribute *attr = volume->attributes.add(std); - - ImageLoader *loader = new BlenderSmokeLoader(b_ob_info.real_object, std); - ImageParams params; - params.frame = frame; - - attr->data_voxel() = scene->image_manager->add_image(loader, params); - } -} - -class BlenderVolumeLoader : public VDBImageLoader { - public: - BlenderVolumeLoader(BL::BlendData &b_data, BL::Volume &b_volume, const string &grid_name) - : VDBImageLoader(grid_name), b_volume(b_volume) - { - b_volume.grids.load(b_data.ptr.data); - -#ifdef WITH_OPENVDB - for (BL::VolumeGrid &b_volume_grid : b_volume.grids) { - if (b_volume_grid.name() == grid_name) { - const bool unload = !b_volume_grid.is_loaded(); - - ::Volume *volume = (::Volume *)b_volume.ptr.data; - const VolumeGrid *volume_grid = (VolumeGrid *)b_volume_grid.ptr.data; - grid = BKE_volume_grid_openvdb_for_read(volume, volume_grid); - - if (unload) { - b_volume_grid.unload(); - } - - break; - } - } -#endif - } - - BL::Volume b_volume; -}; - -static void sync_volume_object(BL::BlendData &b_data, - BObjectInfo &b_ob_info, - Scene *scene, - Volume *volume) -{ - BL::Volume b_volume(b_ob_info.object_data); - b_volume.grids.load(b_data.ptr.data); - - BL::VolumeRender b_render(b_volume.render()); - - volume->set_clipping(b_render.clipping()); - volume->set_step_size(b_render.step_size()); - volume->set_object_space((b_render.space() == BL::VolumeRender::space_OBJECT)); - - /* Find grid with matching name. */ - for (BL::VolumeGrid &b_grid : b_volume.grids) { - ustring name = ustring(b_grid.name()); - AttributeStandard std = ATTR_STD_NONE; - - if (name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) { - std = ATTR_STD_VOLUME_DENSITY; - } - else if (name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) { - std = ATTR_STD_VOLUME_COLOR; - } - else if (name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) { - std = ATTR_STD_VOLUME_FLAME; - } - else if (name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) { - std = ATTR_STD_VOLUME_HEAT; - } - else if (name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) { - std = ATTR_STD_VOLUME_TEMPERATURE; - } - else if (name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) { - std = ATTR_STD_VOLUME_VELOCITY; - } - - if ((std != ATTR_STD_NONE && volume->need_attribute(scene, std)) || - volume->need_attribute(scene, name)) { - Attribute *attr = (std != ATTR_STD_NONE) ? - volume->attributes.add(std) : - volume->attributes.add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VOXEL); - - ImageLoader *loader = new BlenderVolumeLoader(b_data, b_volume, name.string()); - ImageParams params; - params.frame = b_volume.grids.frame(); - - attr->data_voxel() = scene->image_manager->add_image(loader, params, false); - } - } -} - -void BlenderSync::sync_volume(BObjectInfo &b_ob_info, Volume *volume) -{ - volume->clear(true); - - if (view_layer.use_volumes) { - if (b_ob_info.object_data.is_a(&RNA_Volume)) { - /* Volume object. Create only attributes, bounding mesh will then - * be automatically generated later. */ - sync_volume_object(b_data, b_ob_info, scene, volume); - } - else { - /* Smoke domain. 
*/ - sync_smoke_volume(scene, b_ob_info, volume, b_scene.frame_current()); - } - } - - /* Tag update. */ - volume->tag_update(scene, true); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/blender/camera.cpp b/intern/cycles/blender/camera.cpp new file mode 100644 index 00000000000..f87ebe39d21 --- /dev/null +++ b/intern/cycles/blender/camera.cpp @@ -0,0 +1,965 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "scene/camera.h" +#include "scene/scene.h" + +#include "blender/sync.h" +#include "blender/util.h" + +#include "util/log.h" + +CCL_NAMESPACE_BEGIN + +/* Blender Camera Intermediate: we first convert both the offline and 3d view + * render camera to this, and from there convert to our native camera format. */ + +struct BlenderCamera { + float nearclip; + float farclip; + + CameraType type; + float ortho_scale; + + float lens; + float shuttertime; + Camera::MotionPosition motion_position; + array shutter_curve; + + Camera::RollingShutterType rolling_shutter_type; + float rolling_shutter_duration; + + float aperturesize; + uint apertureblades; + float aperturerotation; + float focaldistance; + + float2 shift; + float2 offset; + float zoom; + + float2 pixelaspect; + + float aperture_ratio; + + PanoramaType panorama_type; + float fisheye_fov; + float fisheye_lens; + float latitude_min; + float latitude_max; + float longitude_min; + float longitude_max; + bool use_spherical_stereo; + float interocular_distance; + float convergence_distance; + bool use_pole_merge; + float pole_merge_angle_from; + float pole_merge_angle_to; + + enum { AUTO, HORIZONTAL, VERTICAL } sensor_fit; + float sensor_width; + float sensor_height; + + int full_width; + int full_height; + + int render_width; + int render_height; + + BoundBox2D border; + BoundBox2D viewport_camera_border; + BoundBox2D pano_viewplane; + float pano_aspectratio; + + float passepartout_alpha; + + Transform matrix; + + float offscreen_dicing_scale; + + int motion_steps; +}; + +static void blender_camera_init(BlenderCamera *bcam, BL::RenderSettings &b_render) +{ + memset((void *)bcam, 0, sizeof(BlenderCamera)); + + bcam->nearclip = 1e-5f; + bcam->farclip = 1e5f; + + bcam->type = CAMERA_PERSPECTIVE; + bcam->ortho_scale = 1.0f; + + bcam->lens = 50.0f; + bcam->shuttertime = 1.0f; + + bcam->rolling_shutter_type = Camera::ROLLING_SHUTTER_NONE; + bcam->rolling_shutter_duration = 0.1f; + + bcam->aperturesize = 0.0f; + bcam->apertureblades = 0; + bcam->aperturerotation = 0.0f; + bcam->focaldistance = 10.0f; + + bcam->zoom = 1.0f; + bcam->pixelaspect = one_float2(); + bcam->aperture_ratio = 1.0f; + + bcam->sensor_width = 36.0f; + bcam->sensor_height = 24.0f; + bcam->sensor_fit = BlenderCamera::AUTO; + bcam->motion_position = Camera::MOTION_POSITION_CENTER; + bcam->border.right = 1.0f; + bcam->border.top = 1.0f; + bcam->viewport_camera_border.right = 1.0f; + bcam->viewport_camera_border.top = 1.0f; + bcam->pano_viewplane.right = 1.0f; + bcam->pano_viewplane.top = 1.0f; + 
bcam->pano_aspectratio = 0.0f; + bcam->passepartout_alpha = 0.5f; + bcam->offscreen_dicing_scale = 1.0f; + bcam->matrix = transform_identity(); + + /* render resolution */ + bcam->render_width = render_resolution_x(b_render); + bcam->render_height = render_resolution_y(b_render); + bcam->full_width = bcam->render_width; + bcam->full_height = bcam->render_height; +} + +static float blender_camera_focal_distance(BL::RenderEngine &b_engine, + BL::Object &b_ob, + BL::Camera &b_camera, + BlenderCamera *bcam) +{ + BL::Object b_dof_object = b_camera.dof().focus_object(); + + if (!b_dof_object) + return b_camera.dof().focus_distance(); + + /* for dof object, return distance along camera Z direction */ + BL::Array b_ob_matrix; + b_engine.camera_model_matrix(b_ob, bcam->use_spherical_stereo, b_ob_matrix); + Transform obmat = transform_clear_scale(get_transform(b_ob_matrix)); + Transform dofmat = get_transform(b_dof_object.matrix_world()); + float3 view_dir = normalize(transform_get_column(&obmat, 2)); + float3 dof_dir = transform_get_column(&obmat, 3) - transform_get_column(&dofmat, 3); + return fabsf(dot(view_dir, dof_dir)); +} + +static void blender_camera_from_object(BlenderCamera *bcam, + BL::RenderEngine &b_engine, + BL::Object &b_ob, + bool skip_panorama = false) +{ + BL::ID b_ob_data = b_ob.data(); + + if (b_ob_data.is_a(&RNA_Camera)) { + BL::Camera b_camera(b_ob_data); + PointerRNA ccamera = RNA_pointer_get(&b_camera.ptr, "cycles"); + + bcam->nearclip = b_camera.clip_start(); + bcam->farclip = b_camera.clip_end(); + + switch (b_camera.type()) { + case BL::Camera::type_ORTHO: + bcam->type = CAMERA_ORTHOGRAPHIC; + break; + case BL::Camera::type_PANO: + if (!skip_panorama) + bcam->type = CAMERA_PANORAMA; + else + bcam->type = CAMERA_PERSPECTIVE; + break; + case BL::Camera::type_PERSP: + default: + bcam->type = CAMERA_PERSPECTIVE; + break; + } + + bcam->panorama_type = (PanoramaType)get_enum( + ccamera, "panorama_type", PANORAMA_NUM_TYPES, PANORAMA_EQUIRECTANGULAR); + + bcam->fisheye_fov = RNA_float_get(&ccamera, "fisheye_fov"); + bcam->fisheye_lens = RNA_float_get(&ccamera, "fisheye_lens"); + bcam->latitude_min = RNA_float_get(&ccamera, "latitude_min"); + bcam->latitude_max = RNA_float_get(&ccamera, "latitude_max"); + bcam->longitude_min = RNA_float_get(&ccamera, "longitude_min"); + bcam->longitude_max = RNA_float_get(&ccamera, "longitude_max"); + + bcam->interocular_distance = b_camera.stereo().interocular_distance(); + if (b_camera.stereo().convergence_mode() == BL::CameraStereoData::convergence_mode_PARALLEL) { + bcam->convergence_distance = FLT_MAX; + } + else { + bcam->convergence_distance = b_camera.stereo().convergence_distance(); + } + bcam->use_spherical_stereo = b_engine.use_spherical_stereo(b_ob); + + bcam->use_pole_merge = b_camera.stereo().use_pole_merge(); + bcam->pole_merge_angle_from = b_camera.stereo().pole_merge_angle_from(); + bcam->pole_merge_angle_to = b_camera.stereo().pole_merge_angle_to(); + + bcam->ortho_scale = b_camera.ortho_scale(); + + bcam->lens = b_camera.lens(); + + bcam->passepartout_alpha = b_camera.show_passepartout() ? 
b_camera.passepartout_alpha() : 0.0f; + + if (b_camera.dof().use_dof()) { + /* allow f/stop number to change aperture_size but still + * give manual control over aperture radius */ + float fstop = b_camera.dof().aperture_fstop(); + fstop = max(fstop, 1e-5f); + + if (bcam->type == CAMERA_ORTHOGRAPHIC) + bcam->aperturesize = 1.0f / (2.0f * fstop); + else + bcam->aperturesize = (bcam->lens * 1e-3f) / (2.0f * fstop); + + bcam->apertureblades = b_camera.dof().aperture_blades(); + bcam->aperturerotation = b_camera.dof().aperture_rotation(); + bcam->focaldistance = blender_camera_focal_distance(b_engine, b_ob, b_camera, bcam); + bcam->aperture_ratio = b_camera.dof().aperture_ratio(); + } + else { + /* DOF is turned of for the camera. */ + bcam->aperturesize = 0.0f; + bcam->apertureblades = 0; + bcam->aperturerotation = 0.0f; + bcam->focaldistance = 0.0f; + bcam->aperture_ratio = 1.0f; + } + + bcam->shift.x = b_engine.camera_shift_x(b_ob, bcam->use_spherical_stereo); + bcam->shift.y = b_camera.shift_y(); + + bcam->sensor_width = b_camera.sensor_width(); + bcam->sensor_height = b_camera.sensor_height(); + + if (b_camera.sensor_fit() == BL::Camera::sensor_fit_AUTO) + bcam->sensor_fit = BlenderCamera::AUTO; + else if (b_camera.sensor_fit() == BL::Camera::sensor_fit_HORIZONTAL) + bcam->sensor_fit = BlenderCamera::HORIZONTAL; + else + bcam->sensor_fit = BlenderCamera::VERTICAL; + } + else if (b_ob_data.is_a(&RNA_Light)) { + /* Can also look through spot light. */ + BL::SpotLight b_light(b_ob_data); + float lens = 16.0f / tanf(b_light.spot_size() * 0.5f); + if (lens > 0.0f) { + bcam->lens = lens; + } + } + + bcam->motion_steps = object_motion_steps(b_ob, b_ob); +} + +static Transform blender_camera_matrix(const Transform &tfm, + const CameraType type, + const PanoramaType panorama_type) +{ + Transform result; + + if (type == CAMERA_PANORAMA) { + if (panorama_type == PANORAMA_MIRRORBALL) { + /* Mirror ball camera is looking into the negative Y direction + * which matches texture mirror ball mapping. 
+ */ + result = tfm * make_transform( + 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f); + } + else { + /* Make it so environment camera needs to be pointed in the direction + * of the positive x-axis to match an environment texture, this way + * it is looking at the center of the texture + */ + result = tfm * make_transform( + 0.0f, -1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 0.0f, 0.0f); + } + } + else { + /* note the blender camera points along the negative z-axis */ + result = tfm * transform_scale(1.0f, 1.0f, -1.0f); + } + + return transform_clear_scale(result); +} + +static void blender_camera_viewplane(BlenderCamera *bcam, + int width, + int height, + BoundBox2D *viewplane, + float *aspectratio, + float *sensor_size) +{ + /* dimensions */ + float xratio = (float)width * bcam->pixelaspect.x; + float yratio = (float)height * bcam->pixelaspect.y; + + /* compute x/y aspect and ratio */ + float xaspect, yaspect; + bool horizontal_fit; + + /* sensor fitting */ + if (bcam->sensor_fit == BlenderCamera::AUTO) { + horizontal_fit = (xratio > yratio); + if (sensor_size != NULL) { + *sensor_size = bcam->sensor_width; + } + } + else if (bcam->sensor_fit == BlenderCamera::HORIZONTAL) { + horizontal_fit = true; + if (sensor_size != NULL) { + *sensor_size = bcam->sensor_width; + } + } + else { + horizontal_fit = false; + if (sensor_size != NULL) { + *sensor_size = bcam->sensor_height; + } + } + + if (horizontal_fit) { + if (aspectratio != NULL) { + *aspectratio = xratio / yratio; + } + xaspect = *aspectratio; + yaspect = 1.0f; + } + else { + if (aspectratio != NULL) { + *aspectratio = yratio / xratio; + } + xaspect = 1.0f; + yaspect = *aspectratio; + } + + /* modify aspect for orthographic scale */ + if (bcam->type == CAMERA_ORTHOGRAPHIC) { + xaspect = xaspect * bcam->ortho_scale / (*aspectratio * 2.0f); + yaspect = yaspect * bcam->ortho_scale / (*aspectratio * 2.0f); + if (aspectratio != NULL) { + *aspectratio = bcam->ortho_scale / 2.0f; + } + } + + if (bcam->type == CAMERA_PANORAMA) { + /* Set viewplane for panoramic camera. */ + if (viewplane != NULL) { + *viewplane = bcam->pano_viewplane; + + /* Modify viewplane for camera shift. */ + const float shift_factor = (bcam->pano_aspectratio == 0.0f) ? 
+ 1.0f : + *aspectratio / bcam->pano_aspectratio; + const float dx = bcam->shift.x * shift_factor; + const float dy = bcam->shift.y * shift_factor; + + viewplane->left += dx; + viewplane->right += dx; + viewplane->bottom += dy; + viewplane->top += dy; + } + } + else { + /* set viewplane */ + if (viewplane != NULL) { + viewplane->left = -xaspect; + viewplane->right = xaspect; + viewplane->bottom = -yaspect; + viewplane->top = yaspect; + + /* zoom for 3d camera view */ + *viewplane = (*viewplane) * bcam->zoom; + + /* modify viewplane with camera shift and 3d camera view offset */ + const float dx = 2.0f * (*aspectratio * bcam->shift.x + bcam->offset.x * xaspect * 2.0f); + const float dy = 2.0f * (*aspectratio * bcam->shift.y + bcam->offset.y * yaspect * 2.0f); + + viewplane->left += dx; + viewplane->right += dx; + viewplane->bottom += dy; + viewplane->top += dy; + } + } +} + +static void blender_camera_sync(Camera *cam, + BlenderCamera *bcam, + int width, + int height, + const char *viewname, + PointerRNA *cscene) +{ + float aspectratio, sensor_size; + + /* viewplane */ + BoundBox2D viewplane; + blender_camera_viewplane(bcam, width, height, &viewplane, &aspectratio, &sensor_size); + + cam->set_viewplane_left(viewplane.left); + cam->set_viewplane_right(viewplane.right); + cam->set_viewplane_top(viewplane.top); + cam->set_viewplane_bottom(viewplane.bottom); + + cam->set_full_width(width); + cam->set_full_height(height); + + /* panorama sensor */ + if (bcam->type == CAMERA_PANORAMA && bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID) { + float fit_xratio = (float)bcam->render_width * bcam->pixelaspect.x; + float fit_yratio = (float)bcam->render_height * bcam->pixelaspect.y; + bool horizontal_fit; + float sensor_size; + + if (bcam->sensor_fit == BlenderCamera::AUTO) { + horizontal_fit = (fit_xratio > fit_yratio); + sensor_size = bcam->sensor_width; + } + else if (bcam->sensor_fit == BlenderCamera::HORIZONTAL) { + horizontal_fit = true; + sensor_size = bcam->sensor_width; + } + else { /* vertical */ + horizontal_fit = false; + sensor_size = bcam->sensor_height; + } + + if (horizontal_fit) { + cam->set_sensorwidth(sensor_size); + cam->set_sensorheight(sensor_size * fit_yratio / fit_xratio); + } + else { + cam->set_sensorwidth(sensor_size * fit_xratio / fit_yratio); + cam->set_sensorheight(sensor_size); + } + } + + /* clipping distances */ + cam->set_nearclip(bcam->nearclip); + cam->set_farclip(bcam->farclip); + + /* type */ + cam->set_camera_type(bcam->type); + + /* panorama */ + cam->set_panorama_type(bcam->panorama_type); + cam->set_fisheye_fov(bcam->fisheye_fov); + cam->set_fisheye_lens(bcam->fisheye_lens); + cam->set_latitude_min(bcam->latitude_min); + cam->set_latitude_max(bcam->latitude_max); + + cam->set_longitude_min(bcam->longitude_min); + cam->set_longitude_max(bcam->longitude_max); + + /* panorama stereo */ + cam->set_interocular_distance(bcam->interocular_distance); + cam->set_convergence_distance(bcam->convergence_distance); + cam->set_use_spherical_stereo(bcam->use_spherical_stereo); + + if (cam->get_use_spherical_stereo()) { + if (strcmp(viewname, "left") == 0) + cam->set_stereo_eye(Camera::STEREO_LEFT); + else if (strcmp(viewname, "right") == 0) + cam->set_stereo_eye(Camera::STEREO_RIGHT); + else + cam->set_stereo_eye(Camera::STEREO_NONE); + } + + cam->set_use_pole_merge(bcam->use_pole_merge); + cam->set_pole_merge_angle_from(bcam->pole_merge_angle_from); + cam->set_pole_merge_angle_to(bcam->pole_merge_angle_to); + + /* anamorphic lens bokeh */ + 
cam->set_aperture_ratio(bcam->aperture_ratio); + + /* perspective */ + cam->set_fov(2.0f * atanf((0.5f * sensor_size) / bcam->lens / aspectratio)); + cam->set_focaldistance(bcam->focaldistance); + cam->set_aperturesize(bcam->aperturesize); + cam->set_blades(bcam->apertureblades); + cam->set_bladesrotation(bcam->aperturerotation); + + /* transform */ + cam->set_matrix(blender_camera_matrix(bcam->matrix, bcam->type, bcam->panorama_type)); + + array motion; + motion.resize(bcam->motion_steps, cam->get_matrix()); + cam->set_motion(motion); + cam->set_use_perspective_motion(false); + + cam->set_shuttertime(bcam->shuttertime); + cam->set_fov_pre(cam->get_fov()); + cam->set_fov_post(cam->get_fov()); + cam->set_motion_position(bcam->motion_position); + + cam->set_rolling_shutter_type(bcam->rolling_shutter_type); + cam->set_rolling_shutter_duration(bcam->rolling_shutter_duration); + + cam->set_shutter_curve(bcam->shutter_curve); + + /* border */ + cam->set_border_left(bcam->border.left); + cam->set_border_right(bcam->border.right); + cam->set_border_top(bcam->border.top); + cam->set_border_bottom(bcam->border.bottom); + + cam->set_viewport_camera_border_left(bcam->viewport_camera_border.left); + cam->set_viewport_camera_border_right(bcam->viewport_camera_border.right); + cam->set_viewport_camera_border_top(bcam->viewport_camera_border.top); + cam->set_viewport_camera_border_bottom(bcam->viewport_camera_border.bottom); + + bcam->offscreen_dicing_scale = RNA_float_get(cscene, "offscreen_dicing_scale"); + cam->set_offscreen_dicing_scale(bcam->offscreen_dicing_scale); +} + +/* Sync Render Camera */ + +void BlenderSync::sync_camera(BL::RenderSettings &b_render, + BL::Object &b_override, + int width, + int height, + const char *viewname) +{ + BlenderCamera bcam; + blender_camera_init(&bcam, b_render); + + /* pixel aspect */ + bcam.pixelaspect.x = b_render.pixel_aspect_x(); + bcam.pixelaspect.y = b_render.pixel_aspect_y(); + bcam.shuttertime = b_render.motion_blur_shutter(); + + BL::CurveMapping b_shutter_curve(b_render.motion_blur_shutter_curve()); + curvemapping_to_array(b_shutter_curve, bcam.shutter_curve, RAMP_TABLE_SIZE); + + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + bcam.motion_position = (Camera::MotionPosition)get_enum(cscene, + "motion_blur_position", + Camera::MOTION_NUM_POSITIONS, + Camera::MOTION_POSITION_CENTER); + bcam.rolling_shutter_type = (Camera::RollingShutterType)get_enum( + cscene, + "rolling_shutter_type", + Camera::ROLLING_SHUTTER_NUM_TYPES, + Camera::ROLLING_SHUTTER_NONE); + bcam.rolling_shutter_duration = RNA_float_get(&cscene, "rolling_shutter_duration"); + + /* border */ + if (b_render.use_border()) { + bcam.border.left = b_render.border_min_x(); + bcam.border.right = b_render.border_max_x(); + bcam.border.bottom = b_render.border_min_y(); + bcam.border.top = b_render.border_max_y(); + } + + /* camera object */ + BL::Object b_ob = b_scene.camera(); + + if (b_override) + b_ob = b_override; + + if (b_ob) { + BL::Array b_ob_matrix; + blender_camera_from_object(&bcam, b_engine, b_ob); + b_engine.camera_model_matrix(b_ob, bcam.use_spherical_stereo, b_ob_matrix); + bcam.matrix = get_transform(b_ob_matrix); + } + + /* sync */ + Camera *cam = scene->camera; + blender_camera_sync(cam, &bcam, width, height, viewname, &cscene); + + /* dicing camera */ + b_ob = BL::Object(RNA_pointer_get(&cscene, "dicing_camera")); + if (b_ob) { + BL::Array b_ob_matrix; + blender_camera_from_object(&bcam, b_engine, b_ob); + b_engine.camera_model_matrix(b_ob, bcam.use_spherical_stereo, 
b_ob_matrix); + bcam.matrix = get_transform(b_ob_matrix); + + blender_camera_sync(scene->dicing_camera, &bcam, width, height, viewname, &cscene); + } + else { + *scene->dicing_camera = *cam; + } +} + +void BlenderSync::sync_camera_motion( + BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time) +{ + if (!b_ob) + return; + + Camera *cam = scene->camera; + BL::Array b_ob_matrix; + b_engine.camera_model_matrix(b_ob, cam->get_use_spherical_stereo(), b_ob_matrix); + Transform tfm = get_transform(b_ob_matrix); + tfm = blender_camera_matrix(tfm, cam->get_camera_type(), cam->get_panorama_type()); + + if (motion_time == 0.0f) { + /* When motion blur is not centered in frame, cam->matrix gets reset. */ + cam->set_matrix(tfm); + } + + /* Set transform in motion array. */ + int motion_step = cam->motion_step(motion_time); + if (motion_step >= 0) { + array motion = cam->get_motion(); + motion[motion_step] = tfm; + cam->set_motion(motion); + } + + if (cam->get_camera_type() == CAMERA_PERSPECTIVE) { + BlenderCamera bcam; + float aspectratio, sensor_size; + blender_camera_init(&bcam, b_render); + + /* TODO(sergey): Consider making it a part of blender_camera_init(). */ + bcam.pixelaspect.x = b_render.pixel_aspect_x(); + bcam.pixelaspect.y = b_render.pixel_aspect_y(); + + blender_camera_from_object(&bcam, b_engine, b_ob); + blender_camera_viewplane(&bcam, width, height, NULL, &aspectratio, &sensor_size); + /* TODO(sergey): De-duplicate calculation with camera sync. */ + float fov = 2.0f * atanf((0.5f * sensor_size) / bcam.lens / aspectratio); + if (fov != cam->get_fov()) { + VLOG(1) << "Camera " << b_ob.name() << " FOV change detected."; + if (motion_time == 0.0f) { + cam->set_fov(fov); + } + else if (motion_time == -1.0f) { + cam->set_fov_pre(fov); + cam->set_use_perspective_motion(true); + } + else if (motion_time == 1.0f) { + cam->set_fov_post(fov); + cam->set_use_perspective_motion(true); + } + } + } +} + +/* Sync 3D View Camera */ + +static void blender_camera_view_subset(BL::RenderEngine &b_engine, + BL::RenderSettings &b_render, + BL::Scene &b_scene, + BL::Object &b_ob, + BL::SpaceView3D &b_v3d, + BL::RegionView3D &b_rv3d, + int width, + int height, + BoundBox2D *view_box, + BoundBox2D *cam_box, + float *view_aspect); + +static void blender_camera_from_view(BlenderCamera *bcam, + BL::RenderEngine &b_engine, + BL::Scene &b_scene, + BL::SpaceView3D &b_v3d, + BL::RegionView3D &b_rv3d, + int width, + int height, + bool skip_panorama = false) +{ + /* 3d view parameters */ + bcam->nearclip = b_v3d.clip_start(); + bcam->farclip = b_v3d.clip_end(); + bcam->lens = b_v3d.lens(); + bcam->shuttertime = b_scene.render().motion_blur_shutter(); + + BL::CurveMapping b_shutter_curve(b_scene.render().motion_blur_shutter_curve()); + curvemapping_to_array(b_shutter_curve, bcam->shutter_curve, RAMP_TABLE_SIZE); + + if (b_rv3d.view_perspective() == BL::RegionView3D::view_perspective_CAMERA) { + /* camera view */ + BL::Object b_ob = (b_v3d.use_local_camera()) ? 
b_v3d.camera() : b_scene.camera(); + + if (b_ob) { + blender_camera_from_object(bcam, b_engine, b_ob, skip_panorama); + + if (!skip_panorama && bcam->type == CAMERA_PANORAMA) { + /* in panorama camera view, we map viewplane to camera border */ + BoundBox2D view_box, cam_box; + float view_aspect; + + BL::RenderSettings b_render_settings(b_scene.render()); + blender_camera_view_subset(b_engine, + b_render_settings, + b_scene, + b_ob, + b_v3d, + b_rv3d, + width, + height, + &view_box, + &cam_box, + &view_aspect); + + bcam->pano_viewplane = view_box.make_relative_to(cam_box); + bcam->pano_aspectratio = view_aspect; + } + else { + /* magic zoom formula */ + bcam->zoom = (float)b_rv3d.view_camera_zoom(); + bcam->zoom = (1.41421f + bcam->zoom / 50.0f); + bcam->zoom *= bcam->zoom; + bcam->zoom = 2.0f / bcam->zoom; + + /* offset */ + bcam->offset = get_float2(b_rv3d.view_camera_offset()); + } + } + } + else if (b_rv3d.view_perspective() == BL::RegionView3D::view_perspective_ORTHO) { + /* orthographic view */ + bcam->farclip *= 0.5f; + bcam->nearclip = -bcam->farclip; + + float sensor_size; + if (bcam->sensor_fit == BlenderCamera::VERTICAL) + sensor_size = bcam->sensor_height; + else + sensor_size = bcam->sensor_width; + + bcam->type = CAMERA_ORTHOGRAPHIC; + bcam->ortho_scale = b_rv3d.view_distance() * sensor_size / b_v3d.lens(); + } + + bcam->zoom *= 2.0f; + + /* 3d view transform */ + bcam->matrix = transform_inverse(get_transform(b_rv3d.view_matrix())); + + /* dimensions */ + bcam->full_width = width; + bcam->full_height = height; +} + +static void blender_camera_view_subset(BL::RenderEngine &b_engine, + BL::RenderSettings &b_render, + BL::Scene &b_scene, + BL::Object &b_ob, + BL::SpaceView3D &b_v3d, + BL::RegionView3D &b_rv3d, + int width, + int height, + BoundBox2D *view_box, + BoundBox2D *cam_box, + float *view_aspect) +{ + BoundBox2D cam, view; + float cam_aspect, sensor_size; + + /* Get viewport viewplane. */ + BlenderCamera view_bcam; + blender_camera_init(&view_bcam, b_render); + blender_camera_from_view(&view_bcam, b_engine, b_scene, b_v3d, b_rv3d, width, height, true); + + blender_camera_viewplane(&view_bcam, width, height, &view, view_aspect, &sensor_size); + + /* Get camera viewplane. */ + BlenderCamera cam_bcam; + blender_camera_init(&cam_bcam, b_render); + blender_camera_from_object(&cam_bcam, b_engine, b_ob, true); + + /* Camera border is affect by aspect, viewport is not. */ + cam_bcam.pixelaspect.x = b_render.pixel_aspect_x(); + cam_bcam.pixelaspect.y = b_render.pixel_aspect_y(); + + blender_camera_viewplane( + &cam_bcam, cam_bcam.full_width, cam_bcam.full_height, &cam, &cam_aspect, &sensor_size); + + /* Return */ + *view_box = view * (1.0f / *view_aspect); + *cam_box = cam * (1.0f / cam_aspect); +} + +static void blender_camera_border_subset(BL::RenderEngine &b_engine, + BL::RenderSettings &b_render, + BL::Scene &b_scene, + BL::SpaceView3D &b_v3d, + BL::RegionView3D &b_rv3d, + BL::Object &b_ob, + int width, + int height, + const BoundBox2D &border, + BoundBox2D *result) +{ + /* Determine camera viewport subset. */ + BoundBox2D view_box, cam_box; + float view_aspect; + blender_camera_view_subset(b_engine, + b_render, + b_scene, + b_ob, + b_v3d, + b_rv3d, + width, + height, + &view_box, + &cam_box, + &view_aspect); + + /* Determine viewport subset matching given border. 
*/ + cam_box = cam_box.make_relative_to(view_box); + *result = cam_box.subset(border); +} + +static void blender_camera_border(BlenderCamera *bcam, + BL::RenderEngine &b_engine, + BL::RenderSettings &b_render, + BL::Scene &b_scene, + BL::SpaceView3D &b_v3d, + BL::RegionView3D &b_rv3d, + int width, + int height) +{ + bool is_camera_view; + + /* camera view? */ + is_camera_view = b_rv3d.view_perspective() == BL::RegionView3D::view_perspective_CAMERA; + + if (!is_camera_view) { + /* for non-camera view check whether render border is enabled for viewport + * and if so use border from 3d viewport + * assume viewport has got correctly clamped border already + */ + if (b_v3d.use_render_border()) { + bcam->border.left = b_v3d.render_border_min_x(); + bcam->border.right = b_v3d.render_border_max_x(); + bcam->border.bottom = b_v3d.render_border_min_y(); + bcam->border.top = b_v3d.render_border_max_y(); + } + return; + } + + BL::Object b_ob = (b_v3d.use_local_camera()) ? b_v3d.camera() : b_scene.camera(); + + if (!b_ob) + return; + + /* Determine camera border inside the viewport. */ + BoundBox2D full_border; + blender_camera_border_subset(b_engine, + b_render, + b_scene, + b_v3d, + b_rv3d, + b_ob, + width, + height, + full_border, + &bcam->viewport_camera_border); + + if (b_render.use_border()) { + bcam->border.left = b_render.border_min_x(); + bcam->border.right = b_render.border_max_x(); + bcam->border.bottom = b_render.border_min_y(); + bcam->border.top = b_render.border_max_y(); + } + else if (bcam->passepartout_alpha == 1.0f) { + bcam->border = full_border; + } + else { + return; + } + + /* Determine viewport subset matching camera border. */ + blender_camera_border_subset(b_engine, + b_render, + b_scene, + b_v3d, + b_rv3d, + b_ob, + width, + height, + bcam->border, + &bcam->border); + bcam->border = bcam->border.clamp(); +} + +void BlenderSync::sync_view(BL::SpaceView3D &b_v3d, + BL::RegionView3D &b_rv3d, + int width, + int height) +{ + BlenderCamera bcam; + BL::RenderSettings b_render_settings(b_scene.render()); + blender_camera_init(&bcam, b_render_settings); + blender_camera_from_view(&bcam, b_engine, b_scene, b_v3d, b_rv3d, width, height); + blender_camera_border(&bcam, b_engine, b_render_settings, b_scene, b_v3d, b_rv3d, width, height); + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + blender_camera_sync(scene->camera, &bcam, width, height, "", &cscene); + + /* dicing camera */ + BL::Object b_ob = BL::Object(RNA_pointer_get(&cscene, "dicing_camera")); + if (b_ob) { + BL::Array b_ob_matrix; + blender_camera_from_object(&bcam, b_engine, b_ob); + b_engine.camera_model_matrix(b_ob, bcam.use_spherical_stereo, b_ob_matrix); + bcam.matrix = get_transform(b_ob_matrix); + + blender_camera_sync(scene->dicing_camera, &bcam, width, height, "", &cscene); + } + else { + *scene->dicing_camera = *scene->camera; + } +} + +BufferParams BlenderSync::get_buffer_params( + BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height) +{ + BufferParams params; + bool use_border = false; + + params.full_width = width; + params.full_height = height; + + if (b_v3d && b_rv3d && b_rv3d.view_perspective() != BL::RegionView3D::view_perspective_CAMERA) + use_border = b_v3d.use_render_border(); + else + /* the camera can always have a passepartout */ + use_border = true; + + if (use_border) { + /* border render */ + /* the viewport may offset the border outside the view */ + BoundBox2D border = cam->border.clamp(); + params.full_x = (int)(border.left * (float)width); + 
params.full_y = (int)(border.bottom * (float)height); + params.width = (int)(border.right * (float)width) - params.full_x; + params.height = (int)(border.top * (float)height) - params.full_y; + + /* survive in case border goes out of view or becomes too small */ + params.width = max(params.width, 1); + params.height = max(params.height, 1); + } + else { + params.width = width; + params.height = height; + } + + params.window_width = params.width; + params.window_height = params.height; + + return params; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/curves.cpp b/intern/cycles/blender/curves.cpp new file mode 100644 index 00000000000..fb2b329e61d --- /dev/null +++ b/intern/cycles/blender/curves.cpp @@ -0,0 +1,915 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "blender/sync.h" +#include "blender/util.h" + +#include "scene/attribute.h" +#include "scene/camera.h" +#include "scene/curves.h" +#include "scene/hair.h" +#include "scene/object.h" +#include "scene/scene.h" + +#include "util/color.h" +#include "util/foreach.h" +#include "util/hash.h" +#include "util/log.h" + +CCL_NAMESPACE_BEGIN + +ParticleCurveData::ParticleCurveData() +{ +} + +ParticleCurveData::~ParticleCurveData() +{ +} + +static float shaperadius(float shape, float root, float tip, float time) +{ + assert(time >= 0.0f); + assert(time <= 1.0f); + float radius = 1.0f - time; + + if (shape != 0.0f) { + if (shape < 0.0f) + radius = powf(radius, 1.0f + shape); + else + radius = powf(radius, 1.0f / (1.0f - shape)); + } + return (radius * (root - tip)) + tip; +} + +/* curve functions */ + +static bool ObtainCacheParticleData( + Hair *hair, BL::Mesh *b_mesh, BL::Object *b_ob, ParticleCurveData *CData, bool background) +{ + int curvenum = 0; + int keyno = 0; + + if (!(hair && b_mesh && b_ob && CData)) + return false; + + Transform tfm = get_transform(b_ob->matrix_world()); + Transform itfm = transform_quick_inverse(tfm); + + for (BL::Modifier &b_mod : b_ob->modifiers) { + if ((b_mod.type() == b_mod.type_PARTICLE_SYSTEM) && + (background ? b_mod.show_render() : b_mod.show_viewport())) { + BL::ParticleSystemModifier psmd((const PointerRNA)b_mod.ptr); + BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr); + BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr); + + if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && + (b_part.type() == BL::ParticleSettings::type_HAIR)) { + int shader = clamp(b_part.material() - 1, 0, hair->get_used_shaders().size() - 1); + int display_step = background ? b_part.render_step() : b_part.display_step(); + int totparts = b_psys.particles.length(); + int totchild = background ? 
b_psys.child_particles.length() : + (int)((float)b_psys.child_particles.length() * + (float)b_part.display_percentage() / 100.0f); + int totcurves = totchild; + + if (b_part.child_type() == 0 || totchild == 0) + totcurves += totparts; + + if (totcurves == 0) + continue; + + int ren_step = (1 << display_step) + 1; + if (b_part.kink() == BL::ParticleSettings::kink_SPIRAL) + ren_step += b_part.kink_extra_steps(); + + CData->psys_firstcurve.push_back_slow(curvenum); + CData->psys_curvenum.push_back_slow(totcurves); + CData->psys_shader.push_back_slow(shader); + + float radius = b_part.radius_scale() * 0.5f; + + CData->psys_rootradius.push_back_slow(radius * b_part.root_radius()); + CData->psys_tipradius.push_back_slow(radius * b_part.tip_radius()); + CData->psys_shape.push_back_slow(b_part.shape()); + CData->psys_closetip.push_back_slow(b_part.use_close_tip()); + + int pa_no = 0; + if (!(b_part.child_type() == 0) && totchild != 0) + pa_no = totparts; + + int num_add = (totparts + totchild - pa_no); + CData->curve_firstkey.reserve(CData->curve_firstkey.size() + num_add); + CData->curve_keynum.reserve(CData->curve_keynum.size() + num_add); + CData->curve_length.reserve(CData->curve_length.size() + num_add); + CData->curvekey_co.reserve(CData->curvekey_co.size() + num_add * ren_step); + CData->curvekey_time.reserve(CData->curvekey_time.size() + num_add * ren_step); + + for (; pa_no < totparts + totchild; pa_no++) { + int keynum = 0; + CData->curve_firstkey.push_back_slow(keyno); + + float curve_length = 0.0f; + float3 prev_co_world = zero_float3(); + float3 prev_co_object = zero_float3(); + for (int step_no = 0; step_no < ren_step; step_no++) { + float3 co_world = prev_co_world; + b_psys.co_hair(*b_ob, pa_no, step_no, &co_world.x); + float3 co_object = transform_point(&itfm, co_world); + if (step_no > 0) { + const float step_length = len(co_object - prev_co_object); + curve_length += step_length; + } + CData->curvekey_co.push_back_slow(co_object); + CData->curvekey_time.push_back_slow(curve_length); + prev_co_object = co_object; + prev_co_world = co_world; + keynum++; + } + keyno += keynum; + + CData->curve_keynum.push_back_slow(keynum); + CData->curve_length.push_back_slow(curve_length); + curvenum++; + } + } + } + } + + return true; +} + +static bool ObtainCacheParticleUV(Hair *hair, + BL::Mesh *b_mesh, + BL::Object *b_ob, + ParticleCurveData *CData, + bool background, + int uv_num) +{ + if (!(hair && b_mesh && b_ob && CData)) + return false; + + CData->curve_uv.clear(); + + for (BL::Modifier &b_mod : b_ob->modifiers) { + if ((b_mod.type() == b_mod.type_PARTICLE_SYSTEM) && + (background ? b_mod.show_render() : b_mod.show_viewport())) { + BL::ParticleSystemModifier psmd((const PointerRNA)b_mod.ptr); + BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr); + BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr); + + if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && + (b_part.type() == BL::ParticleSettings::type_HAIR)) { + int totparts = b_psys.particles.length(); + int totchild = background ? 
b_psys.child_particles.length() : + (int)((float)b_psys.child_particles.length() * + (float)b_part.display_percentage() / 100.0f); + int totcurves = totchild; + + if (b_part.child_type() == 0 || totchild == 0) + totcurves += totparts; + + if (totcurves == 0) + continue; + + int pa_no = 0; + if (!(b_part.child_type() == 0) && totchild != 0) + pa_no = totparts; + + int num_add = (totparts + totchild - pa_no); + CData->curve_uv.reserve(CData->curve_uv.size() + num_add); + + BL::ParticleSystem::particles_iterator b_pa; + b_psys.particles.begin(b_pa); + for (; pa_no < totparts + totchild; pa_no++) { + /* Add UVs */ + BL::Mesh::uv_layers_iterator l; + b_mesh->uv_layers.begin(l); + + float2 uv = zero_float2(); + if (b_mesh->uv_layers.length()) + b_psys.uv_on_emitter(psmd, *b_pa, pa_no, uv_num, &uv.x); + CData->curve_uv.push_back_slow(uv); + + if (pa_no < totparts && b_pa != b_psys.particles.end()) + ++b_pa; + } + } + } + } + + return true; +} + +static bool ObtainCacheParticleVcol(Hair *hair, + BL::Mesh *b_mesh, + BL::Object *b_ob, + ParticleCurveData *CData, + bool background, + int vcol_num) +{ + if (!(hair && b_mesh && b_ob && CData)) + return false; + + CData->curve_vcol.clear(); + + for (BL::Modifier &b_mod : b_ob->modifiers) { + if ((b_mod.type() == b_mod.type_PARTICLE_SYSTEM) && + (background ? b_mod.show_render() : b_mod.show_viewport())) { + BL::ParticleSystemModifier psmd((const PointerRNA)b_mod.ptr); + BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr); + BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr); + + if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && + (b_part.type() == BL::ParticleSettings::type_HAIR)) { + int totparts = b_psys.particles.length(); + int totchild = background ? 
b_psys.child_particles.length() : + (int)((float)b_psys.child_particles.length() * + (float)b_part.display_percentage() / 100.0f); + int totcurves = totchild; + + if (b_part.child_type() == 0 || totchild == 0) + totcurves += totparts; + + if (totcurves == 0) + continue; + + int pa_no = 0; + if (!(b_part.child_type() == 0) && totchild != 0) + pa_no = totparts; + + int num_add = (totparts + totchild - pa_no); + CData->curve_vcol.reserve(CData->curve_vcol.size() + num_add); + + BL::ParticleSystem::particles_iterator b_pa; + b_psys.particles.begin(b_pa); + for (; pa_no < totparts + totchild; pa_no++) { + /* Add vertex colors */ + BL::Mesh::vertex_colors_iterator l; + b_mesh->vertex_colors.begin(l); + + float4 vcol = make_float4(0.0f, 0.0f, 0.0f, 1.0f); + if (b_mesh->vertex_colors.length()) + b_psys.mcol_on_emitter(psmd, *b_pa, pa_no, vcol_num, &vcol.x); + CData->curve_vcol.push_back_slow(vcol); + + if (pa_no < totparts && b_pa != b_psys.particles.end()) + ++b_pa; + } + } + } + } + + return true; +} + +static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CData) +{ + int num_keys = 0; + int num_curves = 0; + + if (hair->num_curves()) + return; + + Attribute *attr_intercept = NULL; + Attribute *attr_length = NULL; + Attribute *attr_random = NULL; + + if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) + attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT); + if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH)) + attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH); + if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) + attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM); + + /* compute and reserve size of arrays */ + for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) { + for (int curve = CData->psys_firstcurve[sys]; + curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; + curve++) { + num_keys += CData->curve_keynum[curve]; + num_curves++; + } + } + + if (num_curves > 0) { + VLOG(1) << "Exporting curve segments for mesh " << hair->name; + } + + hair->reserve_curves(hair->num_curves() + num_curves, hair->get_curve_keys().size() + num_keys); + + num_keys = 0; + num_curves = 0; + + /* actually export */ + for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) { + for (int curve = CData->psys_firstcurve[sys]; + curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; + curve++) { + size_t num_curve_keys = 0; + + for (int curvekey = CData->curve_firstkey[curve]; + curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve]; + curvekey++) { + const float3 ickey_loc = CData->curvekey_co[curvekey]; + const float curve_time = CData->curvekey_time[curvekey]; + const float curve_length = CData->curve_length[curve]; + const float time = (curve_length > 0.0f) ? 
curve_time / curve_length : 0.0f; + float radius = shaperadius( + CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time); + if (CData->psys_closetip[sys] && + (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)) { + radius = 0.0f; + } + hair->add_curve_key(ickey_loc, radius); + if (attr_intercept) + attr_intercept->add(time); + + num_curve_keys++; + } + + if (attr_length != NULL) { + attr_length->add(CData->curve_length[curve]); + } + + if (attr_random != NULL) { + attr_random->add(hash_uint2_to_float(num_curves, 0)); + } + + hair->add_curve(num_keys, CData->psys_shader[sys]); + num_keys += num_curve_keys; + num_curves++; + } + } + + /* check allocation */ + if ((hair->get_curve_keys().size() != num_keys) || (hair->num_curves() != num_curves)) { + VLOG(1) << "Allocation failed, clearing data"; + hair->clear(true); + } +} + +static float4 CurveSegmentMotionCV(ParticleCurveData *CData, int sys, int curve, int curvekey) +{ + const float3 ickey_loc = CData->curvekey_co[curvekey]; + const float curve_time = CData->curvekey_time[curvekey]; + const float curve_length = CData->curve_length[curve]; + float time = (curve_length > 0.0f) ? curve_time / curve_length : 0.0f; + float radius = shaperadius( + CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time); + + if (CData->psys_closetip[sys] && + (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)) + radius = 0.0f; + + /* curve motion keys store both position and radius in float4 */ + float4 mP = float3_to_float4(ickey_loc); + mP.w = radius; + return mP; +} + +static float4 LerpCurveSegmentMotionCV(ParticleCurveData *CData, int sys, int curve, float step) +{ + assert(step >= 0.0f); + assert(step <= 1.0f); + const int first_curve_key = CData->curve_firstkey[curve]; + const float curve_key_f = step * (CData->curve_keynum[curve] - 1); + int curvekey = (int)floorf(curve_key_f); + const float remainder = curve_key_f - curvekey; + if (remainder == 0.0f) { + return CurveSegmentMotionCV(CData, sys, curve, first_curve_key + curvekey); + } + int curvekey2 = curvekey + 1; + if (curvekey2 >= (CData->curve_keynum[curve] - 1)) { + curvekey2 = (CData->curve_keynum[curve] - 1); + curvekey = curvekey2 - 1; + } + const float4 mP = CurveSegmentMotionCV(CData, sys, curve, first_curve_key + curvekey); + const float4 mP2 = CurveSegmentMotionCV(CData, sys, curve, first_curve_key + curvekey2); + return lerp(mP, mP2, remainder); +} + +static void export_hair_motion_validate_attribute(Hair *hair, + int motion_step, + int num_motion_keys, + bool have_motion) +{ + Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + const int num_keys = hair->get_curve_keys().size(); + + if (num_motion_keys != num_keys || !have_motion) { + /* No motion or hair "topology" changed, remove attributes again. */ + if (num_motion_keys != num_keys) { + VLOG(1) << "Hair topology changed, removing attribute."; + } + else { + VLOG(1) << "No motion, removing attribute."; + } + hair->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION); + } + else if (motion_step > 0) { + VLOG(1) << "Filling in new motion vertex position for motion_step " << motion_step; + + /* Motion, fill up previous steps that we might have skipped because + * they had no motion, but we need them anyway now. 
*/ + for (int step = 0; step < motion_step; step++) { + float4 *mP = attr_mP->data_float4() + step * num_keys; + + for (int key = 0; key < num_keys; key++) { + mP[key] = float3_to_float4(hair->get_curve_keys()[key]); + mP[key].w = hair->get_curve_radius()[key]; + } + } + } +} + +static void ExportCurveSegmentsMotion(Hair *hair, ParticleCurveData *CData, int motion_step) +{ + VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step " + << motion_step; + + /* find attribute */ + Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + bool new_attribute = false; + + /* add new attribute if it doesn't exist already */ + if (!attr_mP) { + VLOG(1) << "Creating new motion vertex position attribute"; + attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); + new_attribute = true; + } + + /* export motion vectors for curve keys */ + size_t numkeys = hair->get_curve_keys().size(); + float4 *mP = attr_mP->data_float4() + motion_step * numkeys; + bool have_motion = false; + int i = 0; + int num_curves = 0; + + for (int sys = 0; sys < CData->psys_firstcurve.size(); sys++) { + for (int curve = CData->psys_firstcurve[sys]; + curve < CData->psys_firstcurve[sys] + CData->psys_curvenum[sys]; + curve++) { + /* Curve lengths may not match! Curves can be clipped. */ + int curve_key_end = (num_curves + 1 < (int)hair->get_curve_first_key().size() ? + hair->get_curve_first_key()[num_curves + 1] : + (int)hair->get_curve_keys().size()); + const int num_center_curve_keys = curve_key_end - hair->get_curve_first_key()[num_curves]; + const int is_num_keys_different = CData->curve_keynum[curve] - num_center_curve_keys; + + if (!is_num_keys_different) { + for (int curvekey = CData->curve_firstkey[curve]; + curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve]; + curvekey++) { + if (i < hair->get_curve_keys().size()) { + mP[i] = CurveSegmentMotionCV(CData, sys, curve, curvekey); + if (!have_motion) { + /* unlike mesh coordinates, these tend to be slightly different + * between frames due to particle transforms into/out of object + * space, so we use an epsilon to detect actual changes */ + float4 curve_key = float3_to_float4(hair->get_curve_keys()[i]); + curve_key.w = hair->get_curve_radius()[i]; + if (len_squared(mP[i] - curve_key) > 1e-5f * 1e-5f) + have_motion = true; + } + } + i++; + } + } + else { + /* Number of keys has changed. Generate an interpolated version + * to preserve motion blur. */ + const float step_size = num_center_curve_keys > 1 ? 1.0f / (num_center_curve_keys - 1) : + 0.0f; + for (int step_index = 0; step_index < num_center_curve_keys; ++step_index) { + const float step = step_index * step_size; + mP[i] = LerpCurveSegmentMotionCV(CData, sys, curve, step); + i++; + } + have_motion = true; + } + num_curves++; + } + } + + /* In case of new attribute, we verify if there really was any motion. */ + if (new_attribute) { + export_hair_motion_validate_attribute(hair, motion_step, i, have_motion); + } +} + +/* Hair Curve Sync */ + +bool BlenderSync::object_has_particle_hair(BL::Object b_ob) +{ + /* Test if the object has a particle modifier with hair. */ + for (BL::Modifier &b_mod : b_ob.modifiers) { + if ((b_mod.type() == b_mod.type_PARTICLE_SYSTEM) && + (preview ? 
b_mod.show_viewport() : b_mod.show_render())) { + BL::ParticleSystemModifier psmd((const PointerRNA)b_mod.ptr); + BL::ParticleSystem b_psys((const PointerRNA)psmd.particle_system().ptr); + BL::ParticleSettings b_part((const PointerRNA)b_psys.settings().ptr); + + if ((b_part.render_type() == BL::ParticleSettings::render_type_PATH) && + (b_part.type() == BL::ParticleSettings::type_HAIR)) { + return true; + } + } + } + + return false; +} + +/* Old particle hair. */ +void BlenderSync::sync_particle_hair( + Hair *hair, BL::Mesh &b_mesh, BObjectInfo &b_ob_info, bool motion, int motion_step) +{ + if (!b_ob_info.is_real_object_data()) { + return; + } + BL::Object b_ob = b_ob_info.real_object; + + /* obtain general settings */ + if (b_ob.mode() == b_ob.mode_PARTICLE_EDIT || b_ob.mode() == b_ob.mode_EDIT) { + return; + } + + /* Extract particle hair data - should be combined with connecting to mesh later. */ + + ParticleCurveData CData; + + ObtainCacheParticleData(hair, &b_mesh, &b_ob, &CData, !preview); + + /* add hair geometry */ + if (motion) + ExportCurveSegmentsMotion(hair, &CData, motion_step); + else + ExportCurveSegments(scene, hair, &CData); + + /* generated coordinates from first key. we should ideally get this from + * blender to handle deforming objects */ + if (!motion) { + if (hair->need_attribute(scene, ATTR_STD_GENERATED)) { + float3 loc, size; + mesh_texture_space(b_mesh, loc, size); + + Attribute *attr_generated = hair->attributes.add(ATTR_STD_GENERATED); + float3 *generated = attr_generated->data_float3(); + + for (size_t i = 0; i < hair->num_curves(); i++) { + float3 co = hair->get_curve_keys()[hair->get_curve(i).first_key]; + generated[i] = co * size - loc; + } + } + } + + /* create vertex color attributes */ + if (!motion) { + BL::Mesh::vertex_colors_iterator l; + int vcol_num = 0; + + for (b_mesh.vertex_colors.begin(l); l != b_mesh.vertex_colors.end(); ++l, vcol_num++) { + if (!hair->need_attribute(scene, ustring(l->name().c_str()))) + continue; + + ObtainCacheParticleVcol(hair, &b_mesh, &b_ob, &CData, !preview, vcol_num); + + Attribute *attr_vcol = hair->attributes.add( + ustring(l->name().c_str()), TypeRGBA, ATTR_ELEMENT_CURVE); + + float4 *fdata = attr_vcol->data_float4(); + + if (fdata) { + size_t i = 0; + + /* Encode vertex color using the sRGB curve. */ + for (size_t curve = 0; curve < CData.curve_vcol.size(); curve++) { + fdata[i++] = color_srgb_to_linear_v4(CData.curve_vcol[curve]); + } + } + } + } + + /* create UV attributes */ + if (!motion) { + BL::Mesh::uv_layers_iterator l; + int uv_num = 0; + + for (b_mesh.uv_layers.begin(l); l != b_mesh.uv_layers.end(); ++l, uv_num++) { + bool active_render = l->active_render(); + AttributeStandard std = (active_render) ? 
ATTR_STD_UV : ATTR_STD_NONE; + ustring name = ustring(l->name().c_str()); + + /* UV map */ + if (hair->need_attribute(scene, name) || hair->need_attribute(scene, std)) { + Attribute *attr_uv; + + ObtainCacheParticleUV(hair, &b_mesh, &b_ob, &CData, !preview, uv_num); + + if (active_render) + attr_uv = hair->attributes.add(std, name); + else + attr_uv = hair->attributes.add(name, TypeFloat2, ATTR_ELEMENT_CURVE); + + float2 *uv = attr_uv->data_float2(); + + if (uv) { + size_t i = 0; + + for (size_t curve = 0; curve < CData.curve_uv.size(); curve++) { + uv[i++] = CData.curve_uv[curve]; + } + } + } + } + } +} + +#ifdef WITH_HAIR_NODES +static float4 hair_point_as_float4(BL::HairPoint b_point) +{ + float4 mP = float3_to_float4(get_float3(b_point.co())); + mP.w = b_point.radius(); + return mP; +} + +static float4 interpolate_hair_points(BL::Hair b_hair, + const int first_point_index, + const int num_points, + const float step) +{ + const float curve_t = step * (num_points - 1); + const int point_a = clamp((int)curve_t, 0, num_points - 1); + const int point_b = min(point_a + 1, num_points - 1); + const float t = curve_t - (float)point_a; + return lerp(hair_point_as_float4(b_hair.points[first_point_index + point_a]), + hair_point_as_float4(b_hair.points[first_point_index + point_b]), + t); +} + +static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair) +{ + /* TODO: optimize so we can straight memcpy arrays from Blender? */ + + /* Add requested attributes. */ + Attribute *attr_intercept = NULL; + Attribute *attr_length = NULL; + Attribute *attr_random = NULL; + + if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) { + attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT); + } + if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH)) { + attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH); + } + if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) { + attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM); + } + + /* Reserve memory. */ + const int num_keys = b_hair.points.length(); + const int num_curves = b_hair.curves.length(); + + if (num_curves > 0) { + VLOG(1) << "Exporting curve segments for hair " << hair->name; + } + + hair->reserve_curves(num_curves, num_keys); + + /* Export curves and points. */ + vector points_length; + + for (BL::HairCurve &b_curve : b_hair.curves) { + const int first_point_index = b_curve.first_point_index(); + const int num_points = b_curve.num_points(); + + float3 prev_co = zero_float3(); + float length = 0.0f; + if (attr_intercept) { + points_length.clear(); + points_length.reserve(num_points); + } + + /* Position and radius. */ + for (int i = 0; i < num_points; i++) { + BL::HairPoint b_point = b_hair.points[first_point_index + i]; + + const float3 co = get_float3(b_point.co()); + const float radius = b_point.radius(); + hair->add_curve_key(co, radius); + + if (attr_intercept) { + if (i > 0) { + length += len(co - prev_co); + points_length.push_back(length); + } + prev_co = co; + } + } + + /* Normalized 0..1 attribute along curve. */ + if (attr_intercept) { + for (int i = 0; i < num_points; i++) { + attr_intercept->add((length == 0.0f) ? 0.0f : points_length[i] / length); + } + } + + if (attr_length) { + attr_length->add(length); + } + + /* Random number per curve. */ + if (attr_random != NULL) { + attr_random->add(hash_uint2_to_float(b_curve.index(), 0)); + } + + /* Curve. 
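/* Illustrative sketch, standalone: the index math interpolate_hair_points() above
 * uses to resample a curve at a parametric position. `curve_sample_from_step` is a
 * hypothetical helper name, not part of the Cycles sources. */
#include <algorithm>

struct CurveSample {
  int point_a, point_b; /* Neighbouring point indices within the curve. */
  float t;              /* Blend factor between them. */
};

static CurveSample curve_sample_from_step(int num_points, float step)
{
  const float curve_t = step * (num_points - 1);
  const int point_a = std::clamp((int)curve_t, 0, num_points - 1);
  const int point_b = std::min(point_a + 1, num_points - 1);
  return {point_a, point_b, curve_t - (float)point_a};
}

/* Example: num_points = 4, step = 0.5 resamples between points 1 and 2 with t = 0.5. */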
*/ + const int shader_index = 0; + hair->add_curve(first_point_index, shader_index); + } +} + +static void export_hair_curves_motion(Hair *hair, BL::Hair b_hair, int motion_step) +{ + VLOG(1) << "Exporting curve motion segments for hair " << hair->name << ", motion step " + << motion_step; + + /* Find or add attribute. */ + Attribute *attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + bool new_attribute = false; + + if (!attr_mP) { + VLOG(1) << "Creating new motion vertex position attribute"; + attr_mP = hair->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); + new_attribute = true; + } + + /* Export motion keys. */ + const int num_keys = hair->get_curve_keys().size(); + float4 *mP = attr_mP->data_float4() + motion_step * num_keys; + bool have_motion = false; + int num_motion_keys = 0; + int curve_index = 0; + + for (BL::HairCurve &b_curve : b_hair.curves) { + const int first_point_index = b_curve.first_point_index(); + const int num_points = b_curve.num_points(); + + Hair::Curve curve = hair->get_curve(curve_index); + curve_index++; + + if (num_points == curve.num_keys) { + /* Number of keys matches. */ + for (int i = 0; i < num_points; i++) { + int point_index = first_point_index + i; + + if (point_index < num_keys) { + mP[num_motion_keys] = hair_point_as_float4(b_hair.points[point_index]); + num_motion_keys++; + + if (!have_motion) { + /* TODO: use epsilon for comparison? Was needed for particles due to + * transform, but ideally should not happen anymore. */ + float4 curve_key = float3_to_float4(hair->get_curve_keys()[i]); + curve_key.w = hair->get_curve_radius()[i]; + have_motion = !(mP[i] == curve_key); + } + } + } + } + else { + /* Number of keys has changed. Generate an interpolated version + * to preserve motion blur. */ + const float step_size = curve.num_keys > 1 ? 1.0f / (curve.num_keys - 1) : 0.0f; + for (int i = 0; i < curve.num_keys; i++) { + const float step = i * step_size; + mP[num_motion_keys] = interpolate_hair_points(b_hair, first_point_index, num_points, step); + num_motion_keys++; + } + have_motion = true; + } + } + + /* In case of new attribute, we verify if there really was any motion. */ + if (new_attribute) { + export_hair_motion_validate_attribute(hair, motion_step, num_motion_keys, have_motion); + } +} + +/* Hair object. */ +void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step) +{ + /* Convert Blender hair to Cycles curves. */ + BL::Hair b_hair(b_ob_info.object_data); + if (motion) { + export_hair_curves_motion(hair, b_hair, motion_step); + } + else { + export_hair_curves(scene, hair, b_hair); + } +} +#else +void BlenderSync::sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step) +{ + (void)hair; + (void)b_ob_info; + (void)motion; + (void)motion_step; +} +#endif + +void BlenderSync::sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Hair *hair) +{ + /* make a copy of the shaders as the caller in the main thread still need them for syncing the + * attributes */ + array used_shaders = hair->get_used_shaders(); + + Hair new_hair; + new_hair.set_used_shaders(used_shaders); + + if (view_layer.use_hair) { + if (b_ob_info.object_data.is_a(&RNA_Hair)) { + /* Hair object. */ + sync_hair(&new_hair, b_ob_info, false); + } + else { + /* Particle hair. 
*/ + bool need_undeformed = new_hair.need_attribute(scene, ATTR_STD_GENERATED); + BL::Mesh b_mesh = object_to_mesh( + b_data, b_ob_info, b_depsgraph, need_undeformed, Mesh::SUBDIVISION_NONE); + + if (b_mesh) { + sync_particle_hair(&new_hair, b_mesh, b_ob_info, false); + free_object_to_mesh(b_data, b_ob_info, b_mesh); + } + } + } + + /* update original sockets */ + + for (const SocketType &socket : new_hair.type->inputs) { + /* Those sockets are updated in sync_object, so do not modify them. */ + if (socket.name == "use_motion_blur" || socket.name == "motion_steps" || + socket.name == "used_shaders") { + continue; + } + hair->set_value(socket, new_hair, socket); + } + + hair->attributes.update(std::move(new_hair.attributes)); + + /* tag update */ + + /* Compares curve_keys rather than strands in order to handle quick hair + * adjustments in dynamic BVH - other methods could probably do this better. */ + const bool rebuild = (hair->curve_keys_is_modified() || hair->curve_radius_is_modified()); + + hair->tag_update(scene, rebuild); +} + +void BlenderSync::sync_hair_motion(BL::Depsgraph b_depsgraph, + BObjectInfo &b_ob_info, + Hair *hair, + int motion_step) +{ + /* Skip if nothing exported. */ + if (hair->num_keys() == 0) { + return; + } + + /* Export deformed coordinates. */ + if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) { + if (b_ob_info.object_data.is_a(&RNA_Hair)) { + /* Hair object. */ + sync_hair(hair, b_ob_info, true, motion_step); + return; + } + else { + /* Particle hair. */ + BL::Mesh b_mesh = object_to_mesh( + b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE); + if (b_mesh) { + sync_particle_hair(hair, b_mesh, b_ob_info, true, motion_step); + free_object_to_mesh(b_data, b_ob_info, b_mesh); + return; + } + } + } + + /* No deformation on this frame, copy coordinates if other frames did have it. */ + hair->copy_center_to_motion_step(motion_step); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/device.cpp b/intern/cycles/blender/device.cpp new file mode 100644 index 00000000000..9fabc33a96b --- /dev/null +++ b/intern/cycles/blender/device.cpp @@ -0,0 +1,120 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "blender/device.h" +#include "blender/session.h" +#include "blender/util.h" + +#include "util/foreach.h" + +CCL_NAMESPACE_BEGIN + +enum ComputeDevice { + COMPUTE_DEVICE_CPU = 0, + COMPUTE_DEVICE_CUDA = 1, + COMPUTE_DEVICE_OPTIX = 3, + COMPUTE_DEVICE_HIP = 4, + + COMPUTE_DEVICE_NUM +}; + +int blender_device_threads(BL::Scene &b_scene) +{ + BL::RenderSettings b_r = b_scene.render(); + + if (b_r.threads_mode() == BL::RenderSettings::threads_mode_FIXED) + return b_r.threads(); + else + return 0; +} + +DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scene, bool background) +{ + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + + /* Find cycles preferences. 
*/ + PointerRNA cpreferences; + for (BL::Addon &b_addon : b_preferences.addons) { + if (b_addon.module() == "cycles") { + cpreferences = b_addon.preferences().ptr; + break; + } + } + + /* Default to CPU device. */ + DeviceInfo device = Device::available_devices(DEVICE_MASK_CPU).front(); + + if (BlenderSession::device_override != DEVICE_MASK_ALL) { + vector devices = Device::available_devices(BlenderSession::device_override); + + if (devices.empty()) { + device = Device::dummy_device("Found no Cycles device of the specified type"); + } + else { + int threads = blender_device_threads(b_scene); + device = Device::get_multi_device(devices, threads, background); + } + } + else if (get_enum(cscene, "device") == 1) { + /* Test if we are using GPU devices. */ + ComputeDevice compute_device = (ComputeDevice)get_enum( + cpreferences, "compute_device_type", COMPUTE_DEVICE_NUM, COMPUTE_DEVICE_CPU); + + if (compute_device != COMPUTE_DEVICE_CPU) { + /* Query GPU devices with matching types. */ + uint mask = DEVICE_MASK_CPU; + if (compute_device == COMPUTE_DEVICE_CUDA) { + mask |= DEVICE_MASK_CUDA; + } + else if (compute_device == COMPUTE_DEVICE_OPTIX) { + mask |= DEVICE_MASK_OPTIX; + } + else if (compute_device == COMPUTE_DEVICE_HIP) { + mask |= DEVICE_MASK_HIP; + } + vector devices = Device::available_devices(mask); + + /* Match device preferences and available devices. */ + vector used_devices; + RNA_BEGIN (&cpreferences, device, "devices") { + if (get_boolean(device, "use")) { + string id = get_string(device, "id"); + foreach (DeviceInfo &info, devices) { + if (info.id == id) { + used_devices.push_back(info); + break; + } + } + } + } + RNA_END; + + if (!used_devices.empty()) { + int threads = blender_device_threads(b_scene); + device = Device::get_multi_device(used_devices, threads, background); + } + /* Else keep using the CPU device that was set before. */ + } + } + + if (!get_boolean(cpreferences, "peer_memory")) { + device.has_peer_memory = false; + } + + return device; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/device.h b/intern/cycles/blender/device.h new file mode 100644 index 00000000000..8d2ecac7483 --- /dev/null +++ b/intern/cycles/blender/device.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BLENDER_DEVICE_H__ +#define __BLENDER_DEVICE_H__ + +#include "MEM_guardedalloc.h" +#include "RNA_access.h" +#include "RNA_blender_cpp.h" +#include "RNA_types.h" + +#include "device/device.h" + +CCL_NAMESPACE_BEGIN + +/* Get number of threads to use for rendering. */ +int blender_device_threads(BL::Scene &b_scene); + +/* Convert Blender settings to device specification. 
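/* Illustrative usage sketch of the function defined above, assuming the Cycles
 * build environment. `pick_render_device` is a hypothetical helper; the real
 * callers are the render and viewport session setup. */
#include "blender/device.h"

static DeviceInfo pick_render_device(BL::Preferences &b_preferences, BL::Scene &b_scene)
{
  /* background = true for final renders, false for the interactive viewport;
   * the scene's fixed thread count is applied internally via blender_device_threads(). */
  const bool background = true;
  return blender_device_info(b_preferences, b_scene, background);
}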
*/ +DeviceInfo blender_device_info(BL::Preferences &b_preferences, + BL::Scene &b_scene, + bool background); + +CCL_NAMESPACE_END + +#endif /* __BLENDER_DEVICE_H__ */ diff --git a/intern/cycles/blender/display_driver.cpp b/intern/cycles/blender/display_driver.cpp new file mode 100644 index 00000000000..d5f6d85251e --- /dev/null +++ b/intern/cycles/blender/display_driver.cpp @@ -0,0 +1,771 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "blender/display_driver.h" + +#include "device/device.h" +#include "util/log.h" +#include "util/opengl.h" + +extern "C" { +struct RenderEngine; + +bool RE_engine_has_render_context(struct RenderEngine *engine); +void RE_engine_render_context_enable(struct RenderEngine *engine); +void RE_engine_render_context_disable(struct RenderEngine *engine); + +bool DRW_opengl_context_release(); +void DRW_opengl_context_activate(bool drw_state); + +void *WM_opengl_context_create(); +void WM_opengl_context_activate(void *gl_context); +void WM_opengl_context_dispose(void *gl_context); +void WM_opengl_context_release(void *context); +} + +CCL_NAMESPACE_BEGIN + +/* -------------------------------------------------------------------- + * BlenderDisplayShader. + */ + +unique_ptr BlenderDisplayShader::create(BL::RenderEngine &b_engine, + BL::Scene &b_scene) +{ + if (b_engine.support_display_space_shader(b_scene)) { + return make_unique(b_engine, b_scene); + } + + return make_unique(); +} + +int BlenderDisplayShader::get_position_attrib_location() +{ + if (position_attribute_location_ == -1) { + const uint shader_program = get_shader_program(); + position_attribute_location_ = glGetAttribLocation(shader_program, position_attribute_name); + } + return position_attribute_location_; +} + +int BlenderDisplayShader::get_tex_coord_attrib_location() +{ + if (tex_coord_attribute_location_ == -1) { + const uint shader_program = get_shader_program(); + tex_coord_attribute_location_ = glGetAttribLocation(shader_program, tex_coord_attribute_name); + } + return tex_coord_attribute_location_; +} + +/* -------------------------------------------------------------------- + * BlenderFallbackDisplayShader. + */ + +/* TODO move shaders to standalone .glsl file. 
*/ +static const char *FALLBACK_VERTEX_SHADER = + "#version 330\n" + "uniform vec2 fullscreen;\n" + "in vec2 texCoord;\n" + "in vec2 pos;\n" + "out vec2 texCoord_interp;\n" + "\n" + "vec2 normalize_coordinates()\n" + "{\n" + " return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n" + "}\n" + "\n" + "void main()\n" + "{\n" + " gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n" + " texCoord_interp = texCoord;\n" + "}\n\0"; + +static const char *FALLBACK_FRAGMENT_SHADER = + "#version 330\n" + "uniform sampler2D image_texture;\n" + "in vec2 texCoord_interp;\n" + "out vec4 fragColor;\n" + "\n" + "void main()\n" + "{\n" + " fragColor = texture(image_texture, texCoord_interp);\n" + "}\n\0"; + +static void shader_print_errors(const char *task, const char *log, const char *code) +{ + LOG(ERROR) << "Shader: " << task << " error:"; + LOG(ERROR) << "===== shader string ===="; + + stringstream stream(code); + string partial; + + int line = 1; + while (getline(stream, partial, '\n')) { + if (line < 10) { + LOG(ERROR) << " " << line << " " << partial; + } + else { + LOG(ERROR) << line << " " << partial; + } + line++; + } + LOG(ERROR) << log; +} + +static int compile_fallback_shader(void) +{ + const struct Shader { + const char *source; + const GLenum type; + } shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER}, + {FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}}; + + const GLuint program = glCreateProgram(); + + for (int i = 0; i < 2; i++) { + const GLuint shader = glCreateShader(shaders[i].type); + + string source_str = shaders[i].source; + const char *c_str = source_str.c_str(); + + glShaderSource(shader, 1, &c_str, NULL); + glCompileShader(shader); + + GLint compile_status; + glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_status); + + if (!compile_status) { + GLchar log[5000]; + GLsizei length = 0; + glGetShaderInfoLog(shader, sizeof(log), &length, log); + shader_print_errors("compile", log, c_str); + return 0; + } + + glAttachShader(program, shader); + } + + /* Link output. */ + glBindFragDataLocation(program, 0, "fragColor"); + + /* Link and error check. */ + glLinkProgram(program); + + /* TODO(sergey): Find a way to nicely de-duplicate the error checking. */ + GLint link_status; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); + if (!link_status) { + GLchar log[5000]; + GLsizei length = 0; + /* TODO(sergey): Is it really program passed to glGetShaderInfoLog? 
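/* Editor's note on the TODO above: for a program object the matching query is
 * glGetProgramInfoLog(); glGetShaderInfoLog() expects a shader object. A hedged
 * sketch of the link check using that call, reusing names from this file: */
GLint program_link_status;
glGetProgramiv(program, GL_LINK_STATUS, &program_link_status);
if (!program_link_status) {
  GLchar program_log[5000];
  GLsizei program_log_length = 0;
  glGetProgramInfoLog(program, sizeof(program_log), &program_log_length, program_log);
  shader_print_errors("linking", program_log, FALLBACK_VERTEX_SHADER);
}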
*/ + glGetShaderInfoLog(program, sizeof(log), &length, log); + shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER); + shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER); + return 0; + } + + return program; +} + +void BlenderFallbackDisplayShader::bind(int width, int height) +{ + create_shader_if_needed(); + + if (!shader_program_) { + return; + } + + glUseProgram(shader_program_); + glUniform1i(image_texture_location_, 0); + glUniform2f(fullscreen_location_, width, height); +} + +void BlenderFallbackDisplayShader::unbind() +{ +} + +uint BlenderFallbackDisplayShader::get_shader_program() +{ + return shader_program_; +} + +void BlenderFallbackDisplayShader::create_shader_if_needed() +{ + if (shader_program_ || shader_compile_attempted_) { + return; + } + + shader_compile_attempted_ = true; + + shader_program_ = compile_fallback_shader(); + if (!shader_program_) { + return; + } + + glUseProgram(shader_program_); + + image_texture_location_ = glGetUniformLocation(shader_program_, "image_texture"); + if (image_texture_location_ < 0) { + LOG(ERROR) << "Shader doesn't contain the 'image_texture' uniform."; + destroy_shader(); + return; + } + + fullscreen_location_ = glGetUniformLocation(shader_program_, "fullscreen"); + if (fullscreen_location_ < 0) { + LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform."; + destroy_shader(); + return; + } +} + +void BlenderFallbackDisplayShader::destroy_shader() +{ + glDeleteProgram(shader_program_); + shader_program_ = 0; +} + +/* -------------------------------------------------------------------- + * BlenderDisplaySpaceShader. + */ + +BlenderDisplaySpaceShader::BlenderDisplaySpaceShader(BL::RenderEngine &b_engine, + BL::Scene &b_scene) + : b_engine_(b_engine), b_scene_(b_scene) +{ + DCHECK(b_engine_.support_display_space_shader(b_scene_)); +} + +void BlenderDisplaySpaceShader::bind(int /*width*/, int /*height*/) +{ + b_engine_.bind_display_space_shader(b_scene_); +} + +void BlenderDisplaySpaceShader::unbind() +{ + b_engine_.unbind_display_space_shader(); +} + +uint BlenderDisplaySpaceShader::get_shader_program() +{ + if (!shader_program_) { + glGetIntegerv(GL_CURRENT_PROGRAM, reinterpret_cast(&shader_program_)); + } + + if (!shader_program_) { + LOG(ERROR) << "Error retrieving shader program for display space shader."; + } + + return shader_program_; +} + +/* -------------------------------------------------------------------- + * BlenderDisplayDriver. + */ + +BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene) + : b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene)) +{ + /* Create context while on the main thread. */ + gl_context_create(); +} + +BlenderDisplayDriver::~BlenderDisplayDriver() +{ + gl_resources_destroy(); +} + +/* -------------------------------------------------------------------- + * Update procedure. + */ + +bool BlenderDisplayDriver::update_begin(const Params ¶ms, + int texture_width, + int texture_height) +{ + /* Note that it's the responsibility of BlenderDisplayDriver to ensure updating and drawing + * the texture does not happen at the same time. This is achieved indirectly. + * + * When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock. + * This same lock is also held when do_draw() is called, which together ensure mutual + * exclusion. + * + * This locking is not performed on the Cycles side, because that would cause lock inversion. 
*/ + if (!gl_context_enable()) { + return false; + } + + if (gl_render_sync_) { + glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED); + } + + if (!gl_texture_resources_ensure()) { + gl_context_disable(); + return false; + } + + /* Update texture dimensions if needed. */ + if (texture_.width != texture_width || texture_.height != texture_height) { + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, texture_.gl_id); + glTexImage2D( + GL_TEXTURE_2D, 0, GL_RGBA16F, texture_width, texture_height, 0, GL_RGBA, GL_HALF_FLOAT, 0); + texture_.width = texture_width; + texture_.height = texture_height; + glBindTexture(GL_TEXTURE_2D, 0); + + /* Texture did change, and no pixel storage was provided. Tag for an explicit zeroing out to + * avoid undefined content. */ + texture_.need_clear = true; + } + + /* Update PBO dimensions if needed. + * + * NOTE: Allocate the PBO for the the size which will fit the final render resolution (as in, + * at a resolution divider 1. This was we don't need to recreate graphics interoperability + * objects which are costly and which are tied to the specific underlying buffer size. + * The downside of this approach is that when graphics interoperability is not used we are + * sending too much data to GPU when resolution divider is not 1. */ + /* TODO(sergey): Investigate whether keeping the PBO exact size of the texture makes non-interop + * mode faster. */ + const int buffer_width = params.full_size.x; + const int buffer_height = params.full_size.y; + if (texture_.buffer_width != buffer_width || texture_.buffer_height != buffer_height) { + const size_t size_in_bytes = sizeof(half4) * buffer_width * buffer_height; + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id); + glBufferData(GL_PIXEL_UNPACK_BUFFER, size_in_bytes, 0, GL_DYNAMIC_DRAW); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + texture_.buffer_width = buffer_width; + texture_.buffer_height = buffer_height; + } + + /* New content will be provided to the texture in one way or another, so mark this in a + * centralized place. */ + texture_.need_update = true; + + texture_.params = params; + + return true; +} + +void BlenderDisplayDriver::update_end() +{ + gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glFlush(); + + gl_context_disable(); +} + +/* -------------------------------------------------------------------- + * Texture buffer mapping. + */ + +half4 *BlenderDisplayDriver::map_texture_buffer() +{ + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id); + + half4 *mapped_rgba_pixels = reinterpret_cast( + glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY)); + if (!mapped_rgba_pixels) { + LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object."; + } + + if (texture_.need_clear) { + const int64_t texture_width = texture_.width; + const int64_t texture_height = texture_.height; + memset(reinterpret_cast(mapped_rgba_pixels), + 0, + texture_width * texture_height * sizeof(half4)); + texture_.need_clear = false; + } + + return mapped_rgba_pixels; +} + +void BlenderDisplayDriver::unmap_texture_buffer() +{ + glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); +} + +/* -------------------------------------------------------------------- + * Graphics interoperability. 
+ */ + +BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get() +{ + GraphicsInterop interop_dst; + + interop_dst.buffer_width = texture_.buffer_width; + interop_dst.buffer_height = texture_.buffer_height; + interop_dst.opengl_pbo_id = texture_.gl_pbo_id; + + interop_dst.need_clear = texture_.need_clear; + texture_.need_clear = false; + + return interop_dst; +} + +void BlenderDisplayDriver::graphics_interop_activate() +{ + gl_context_enable(); +} + +void BlenderDisplayDriver::graphics_interop_deactivate() +{ + gl_context_disable(); +} + +/* -------------------------------------------------------------------- + * Drawing. + */ + +void BlenderDisplayDriver::clear() +{ + texture_.need_clear = true; +} + +void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y) +{ + zoom_ = make_float2(zoom_x, zoom_y); +} + +void BlenderDisplayDriver::draw(const Params ¶ms) +{ + /* See do_update_begin() for why no locking is required here. */ + const bool transparent = true; // TODO(sergey): Derive this from Film. + + if (!gl_draw_resources_ensure()) { + return; + } + + if (use_gl_context_) { + gl_context_mutex_.lock(); + } + + if (texture_.need_clear) { + /* Texture is requested to be cleared and was not yet cleared. + * + * Do early return which should be equivalent of drawing all-zero texture. + * Watch out for the lock though so that the clear happening during update is properly + * synchronized here. */ + gl_context_mutex_.unlock(); + return; + } + + if (gl_upload_sync_) { + glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED); + } + + if (transparent) { + glEnable(GL_BLEND); + glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA); + } + + display_shader_->bind(params.full_size.x, params.full_size.y); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, texture_.gl_id); + + /* Trick to keep sharp rendering without jagged edges on all GPUs. + * + * The idea here is to enforce driver to use linear interpolation when the image is not zoomed + * in. + * For the render result with a resolution divider in effect we always use nearest interpolation. + * + * Use explicit MIN assignment to make sure the driver does not have an undefined behavior at + * the zoom level 1. The MAG filter is always NEAREST. */ + const float zoomed_width = params.size.x * zoom_.x; + const float zoomed_height = params.size.y * zoom_.y; + if (texture_.width != params.size.x || texture_.height != params.size.y) { + /* Resolution divider is different from 1, force nearest interpolation. */ + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + } + else if (zoomed_width - params.size.x > 0.5f || zoomed_height - params.size.y > 0.5f) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + } + else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + } + + glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_); + + texture_update_if_needed(); + vertex_buffer_update(params); + + /* TODO(sergey): Does it make sense/possible to cache/reuse the VAO? 
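/* Illustrative sketch, standalone: the MIN-filter choice made for the texture
 * above. Nearest filtering is forced when a resolution divider is in effect (the
 * texture is smaller than the view) or when zoomed in by more than half a pixel;
 * linear otherwise. `use_nearest_filter` is a hypothetical helper name. */
static bool use_nearest_filter(
    int tex_w, int tex_h, int view_w, int view_h, float zoom_x, float zoom_y)
{
  if (tex_w != view_w || tex_h != view_h) {
    return true; /* Resolution divider in effect. */
  }
  return (view_w * zoom_x - view_w > 0.5f) || (view_h * zoom_y - view_h > 0.5f);
}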
*/ + GLuint vertex_array_object; + glGenVertexArrays(1, &vertex_array_object); + glBindVertexArray(vertex_array_object); + + const int texcoord_attribute = display_shader_->get_tex_coord_attrib_location(); + const int position_attribute = display_shader_->get_position_attrib_location(); + + glEnableVertexAttribArray(texcoord_attribute); + glEnableVertexAttribArray(position_attribute); + + glVertexAttribPointer( + texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0); + glVertexAttribPointer(position_attribute, + 2, + GL_FLOAT, + GL_FALSE, + 4 * sizeof(float), + (const GLvoid *)(sizeof(float) * 2)); + + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindTexture(GL_TEXTURE_2D, 0); + + glDeleteVertexArrays(1, &vertex_array_object); + + display_shader_->unbind(); + + if (transparent) { + glDisable(GL_BLEND); + } + + gl_render_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glFlush(); + + if (use_gl_context_) { + gl_context_mutex_.unlock(); + } +} + +void BlenderDisplayDriver::gl_context_create() +{ + /* When rendering in viewport there is no render context available via engine. + * Check whether own context is to be created here. + * + * NOTE: If the `b_engine_`'s context is not available, we are expected to be on a main thread + * here. */ + use_gl_context_ = !RE_engine_has_render_context( + reinterpret_cast(b_engine_.ptr.data)); + + if (use_gl_context_) { + const bool drw_state = DRW_opengl_context_release(); + gl_context_ = WM_opengl_context_create(); + if (gl_context_) { + /* On Windows an old context is restored after creation, and subsequent release of context + * generates a Win32 error. Harmless for users, but annoying to have possible misleading + * error prints in the console. */ +#ifndef _WIN32 + WM_opengl_context_release(gl_context_); +#endif + } + else { + LOG(ERROR) << "Error creating OpenGL context."; + } + + DRW_opengl_context_activate(drw_state); + } +} + +bool BlenderDisplayDriver::gl_context_enable() +{ + if (use_gl_context_) { + if (!gl_context_) { + return false; + } + gl_context_mutex_.lock(); + WM_opengl_context_activate(gl_context_); + return true; + } + + RE_engine_render_context_enable(reinterpret_cast(b_engine_.ptr.data)); + return true; +} + +void BlenderDisplayDriver::gl_context_disable() +{ + if (use_gl_context_) { + if (gl_context_) { + WM_opengl_context_release(gl_context_); + gl_context_mutex_.unlock(); + } + return; + } + + RE_engine_render_context_disable(reinterpret_cast(b_engine_.ptr.data)); +} + +void BlenderDisplayDriver::gl_context_dispose() +{ + if (gl_context_) { + const bool drw_state = DRW_opengl_context_release(); + + WM_opengl_context_activate(gl_context_); + WM_opengl_context_dispose(gl_context_); + + DRW_opengl_context_activate(drw_state); + } +} + +bool BlenderDisplayDriver::gl_draw_resources_ensure() +{ + if (!texture_.gl_id) { + /* If there is no texture allocated, there is nothing to draw. Inform the draw call that it can + * can not continue. Note that this is not an unrecoverable error, so once the texture is known + * we will come back here and create all the GPU resources needed for draw. 
*/ + return false; + } + + if (gl_draw_resource_creation_attempted_) { + return gl_draw_resources_created_; + } + gl_draw_resource_creation_attempted_ = true; + + if (!vertex_buffer_) { + glGenBuffers(1, &vertex_buffer_); + if (!vertex_buffer_) { + LOG(ERROR) << "Error creating vertex buffer."; + return false; + } + } + + gl_draw_resources_created_ = true; + + return true; +} + +void BlenderDisplayDriver::gl_resources_destroy() +{ + gl_context_enable(); + + if (vertex_buffer_ != 0) { + glDeleteBuffers(1, &vertex_buffer_); + } + + if (texture_.gl_pbo_id) { + glDeleteBuffers(1, &texture_.gl_pbo_id); + texture_.gl_pbo_id = 0; + } + + if (texture_.gl_id) { + glDeleteTextures(1, &texture_.gl_id); + texture_.gl_id = 0; + } + + gl_context_disable(); + + gl_context_dispose(); +} + +bool BlenderDisplayDriver::gl_texture_resources_ensure() +{ + if (texture_.creation_attempted) { + return texture_.is_created; + } + texture_.creation_attempted = true; + + DCHECK(!texture_.gl_id); + DCHECK(!texture_.gl_pbo_id); + + /* Create texture. */ + glGenTextures(1, &texture_.gl_id); + if (!texture_.gl_id) { + LOG(ERROR) << "Error creating texture."; + return false; + } + + /* Configure the texture. */ + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, texture_.gl_id); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glBindTexture(GL_TEXTURE_2D, 0); + + /* Create PBO for the texture. */ + glGenBuffers(1, &texture_.gl_pbo_id); + if (!texture_.gl_pbo_id) { + LOG(ERROR) << "Error creating texture pixel buffer object."; + return false; + } + + /* Creation finished with a success. */ + texture_.is_created = true; + + return true; +} + +void BlenderDisplayDriver::texture_update_if_needed() +{ + if (!texture_.need_update) { + return; + } + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id); + glTexSubImage2D( + GL_TEXTURE_2D, 0, 0, 0, texture_.width, texture_.height, GL_RGBA, GL_HALF_FLOAT, 0); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + texture_.need_update = false; +} + +void BlenderDisplayDriver::vertex_buffer_update(const Params & /*params*/) +{ + /* Draw at the parameters for which the texture has been updated for. This allows to always draw + * texture during bordered-rendered camera view without flickering. The validness of the display + * parameters for a texture is guaranteed by the initial "clear" state which makes drawing to + * have an early output. + * + * Such approach can cause some extra "jelly" effect during panning, but it is not more jelly + * than overlay of selected objects. Also, it's possible to redraw texture at an intersection of + * the texture draw parameters and the latest updated draw parameters (although, complexity of + * doing it might not worth it. */ + const int x = texture_.params.full_offset.x; + const int y = texture_.params.full_offset.y; + + const int width = texture_.params.size.x; + const int height = texture_.params.size.y; + + /* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be + * rendered. 
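/* Illustrative sketch: the interleaved vertex layout filled just below, written as
 * a struct. Each of the four triangle-fan vertices stores the texture coordinate
 * first and the position second, which is what the stride of 4 * sizeof(float) and
 * the position offset of 2 * sizeof(float) in draw() assume. `DisplayVertex` is a
 * hypothetical name. */
struct DisplayVertex {
  float u, v; /* texCoord attribute, byte offset 0. */
  float x, y; /* pos attribute, byte offset 2 * sizeof(float). */
};
static_assert(sizeof(DisplayVertex) == 4 * sizeof(float), "must match the VBO stride");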
*/ + glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW); + + float *vpointer = reinterpret_cast(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY)); + if (!vpointer) { + return; + } + + vpointer[0] = 0.0f; + vpointer[1] = 0.0f; + vpointer[2] = x; + vpointer[3] = y; + + vpointer[4] = 1.0f; + vpointer[5] = 0.0f; + vpointer[6] = x + width; + vpointer[7] = y; + + vpointer[8] = 1.0f; + vpointer[9] = 1.0f; + vpointer[10] = x + width; + vpointer[11] = y + height; + + vpointer[12] = 0.0f; + vpointer[13] = 1.0f; + vpointer[14] = x; + vpointer[15] = y + height; + + glUnmapBuffer(GL_ARRAY_BUFFER); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/display_driver.h b/intern/cycles/blender/display_driver.h new file mode 100644 index 00000000000..66cfc8cffcc --- /dev/null +++ b/intern/cycles/blender/display_driver.h @@ -0,0 +1,213 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include "MEM_guardedalloc.h" + +#include "RNA_blender_cpp.h" + +#include "session/display_driver.h" + +#include "util/thread.h" +#include "util/unique_ptr.h" + +CCL_NAMESPACE_BEGIN + +/* Base class of shader used for display driver rendering. */ +class BlenderDisplayShader { + public: + static constexpr const char *position_attribute_name = "pos"; + static constexpr const char *tex_coord_attribute_name = "texCoord"; + + /* Create shader implementation suitable for the given render engine and scene configuration. */ + static unique_ptr create(BL::RenderEngine &b_engine, BL::Scene &b_scene); + + BlenderDisplayShader() = default; + virtual ~BlenderDisplayShader() = default; + + virtual void bind(int width, int height) = 0; + virtual void unbind() = 0; + + /* Get attribute location for position and texture coordinate respectively. + * NOTE: The shader needs to be bound to have access to those. */ + virtual int get_position_attrib_location(); + virtual int get_tex_coord_attrib_location(); + + protected: + /* Get program of this display shader. + * NOTE: The shader needs to be bound to have access to this. */ + virtual uint get_shader_program() = 0; + + /* Cached values of various OpenGL resources. */ + int position_attribute_location_ = -1; + int tex_coord_attribute_location_ = -1; +}; + +/* Implementation of display rendering shader used in the case when render engine does not support + * display space shader. */ +class BlenderFallbackDisplayShader : public BlenderDisplayShader { + public: + virtual void bind(int width, int height) override; + virtual void unbind() override; + + protected: + virtual uint get_shader_program() override; + + void create_shader_if_needed(); + void destroy_shader(); + + uint shader_program_ = 0; + int image_texture_location_ = -1; + int fullscreen_location_ = -1; + + /* Shader compilation attempted. Which means, that if the shader program is 0 then compilation or + * linking has failed. Do not attempt to re-compile the shader. 
*/ + bool shader_compile_attempted_ = false; +}; + +class BlenderDisplaySpaceShader : public BlenderDisplayShader { + public: + BlenderDisplaySpaceShader(BL::RenderEngine &b_engine, BL::Scene &b_scene); + + virtual void bind(int width, int height) override; + virtual void unbind() override; + + protected: + virtual uint get_shader_program() override; + + BL::RenderEngine b_engine_; + BL::Scene &b_scene_; + + /* Cached values of various OpenGL resources. */ + uint shader_program_ = 0; +}; + +/* Display driver implementation which is specific for Blender viewport integration. */ +class BlenderDisplayDriver : public DisplayDriver { + public: + BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene); + ~BlenderDisplayDriver(); + + virtual void graphics_interop_activate() override; + virtual void graphics_interop_deactivate() override; + + virtual void clear() override; + + void set_zoom(float zoom_x, float zoom_y); + + protected: + virtual bool update_begin(const Params ¶ms, int texture_width, int texture_height) override; + virtual void update_end() override; + + virtual half4 *map_texture_buffer() override; + virtual void unmap_texture_buffer() override; + + virtual GraphicsInterop graphics_interop_get() override; + + virtual void draw(const Params ¶ms) override; + + /* Helper function which allocates new GPU context. */ + void gl_context_create(); + bool gl_context_enable(); + void gl_context_disable(); + void gl_context_dispose(); + + /* Make sure texture is allocated and its initial configuration is performed. */ + bool gl_texture_resources_ensure(); + + /* Ensure all runtime GPU resources needed for drawing are allocated. + * Returns true if all resources needed for drawing are available. */ + bool gl_draw_resources_ensure(); + + /* Destroy all GPU resources which are being used by this object. */ + void gl_resources_destroy(); + + /* Update GPU texture dimensions and content if needed (new pixel data was provided). + * + * NOTE: The texture needs to be bound. */ + void texture_update_if_needed(); + + /* Update vertex buffer with new coordinates of vertex positions and texture coordinates. + * This buffer is used to render texture in the viewport. + * + * NOTE: The buffer needs to be bound. */ + void vertex_buffer_update(const Params ¶ms); + + BL::RenderEngine b_engine_; + + /* OpenGL context which is used the render engine doesn't have its own. */ + void *gl_context_ = nullptr; + /* The when Blender RenderEngine side context is not available and the DisplayDriver is to create + * its own context. */ + bool use_gl_context_ = false; + /* Mutex used to guard the `gl_context_`. */ + thread_mutex gl_context_mutex_; + + /* Texture which contains pixels of the render result. */ + struct { + /* Indicates whether texture creation was attempted and succeeded. + * Used to avoid multiple attempts of texture creation on GPU issues or GPU context + * misconfiguration. */ + bool creation_attempted = false; + bool is_created = false; + + /* OpenGL resource IDs of the texture itself and Pixel Buffer Object (PBO) used to write + * pixels to it. + * + * NOTE: Allocated on the engine's context. */ + uint gl_id = 0; + uint gl_pbo_id = 0; + + /* Is true when new data was written to the PBO, meaning, the texture might need to be resized + * and new data is to be uploaded to the GPU. */ + bool need_update = false; + + /* Content of the texture is to be filled with zeroes. */ + std::atomic need_clear = true; + + /* Dimensions of the texture in pixels. 
*/ + int width = 0; + int height = 0; + + /* Dimensions of the underlying PBO. */ + int buffer_width = 0; + int buffer_height = 0; + + /* Display parameters the texture has been updated for. */ + Params params; + } texture_; + + unique_ptr display_shader_; + + /* Special track of whether GPU resources were attempted to be created, to avoid attempts of + * their re-creation on failure on every redraw. */ + bool gl_draw_resource_creation_attempted_ = false; + bool gl_draw_resources_created_ = false; + + /* Vertex buffer which hold vertices of a triangle fan which is textures with the texture + * holding the render result. */ + uint vertex_buffer_ = 0; + + void *gl_render_sync_ = nullptr; + void *gl_upload_sync_ = nullptr; + + float2 zoom_ = make_float2(1.0f, 1.0f); +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/geometry.cpp b/intern/cycles/blender/geometry.cpp new file mode 100644 index 00000000000..479e76f68bc --- /dev/null +++ b/intern/cycles/blender/geometry.cpp @@ -0,0 +1,241 @@ + +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "scene/curves.h" +#include "scene/hair.h" +#include "scene/mesh.h" +#include "scene/object.h" +#include "scene/volume.h" + +#include "blender/sync.h" +#include "blender/util.h" + +#include "util/foreach.h" +#include "util/task.h" + +CCL_NAMESPACE_BEGIN + +static Geometry::Type determine_geom_type(BObjectInfo &b_ob_info, bool use_particle_hair) +{ + if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) { + return Geometry::HAIR; + } + + if (b_ob_info.object_data.is_a(&RNA_Volume) || + (b_ob_info.object_data == b_ob_info.real_object.data() && + object_fluid_gas_domain_find(b_ob_info.real_object))) { + return Geometry::VOLUME; + } + + return Geometry::MESH; +} + +array BlenderSync::find_used_shaders(BL::Object &b_ob) +{ + BL::Material material_override = view_layer.material_override; + Shader *default_shader = (b_ob.type() == BL::Object::type_VOLUME) ? scene->default_volume : + scene->default_surface; + + array used_shaders; + + for (BL::MaterialSlot &b_slot : b_ob.material_slots) { + if (material_override) { + find_shader(material_override, used_shaders, default_shader); + } + else { + BL::ID b_material(b_slot.material()); + find_shader(b_material, used_shaders, default_shader); + } + } + + if (used_shaders.size() == 0) { + if (material_override) + find_shader(material_override, used_shaders, default_shader); + else + used_shaders.push_back_slow(default_shader); + } + + return used_shaders; +} + +Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph, + BObjectInfo &b_ob_info, + bool object_updated, + bool use_particle_hair, + TaskPool *task_pool) +{ + /* Test if we can instance or if the object is modified. */ + Geometry::Type geom_type = determine_geom_type(b_ob_info, use_particle_hair); + BL::ID b_key_id = (b_ob_info.is_real_object_data() && + BKE_object_is_modified(b_ob_info.real_object)) ? 
+ b_ob_info.real_object : + b_ob_info.object_data; + GeometryKey key(b_key_id.ptr.data, geom_type); + + /* Find shader indices. */ + array used_shaders = find_used_shaders(b_ob_info.iter_object); + + /* Ensure we only sync instanced geometry once. */ + Geometry *geom = geometry_map.find(key); + if (geom) { + if (geometry_synced.find(geom) != geometry_synced.end()) { + return geom; + } + } + + /* Test if we need to sync. */ + bool sync = true; + if (geom == NULL) { + /* Add new geometry if it did not exist yet. */ + if (geom_type == Geometry::HAIR) { + geom = scene->create_node(); + } + else if (geom_type == Geometry::VOLUME) { + geom = scene->create_node(); + } + else { + geom = scene->create_node(); + } + geometry_map.add(key, geom); + } + else { + /* Test if we need to update existing geometry. */ + sync = geometry_map.update(geom, b_key_id); + } + + if (!sync) { + /* If transform was applied to geometry, need full update. */ + if (object_updated && geom->transform_applied) { + ; + } + /* Test if shaders changed, these can be object level so geometry + * does not get tagged for recalc. */ + else if (geom->get_used_shaders() != used_shaders) { + ; + } + else { + /* Even if not tagged for recalc, we may need to sync anyway + * because the shader needs different geometry attributes. */ + bool attribute_recalc = false; + + foreach (Node *node, geom->get_used_shaders()) { + Shader *shader = static_cast(node); + if (shader->need_update_geometry()) { + attribute_recalc = true; + } + } + + if (!attribute_recalc) { + return geom; + } + } + } + + geometry_synced.insert(geom); + + geom->name = ustring(b_ob_info.object_data.name().c_str()); + + /* Store the shaders immediately for the object attribute code. */ + geom->set_used_shaders(used_shaders); + + auto sync_func = [=]() mutable { + if (progress.get_cancel()) + return; + + progress.set_sync_status("Synchronizing object", b_ob_info.real_object.name()); + + if (geom_type == Geometry::HAIR) { + Hair *hair = static_cast(geom); + sync_hair(b_depsgraph, b_ob_info, hair); + } + else if (geom_type == Geometry::VOLUME) { + Volume *volume = static_cast(geom); + sync_volume(b_ob_info, volume); + } + else { + Mesh *mesh = static_cast(geom); + sync_mesh(b_depsgraph, b_ob_info, mesh); + } + }; + + /* Defer the actual geometry sync to the task_pool for multithreading */ + if (task_pool) { + task_pool->push(sync_func); + } + else { + sync_func(); + } + + return geom; +} + +void BlenderSync::sync_geometry_motion(BL::Depsgraph &b_depsgraph, + BObjectInfo &b_ob_info, + Object *object, + float motion_time, + bool use_particle_hair, + TaskPool *task_pool) +{ + /* Ensure we only sync instanced geometry once. */ + Geometry *geom = object->get_geometry(); + + if (geometry_motion_synced.find(geom) != geometry_motion_synced.end() || + geometry_motion_attribute_synced.find(geom) != geometry_motion_attribute_synced.end()) { + return; + } + + geometry_motion_synced.insert(geom); + + /* Ensure we only motion sync geometry that also had geometry synced, to avoid + * unnecessary work and to ensure that its attributes were clear. */ + if (geometry_synced.find(geom) == geometry_synced.end()) + return; + + /* Find time matching motion step required by geometry. 
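/* Illustrative sketch of the defer-or-run-inline pattern used by sync_geometry()
 * above and sync_geometry_motion() below: the same callable either runs
 * immediately or is queued on the task pool for multithreaded sync.
 * `run_or_defer` is a hypothetical helper; the pool type is kept generic rather
 * than assuming the exact TaskPool::push() signature. */
#include <functional>

template<typename Pool> static void run_or_defer(Pool *task_pool, std::function<void()> sync_func)
{
  if (task_pool) {
    task_pool->push(sync_func); /* Executed later by worker threads. */
  }
  else {
    sync_func(); /* No pool provided: run synchronously. */
  }
}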
*/ + int motion_step = geom->motion_step(motion_time); + if (motion_step < 0) { + return; + } + + auto sync_func = [=]() mutable { + if (progress.get_cancel()) + return; + + if (b_ob_info.object_data.is_a(&RNA_Hair) || use_particle_hair) { + Hair *hair = static_cast(geom); + sync_hair_motion(b_depsgraph, b_ob_info, hair, motion_step); + } + else if (b_ob_info.object_data.is_a(&RNA_Volume) || + object_fluid_gas_domain_find(b_ob_info.real_object)) { + /* No volume motion blur support yet. */ + } + else { + Mesh *mesh = static_cast(geom); + sync_mesh_motion(b_depsgraph, b_ob_info, mesh, motion_step); + } + }; + + /* Defer the actual geometry sync to the task_pool for multithreading */ + if (task_pool) { + task_pool->push(sync_func); + } + else { + sync_func(); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/id_map.h b/intern/cycles/blender/id_map.h new file mode 100644 index 00000000000..c1b800026c3 --- /dev/null +++ b/intern/cycles/blender/id_map.h @@ -0,0 +1,295 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BLENDER_ID_MAP_H__ +#define __BLENDER_ID_MAP_H__ + +#include + +#include "scene/geometry.h" +#include "scene/scene.h" + +#include "util/map.h" +#include "util/set.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +/* ID Map + * + * Utility class to map between Blender datablocks and Cycles data structures, + * and keep track of recalc tags from the dependency graph. */ + +template class id_map { + public: + id_map(Scene *scene_) : scene(scene_) + { + } + + ~id_map() + { + set nodes; + + typename map::iterator jt; + for (jt = b_map.begin(); jt != b_map.end(); jt++) { + nodes.insert(jt->second); + } + + scene->delete_nodes(nodes); + } + + T *find(const BL::ID &id) + { + return find(id.ptr.owner_id); + } + + T *find(const K &key) + { + if (b_map.find(key) != b_map.end()) { + T *data = b_map[key]; + return data; + } + + return NULL; + } + + void set_recalc(const BL::ID &id) + { + b_recalc.insert(id.ptr.data); + } + + void set_recalc(void *id_ptr) + { + b_recalc.insert(id_ptr); + } + + bool has_recalc() + { + return !(b_recalc.empty()); + } + + void pre_sync() + { + used_set.clear(); + } + + /* Add new data. */ + void add(const K &key, T *data) + { + assert(find(key) == NULL); + b_map[key] = data; + used(data); + } + + /* Update existing data. */ + bool update(T *data, const BL::ID &id) + { + return update(data, id, id); + } + bool update(T *data, const BL::ID &id, const BL::ID &parent) + { + bool recalc = (b_recalc.find(id.ptr.data) != b_recalc.end()); + if (parent.ptr.data && parent.ptr.data != id.ptr.data) { + recalc = recalc || (b_recalc.find(parent.ptr.data) != b_recalc.end()); + } + used(data); + return recalc; + } + + /* Combined add and update as needed. 
*/ + bool add_or_update(T **r_data, const BL::ID &id) + { + return add_or_update(r_data, id, id, id.ptr.owner_id); + } + bool add_or_update(T **r_data, const BL::ID &id, const K &key) + { + return add_or_update(r_data, id, id, key); + } + bool add_or_update(T **r_data, const BL::ID &id, const BL::ID &parent, const K &key) + { + T *data = find(key); + bool recalc; + + if (!data) { + /* Add data if it didn't exist yet. */ + data = scene->create_node(); + add(key, data); + recalc = true; + } + else { + /* check if updated needed. */ + recalc = update(data, id, parent); + } + + *r_data = data; + return recalc; + } + + /* Combined add or update for convenience. */ + + bool is_used(const K &key) + { + T *data = find(key); + return (data) ? used_set.find(data) != used_set.end() : false; + } + + void used(T *data) + { + /* tag data as still in use */ + used_set.insert(data); + } + + void set_default(T *data) + { + b_map[NULL] = data; + } + + void post_sync(bool do_delete = true) + { + map new_map; + typedef pair TMapPair; + typename map::iterator jt; + + for (jt = b_map.begin(); jt != b_map.end(); jt++) { + TMapPair &pair = *jt; + + if (do_delete && used_set.find(pair.second) == used_set.end()) { + scene->delete_node(pair.second); + } + else { + new_map[pair.first] = pair.second; + } + } + + used_set.clear(); + b_recalc.clear(); + b_map = new_map; + } + + const map &key_to_scene_data() + { + return b_map; + } + + protected: + map b_map; + set used_set; + set b_recalc; + Scene *scene; +}; + +/* Object Key + * + * To uniquely identify instances, we use the parent, object and persistent instance ID. + * We also export separate object for a mesh and its particle hair. */ + +enum { OBJECT_PERSISTENT_ID_SIZE = 8 /* MAX_DUPLI_RECUR in Blender. */ }; + +struct ObjectKey { + void *parent; + int id[OBJECT_PERSISTENT_ID_SIZE]; + void *ob; + bool use_particle_hair; + + ObjectKey(void *parent_, int id_[OBJECT_PERSISTENT_ID_SIZE], void *ob_, bool use_particle_hair_) + : parent(parent_), ob(ob_), use_particle_hair(use_particle_hair_) + { + if (id_) + memcpy(id, id_, sizeof(id)); + else + memset(id, 0, sizeof(id)); + } + + bool operator<(const ObjectKey &k) const + { + if (ob < k.ob) { + return true; + } + else if (ob == k.ob) { + if (parent < k.parent) { + return true; + } + else if (parent == k.parent) { + if (use_particle_hair < k.use_particle_hair) { + return true; + } + else if (use_particle_hair == k.use_particle_hair) { + return memcmp(id, k.id, sizeof(id)) < 0; + } + } + } + + return false; + } +}; + +/* Geometry Key + * + * We export separate geometry for a mesh and its particle hair, so key needs to + * distinguish between them. 
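/* Illustrative usage sketch of the id_map and ObjectKey defined above, assuming
 * the surrounding Cycles sync code. `sync_objects_example` is hypothetical; the
 * real caller is BlenderSync's object synchronization. */
static void sync_objects_example(Scene *scene)
{
  id_map<ObjectKey, Object> object_map(scene);

  object_map.pre_sync();

  /* For every Blender object instance seen during sync, roughly:
   *   ObjectKey key(parent_ptr, persistent_id, object_ptr, use_particle_hair);
   *   Object *object = nullptr;
   *   if (object_map.add_or_update(&object, b_ob, b_parent, key)) {
   *     ...the datablock was tagged for recalc, re-sync `object`...
   *   }
   */

  /* Entries not marked as used this sync are deleted from the scene. */
  object_map.post_sync();
}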
*/ + +struct GeometryKey { + void *id; + Geometry::Type geometry_type; + + GeometryKey(void *id, Geometry::Type geometry_type) : id(id), geometry_type(geometry_type) + { + } + + bool operator<(const GeometryKey &k) const + { + if (id < k.id) { + return true; + } + else if (id == k.id) { + if (geometry_type < k.geometry_type) { + return true; + } + } + + return false; + } +}; + +/* Particle System Key */ + +struct ParticleSystemKey { + void *ob; + int id[OBJECT_PERSISTENT_ID_SIZE]; + + ParticleSystemKey(void *ob_, int id_[OBJECT_PERSISTENT_ID_SIZE]) : ob(ob_) + { + if (id_) + memcpy(id, id_, sizeof(id)); + else + memset(id, 0, sizeof(id)); + } + + bool operator<(const ParticleSystemKey &k) const + { + /* first id is particle index, we don't compare that */ + if (ob < k.ob) + return true; + else if (ob == k.ob) + return memcmp(id + 1, k.id + 1, sizeof(int) * (OBJECT_PERSISTENT_ID_SIZE - 1)) < 0; + + return false; + } +}; + +CCL_NAMESPACE_END + +#endif /* __BLENDER_ID_MAP_H__ */ diff --git a/intern/cycles/blender/image.cpp b/intern/cycles/blender/image.cpp new file mode 100644 index 00000000000..3ea3a47c1f4 --- /dev/null +++ b/intern/cycles/blender/image.cpp @@ -0,0 +1,220 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MEM_guardedalloc.h" + +#include "blender/image.h" +#include "blender/session.h" +#include "blender/util.h" + +CCL_NAMESPACE_BEGIN + +/* Packed Images */ + +BlenderImageLoader::BlenderImageLoader(BL::Image b_image, int frame) + : b_image(b_image), frame(frame), free_cache(!b_image.has_data()) +{ +} + +bool BlenderImageLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaData &metadata) +{ + metadata.width = b_image.size()[0]; + metadata.height = b_image.size()[1]; + metadata.depth = 1; + metadata.channels = b_image.channels(); + + if (b_image.is_float()) { + if (metadata.channels == 1) { + metadata.type = IMAGE_DATA_TYPE_FLOAT; + } + else if (metadata.channels == 4) { + metadata.type = IMAGE_DATA_TYPE_FLOAT4; + } + else { + return false; + } + + /* Float images are already converted on the Blender side, + * no need to do anything in Cycles. */ + metadata.colorspace = u_colorspace_raw; + } + else { + if (metadata.channels == 1) { + metadata.type = IMAGE_DATA_TYPE_BYTE; + } + else if (metadata.channels == 4) { + metadata.type = IMAGE_DATA_TYPE_BYTE4; + } + else { + return false; + } + } + + return true; +} + +bool BlenderImageLoader::load_pixels(const ImageMetaData &metadata, + void *pixels, + const size_t pixels_size, + const bool associate_alpha) +{ + const size_t num_pixels = ((size_t)metadata.width) * metadata.height; + const int channels = metadata.channels; + const int tile = 0; /* TODO(lukas): Support tiles here? 
*/ + + if (b_image.is_float()) { + /* image data */ + float *image_pixels; + image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile); + + if (image_pixels && num_pixels * channels == pixels_size) { + memcpy(pixels, image_pixels, pixels_size * sizeof(float)); + } + else { + if (channels == 1) { + memset(pixels, 0, num_pixels * sizeof(float)); + } + else { + const size_t num_pixels_safe = pixels_size / channels; + float *fp = (float *)pixels; + for (int i = 0; i < num_pixels_safe; i++, fp += channels) { + fp[0] = 1.0f; + fp[1] = 0.0f; + fp[2] = 1.0f; + if (channels == 4) { + fp[3] = 1.0f; + } + } + } + } + + if (image_pixels) { + MEM_freeN(image_pixels); + } + } + else { + unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile); + + if (image_pixels && num_pixels * channels == pixels_size) { + memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char)); + } + else { + if (channels == 1) { + memset(pixels, 0, pixels_size * sizeof(unsigned char)); + } + else { + const size_t num_pixels_safe = pixels_size / channels; + unsigned char *cp = (unsigned char *)pixels; + for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) { + cp[0] = 255; + cp[1] = 0; + cp[2] = 255; + if (channels == 4) { + cp[3] = 255; + } + } + } + } + + if (image_pixels) { + MEM_freeN(image_pixels); + } + + if (associate_alpha) { + /* Premultiply, byte images are always straight for Blender. */ + unsigned char *cp = (unsigned char *)pixels; + for (size_t i = 0; i < num_pixels; i++, cp += channels) { + cp[0] = (cp[0] * cp[3]) / 255; + cp[1] = (cp[1] * cp[3]) / 255; + cp[2] = (cp[2] * cp[3]) / 255; + } + } + } + + /* Free image buffers to save memory during render. */ + if (free_cache) { + b_image.buffers_free(); + } + + return true; +} + +string BlenderImageLoader::name() const +{ + return BL::Image(b_image).name(); +} + +bool BlenderImageLoader::equals(const ImageLoader &other) const +{ + const BlenderImageLoader &other_loader = (const BlenderImageLoader &)other; + return b_image == other_loader.b_image && frame == other_loader.frame; +} + +/* Point Density */ + +BlenderPointDensityLoader::BlenderPointDensityLoader(BL::Depsgraph b_depsgraph, + BL::ShaderNodeTexPointDensity b_node) + : b_depsgraph(b_depsgraph), b_node(b_node) +{ +} + +bool BlenderPointDensityLoader::load_metadata(const ImageDeviceFeatures &, ImageMetaData &metadata) +{ + metadata.channels = 4; + metadata.width = b_node.resolution(); + metadata.height = metadata.width; + metadata.depth = metadata.width; + metadata.type = IMAGE_DATA_TYPE_FLOAT4; + return true; +} + +bool BlenderPointDensityLoader::load_pixels(const ImageMetaData &, + void *pixels, + const size_t, + const bool) +{ + int length; + b_node.calc_point_density(b_depsgraph, &length, (float **)&pixels); + return true; +} + +void BlenderSession::builtin_images_load() +{ + /* Force builtin images to be loaded along with Blender data sync. This + * is needed because we may be reading from depsgraph evaluated data which + * can be freed by Blender before Cycles reads it. + * + * TODO: the assumption that no further access to builtin image data will + * happen is really weak, and likely to break in the future. We should find + * a better solution to hand over the data directly to the image manager + * instead of through callbacks whose timing is difficult to control. 
*/ + ImageManager *manager = session->scene->image_manager; + Device *device = session->device; + manager->device_load_builtin(device, session->scene, session->progress); +} + +string BlenderPointDensityLoader::name() const +{ + return BL::ShaderNodeTexPointDensity(b_node).name(); +} + +bool BlenderPointDensityLoader::equals(const ImageLoader &other) const +{ + const BlenderPointDensityLoader &other_loader = (const BlenderPointDensityLoader &)other; + return b_node == other_loader.b_node && b_depsgraph == other_loader.b_depsgraph; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/image.h b/intern/cycles/blender/image.h new file mode 100644 index 00000000000..6f1e72c21af --- /dev/null +++ b/intern/cycles/blender/image.h @@ -0,0 +1,61 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BLENDER_IMAGE_H__ +#define __BLENDER_IMAGE_H__ + +#include "RNA_blender_cpp.h" + +#include "scene/image.h" + +CCL_NAMESPACE_BEGIN + +class BlenderImageLoader : public ImageLoader { + public: + BlenderImageLoader(BL::Image b_image, int frame); + + bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override; + bool load_pixels(const ImageMetaData &metadata, + void *pixels, + const size_t pixels_size, + const bool associate_alpha) override; + string name() const override; + bool equals(const ImageLoader &other) const override; + + BL::Image b_image; + int frame; + bool free_cache; +}; + +class BlenderPointDensityLoader : public ImageLoader { + public: + BlenderPointDensityLoader(BL::Depsgraph depsgraph, BL::ShaderNodeTexPointDensity b_node); + + bool load_metadata(const ImageDeviceFeatures &features, ImageMetaData &metadata) override; + bool load_pixels(const ImageMetaData &metadata, + void *pixels, + const size_t pixels_size, + const bool associate_alpha) override; + string name() const override; + bool equals(const ImageLoader &other) const override; + + BL::Depsgraph b_depsgraph; + BL::ShaderNodeTexPointDensity b_node; +}; + +CCL_NAMESPACE_END + +#endif /* __BLENDER_IMAGE_H__ */ diff --git a/intern/cycles/blender/light.cpp b/intern/cycles/blender/light.cpp new file mode 100644 index 00000000000..1e4cc0f1d14 --- /dev/null +++ b/intern/cycles/blender/light.cpp @@ -0,0 +1,205 @@ + + +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "scene/light.h" + +#include "blender/sync.h" +#include "blender/util.h" + +#include "util/hash.h" + +CCL_NAMESPACE_BEGIN + +void BlenderSync::sync_light(BL::Object &b_parent, + int persistent_id[OBJECT_PERSISTENT_ID_SIZE], + BObjectInfo &b_ob_info, + int random_id, + Transform &tfm, + bool *use_portal) +{ + /* test if we need to sync */ + ObjectKey key(b_parent, persistent_id, b_ob_info.real_object, false); + BL::Light b_light(b_ob_info.object_data); + + Light *light = light_map.find(key); + + /* Check if the transform was modified, in case a linked collection is moved we do not get a + * specific depsgraph update (T88515). This also mimics the behavior for Objects. */ + const bool tfm_updated = (light && light->get_tfm() != tfm); + + /* Update if either object or light data changed. */ + if (!light_map.add_or_update(&light, b_ob_info.real_object, b_parent, key) && !tfm_updated) { + Shader *shader; + if (!shader_map.add_or_update(&shader, b_light)) { + if (light->get_is_portal()) + *use_portal = true; + return; + } + } + + /* type */ + switch (b_light.type()) { + case BL::Light::type_POINT: { + BL::PointLight b_point_light(b_light); + light->set_size(b_point_light.shadow_soft_size()); + light->set_light_type(LIGHT_POINT); + break; + } + case BL::Light::type_SPOT: { + BL::SpotLight b_spot_light(b_light); + light->set_size(b_spot_light.shadow_soft_size()); + light->set_light_type(LIGHT_SPOT); + light->set_spot_angle(b_spot_light.spot_size()); + light->set_spot_smooth(b_spot_light.spot_blend()); + break; + } + /* Hemi were removed from 2.8 */ + // case BL::Light::type_HEMI: { + // light->type = LIGHT_DISTANT; + // light->size = 0.0f; + // break; + // } + case BL::Light::type_SUN: { + BL::SunLight b_sun_light(b_light); + light->set_angle(b_sun_light.angle()); + light->set_light_type(LIGHT_DISTANT); + break; + } + case BL::Light::type_AREA: { + BL::AreaLight b_area_light(b_light); + light->set_size(1.0f); + light->set_axisu(transform_get_column(&tfm, 0)); + light->set_axisv(transform_get_column(&tfm, 1)); + light->set_sizeu(b_area_light.size()); + light->set_spread(b_area_light.spread()); + switch (b_area_light.shape()) { + case BL::AreaLight::shape_SQUARE: + light->set_sizev(light->get_sizeu()); + light->set_round(false); + break; + case BL::AreaLight::shape_RECTANGLE: + light->set_sizev(b_area_light.size_y()); + light->set_round(false); + break; + case BL::AreaLight::shape_DISK: + light->set_sizev(light->get_sizeu()); + light->set_round(true); + break; + case BL::AreaLight::shape_ELLIPSE: + light->set_sizev(b_area_light.size_y()); + light->set_round(true); + break; + } + light->set_light_type(LIGHT_AREA); + break; + } + } + + /* strength */ + float3 strength = get_float3(b_light.color()) * BL::PointLight(b_light).energy(); + light->set_strength(strength); + + /* location and (inverted!) 
direction */ + light->set_co(transform_get_column(&tfm, 3)); + light->set_dir(-transform_get_column(&tfm, 2)); + light->set_tfm(tfm); + + /* shader */ + array used_shaders; + find_shader(b_light, used_shaders, scene->default_light); + light->set_shader(static_cast(used_shaders[0])); + + /* shadow */ + PointerRNA clight = RNA_pointer_get(&b_light.ptr, "cycles"); + light->set_cast_shadow(get_boolean(clight, "cast_shadow")); + light->set_use_mis(get_boolean(clight, "use_multiple_importance_sampling")); + + light->set_max_bounces(get_int(clight, "max_bounces")); + + if (b_ob_info.real_object != b_ob_info.iter_object) { + light->set_random_id(random_id); + } + else { + light->set_random_id(hash_uint2(hash_string(b_ob_info.real_object.name().c_str()), 0)); + } + + if (light->get_light_type() == LIGHT_AREA) + light->set_is_portal(get_boolean(clight, "is_portal")); + else + light->set_is_portal(false); + + if (light->get_is_portal()) + *use_portal = true; + + /* visibility */ + uint visibility = object_ray_visibility(b_ob_info.real_object); + light->set_use_camera((visibility & PATH_RAY_CAMERA) != 0); + light->set_use_diffuse((visibility & PATH_RAY_DIFFUSE) != 0); + light->set_use_glossy((visibility & PATH_RAY_GLOSSY) != 0); + light->set_use_transmission((visibility & PATH_RAY_TRANSMIT) != 0); + light->set_use_scatter((visibility & PATH_RAY_VOLUME_SCATTER) != 0); + light->set_is_shadow_catcher(b_ob_info.real_object.is_shadow_catcher()); + + /* tag */ + light->tag_update(scene); +} + +void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal) +{ + BL::World b_world = b_scene.world(); + + if (b_world) { + PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles"); + + enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM }; + int sampling_method = get_enum(cworld, "sampling_method", SAMPLING_NUM, SAMPLING_AUTOMATIC); + bool sample_as_light = (sampling_method != SAMPLING_NONE); + + if (sample_as_light || use_portal) { + /* test if we need to sync */ + Light *light; + ObjectKey key(b_world, 0, b_world, false); + + if (light_map.add_or_update(&light, b_world, b_world, key) || world_recalc || + b_world.ptr.data != world_map) { + light->set_light_type(LIGHT_BACKGROUND); + if (sampling_method == SAMPLING_MANUAL) { + light->set_map_resolution(get_int(cworld, "sample_map_resolution")); + } + else { + light->set_map_resolution(0); + } + light->set_shader(scene->default_background); + light->set_use_mis(sample_as_light); + light->set_max_bounces(get_int(cworld, "max_bounces")); + + /* force enable light again when world is resynced */ + light->set_is_enabled(true); + + light->tag_update(scene); + light_map.set_recalc(b_world); + } + } + } + + world_map = b_world.ptr.data; + world_recalc = false; + viewport_parameters = BlenderViewportParameters(b_v3d, use_developer_ui); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/logging.cpp b/intern/cycles/blender/logging.cpp new file mode 100644 index 00000000000..613b4084aa8 --- /dev/null +++ b/intern/cycles/blender/logging.cpp @@ -0,0 +1,33 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "blender/CCL_api.h" +#include "util/log.h" + +void CCL_init_logging(const char *argv0) +{ + ccl::util_logging_init(argv0); +} + +void CCL_start_debug_logging() +{ + ccl::util_logging_start(); +} + +void CCL_logging_verbosity_set(int verbosity) +{ + ccl::util_logging_verbosity_set(verbosity); +} diff --git a/intern/cycles/blender/mesh.cpp b/intern/cycles/blender/mesh.cpp new file mode 100644 index 00000000000..b69bf88c213 --- /dev/null +++ b/intern/cycles/blender/mesh.cpp @@ -0,0 +1,1302 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "blender/session.h" +#include "blender/sync.h" +#include "blender/util.h" + +#include "scene/camera.h" +#include "scene/colorspace.h" +#include "scene/mesh.h" +#include "scene/object.h" +#include "scene/scene.h" + +#include "subd/patch.h" +#include "subd/split.h" + +#include "util/algorithm.h" +#include "util/color.h" +#include "util/disjoint_set.h" +#include "util/foreach.h" +#include "util/hash.h" +#include "util/log.h" +#include "util/math.h" + +#include "mikktspace.h" + +CCL_NAMESPACE_BEGIN + +/* Tangent Space */ + +struct MikkUserData { + MikkUserData(const BL::Mesh &b_mesh, + const char *layer_name, + const Mesh *mesh, + float3 *tangent, + float *tangent_sign) + : mesh(mesh), texface(NULL), orco(NULL), tangent(tangent), tangent_sign(tangent_sign) + { + const AttributeSet &attributes = (mesh->get_num_subd_faces()) ? 
mesh->subd_attributes : + mesh->attributes; + + Attribute *attr_vN = attributes.find(ATTR_STD_VERTEX_NORMAL); + vertex_normal = attr_vN->data_float3(); + + if (layer_name == NULL) { + Attribute *attr_orco = attributes.find(ATTR_STD_GENERATED); + + if (attr_orco) { + orco = attr_orco->data_float3(); + mesh_texture_space(*(BL::Mesh *)&b_mesh, orco_loc, orco_size); + } + } + else { + Attribute *attr_uv = attributes.find(ustring(layer_name)); + if (attr_uv != NULL) { + texface = attr_uv->data_float2(); + } + } + } + + const Mesh *mesh; + int num_faces; + + float3 *vertex_normal; + float2 *texface; + float3 *orco; + float3 orco_loc, orco_size; + + float3 *tangent; + float *tangent_sign; +}; + +static int mikk_get_num_faces(const SMikkTSpaceContext *context) +{ + const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData; + if (userdata->mesh->get_num_subd_faces()) { + return userdata->mesh->get_num_subd_faces(); + } + else { + return userdata->mesh->num_triangles(); + } +} + +static int mikk_get_num_verts_of_face(const SMikkTSpaceContext *context, const int face_num) +{ + const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData; + if (userdata->mesh->get_num_subd_faces()) { + const Mesh *mesh = userdata->mesh; + return mesh->get_subd_num_corners()[face_num]; + } + else { + return 3; + } +} + +static int mikk_vertex_index(const Mesh *mesh, const int face_num, const int vert_num) +{ + if (mesh->get_num_subd_faces()) { + const Mesh::SubdFace &face = mesh->get_subd_face(face_num); + return mesh->get_subd_face_corners()[face.start_corner + vert_num]; + } + else { + return mesh->get_triangles()[face_num * 3 + vert_num]; + } +} + +static int mikk_corner_index(const Mesh *mesh, const int face_num, const int vert_num) +{ + if (mesh->get_num_subd_faces()) { + const Mesh::SubdFace &face = mesh->get_subd_face(face_num); + return face.start_corner + vert_num; + } + else { + return face_num * 3 + vert_num; + } +} + +static void mikk_get_position(const SMikkTSpaceContext *context, + float P[3], + const int face_num, + const int vert_num) +{ + const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData; + const Mesh *mesh = userdata->mesh; + const int vertex_index = mikk_vertex_index(mesh, face_num, vert_num); + const float3 vP = mesh->get_verts()[vertex_index]; + P[0] = vP.x; + P[1] = vP.y; + P[2] = vP.z; +} + +static void mikk_get_texture_coordinate(const SMikkTSpaceContext *context, + float uv[2], + const int face_num, + const int vert_num) +{ + const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData; + const Mesh *mesh = userdata->mesh; + if (userdata->texface != NULL) { + const int corner_index = mikk_corner_index(mesh, face_num, vert_num); + float2 tfuv = userdata->texface[corner_index]; + uv[0] = tfuv.x; + uv[1] = tfuv.y; + } + else if (userdata->orco != NULL) { + const int vertex_index = mikk_vertex_index(mesh, face_num, vert_num); + const float3 orco_loc = userdata->orco_loc; + const float3 orco_size = userdata->orco_size; + const float3 orco = (userdata->orco[vertex_index] + orco_loc) / orco_size; + + const float2 tmp = map_to_sphere(orco); + uv[0] = tmp.x; + uv[1] = tmp.y; + } + else { + uv[0] = 0.0f; + uv[1] = 0.0f; + } +} + +static void mikk_get_normal(const SMikkTSpaceContext *context, + float N[3], + const int face_num, + const int vert_num) +{ + const MikkUserData *userdata = (const MikkUserData *)context->m_pUserData; + const Mesh *mesh = userdata->mesh; + float3 vN; + if (mesh->get_num_subd_faces()) { + const Mesh::SubdFace &face 
= mesh->get_subd_face(face_num); + if (face.smooth) { + const int vertex_index = mikk_vertex_index(mesh, face_num, vert_num); + vN = userdata->vertex_normal[vertex_index]; + } + else { + vN = face.normal(mesh); + } + } + else { + if (mesh->get_smooth()[face_num]) { + const int vertex_index = mikk_vertex_index(mesh, face_num, vert_num); + vN = userdata->vertex_normal[vertex_index]; + } + else { + const Mesh::Triangle tri = mesh->get_triangle(face_num); + vN = tri.compute_normal(&mesh->get_verts()[0]); + } + } + N[0] = vN.x; + N[1] = vN.y; + N[2] = vN.z; +} + +static void mikk_set_tangent_space(const SMikkTSpaceContext *context, + const float T[], + const float sign, + const int face_num, + const int vert_num) +{ + MikkUserData *userdata = (MikkUserData *)context->m_pUserData; + const Mesh *mesh = userdata->mesh; + const int corner_index = mikk_corner_index(mesh, face_num, vert_num); + userdata->tangent[corner_index] = make_float3(T[0], T[1], T[2]); + if (userdata->tangent_sign != NULL) { + userdata->tangent_sign[corner_index] = sign; + } +} + +static void mikk_compute_tangents( + const BL::Mesh &b_mesh, const char *layer_name, Mesh *mesh, bool need_sign, bool active_render) +{ + /* Create tangent attributes. */ + AttributeSet &attributes = (mesh->get_num_subd_faces()) ? mesh->subd_attributes : + mesh->attributes; + Attribute *attr; + ustring name; + if (layer_name != NULL) { + name = ustring((string(layer_name) + ".tangent").c_str()); + } + else { + name = ustring("orco.tangent"); + } + if (active_render) { + attr = attributes.add(ATTR_STD_UV_TANGENT, name); + } + else { + attr = attributes.add(name, TypeDesc::TypeVector, ATTR_ELEMENT_CORNER); + } + float3 *tangent = attr->data_float3(); + /* Create bitangent sign attribute. */ + float *tangent_sign = NULL; + if (need_sign) { + Attribute *attr_sign; + ustring name_sign; + if (layer_name != NULL) { + name_sign = ustring((string(layer_name) + ".tangent_sign").c_str()); + } + else { + name_sign = ustring("orco.tangent_sign"); + } + + if (active_render) { + attr_sign = attributes.add(ATTR_STD_UV_TANGENT_SIGN, name_sign); + } + else { + attr_sign = attributes.add(name_sign, TypeDesc::TypeFloat, ATTR_ELEMENT_CORNER); + } + tangent_sign = attr_sign->data_float(); + } + /* Setup userdata. */ + MikkUserData userdata(b_mesh, layer_name, mesh, tangent, tangent_sign); + /* Setup interface. */ + SMikkTSpaceInterface sm_interface; + memset(&sm_interface, 0, sizeof(sm_interface)); + sm_interface.m_getNumFaces = mikk_get_num_faces; + sm_interface.m_getNumVerticesOfFace = mikk_get_num_verts_of_face; + sm_interface.m_getPosition = mikk_get_position; + sm_interface.m_getTexCoord = mikk_get_texture_coordinate; + sm_interface.m_getNormal = mikk_get_normal; + sm_interface.m_setTSpaceBasic = mikk_set_tangent_space; + /* Setup context. */ + SMikkTSpaceContext context; + memset(&context, 0, sizeof(context)); + context.m_pUserData = &userdata; + context.m_pInterface = &sm_interface; + /* Compute tangents. */ + genTangSpaceDefault(&context); +} + +/* Create sculpt vertex color attributes. */ +static void attr_create_sculpt_vertex_color(Scene *scene, + Mesh *mesh, + BL::Mesh &b_mesh, + bool subdivision) +{ + for (BL::MeshVertColorLayer &l : b_mesh.sculpt_vertex_colors) { + const bool active_render = l.active_render(); + AttributeStandard vcol_std = (active_render) ? 
ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE; + ustring vcol_name = ustring(l.name().c_str()); + + const bool need_vcol = mesh->need_attribute(scene, vcol_name) || + mesh->need_attribute(scene, vcol_std); + + if (!need_vcol) { + continue; + } + + AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes; + Attribute *vcol_attr = attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_VERTEX); + vcol_attr->std = vcol_std; + + float4 *cdata = vcol_attr->data_float4(); + int numverts = b_mesh.vertices.length(); + + for (int i = 0; i < numverts; i++) { + *(cdata++) = get_float4(l.data[i].color()); + } + } +} + +template +static void fill_generic_attribute(BL::Mesh &b_mesh, + TypeInCycles *data, + const AttributeElement element, + const GetValueAtIndex &get_value_at_index) +{ + switch (element) { + case ATTR_ELEMENT_CORNER: { + for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { + const int index = t.index() * 3; + BL::Array loops = t.loops(); + data[index] = get_value_at_index(loops[0]); + data[index + 1] = get_value_at_index(loops[1]); + data[index + 2] = get_value_at_index(loops[2]); + } + break; + } + case ATTR_ELEMENT_VERTEX: { + const int num_verts = b_mesh.vertices.length(); + for (int i = 0; i < num_verts; i++) { + data[i] = get_value_at_index(i); + } + break; + } + case ATTR_ELEMENT_FACE: { + for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { + data[t.index()] = get_value_at_index(t.polygon_index()); + } + break; + } + default: { + assert(false); + break; + } + } +} + +static void attr_create_motion(Mesh *mesh, BL::Attribute &b_attribute, const float motion_scale) +{ + if (!(b_attribute.domain() == BL::Attribute::domain_POINT) && + (b_attribute.data_type() == BL::Attribute::data_type_FLOAT_VECTOR)) { + return; + } + + BL::FloatVectorAttribute b_vector_attribute(b_attribute); + const int numverts = mesh->get_verts().size(); + + /* Find or add attribute */ + float3 *P = &mesh->get_verts()[0]; + Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + + if (!attr_mP) { + attr_mP = mesh->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); + } + + /* Only export previous and next frame, we don't have any in between data. */ + float motion_times[2] = {-1.0f, 1.0f}; + for (int step = 0; step < 2; step++) { + const float relative_time = motion_times[step] * 0.5f * motion_scale; + float3 *mP = attr_mP->data_float3() + step * numverts; + + for (int i = 0; i < numverts; i++) { + mP[i] = P[i] + get_float3(b_vector_attribute.data[i].vector()) * relative_time; + } + } +} + +static void attr_create_generic(Scene *scene, + Mesh *mesh, + BL::Mesh &b_mesh, + const bool subdivision, + const bool need_motion, + const float motion_scale) +{ + if (subdivision) { + /* TODO: Handle subdivision correctly. 
*/ + return; + } + AttributeSet &attributes = mesh->attributes; + static const ustring u_velocity("velocity"); + + for (BL::Attribute &b_attribute : b_mesh.attributes) { + const ustring name{b_attribute.name().c_str()}; + + if (need_motion && name == u_velocity) { + attr_create_motion(mesh, b_attribute, motion_scale); + } + + if (!mesh->need_attribute(scene, name)) { + continue; + } + if (attributes.find(name)) { + continue; + } + + const BL::Attribute::domain_enum b_domain = b_attribute.domain(); + const BL::Attribute::data_type_enum b_data_type = b_attribute.data_type(); + + AttributeElement element = ATTR_ELEMENT_NONE; + switch (b_domain) { + case BL::Attribute::domain_CORNER: + element = ATTR_ELEMENT_CORNER; + break; + case BL::Attribute::domain_POINT: + element = ATTR_ELEMENT_VERTEX; + break; + case BL::Attribute::domain_FACE: + element = ATTR_ELEMENT_FACE; + break; + default: + break; + } + if (element == ATTR_ELEMENT_NONE) { + /* Not supported. */ + continue; + } + switch (b_data_type) { + case BL::Attribute::data_type_FLOAT: { + BL::FloatAttribute b_float_attribute{b_attribute}; + Attribute *attr = attributes.add(name, TypeFloat, element); + float *data = attr->data_float(); + fill_generic_attribute( + b_mesh, data, element, [&](int i) { return b_float_attribute.data[i].value(); }); + break; + } + case BL::Attribute::data_type_BOOLEAN: { + BL::BoolAttribute b_bool_attribute{b_attribute}; + Attribute *attr = attributes.add(name, TypeFloat, element); + float *data = attr->data_float(); + fill_generic_attribute( + b_mesh, data, element, [&](int i) { return (float)b_bool_attribute.data[i].value(); }); + break; + } + case BL::Attribute::data_type_INT: { + BL::IntAttribute b_int_attribute{b_attribute}; + Attribute *attr = attributes.add(name, TypeFloat, element); + float *data = attr->data_float(); + fill_generic_attribute( + b_mesh, data, element, [&](int i) { return (float)b_int_attribute.data[i].value(); }); + break; + } + case BL::Attribute::data_type_FLOAT_VECTOR: { + BL::FloatVectorAttribute b_vector_attribute{b_attribute}; + Attribute *attr = attributes.add(name, TypeVector, element); + float3 *data = attr->data_float3(); + fill_generic_attribute(b_mesh, data, element, [&](int i) { + BL::Array v = b_vector_attribute.data[i].vector(); + return make_float3(v[0], v[1], v[2]); + }); + break; + } + case BL::Attribute::data_type_FLOAT_COLOR: { + BL::FloatColorAttribute b_color_attribute{b_attribute}; + Attribute *attr = attributes.add(name, TypeRGBA, element); + float4 *data = attr->data_float4(); + fill_generic_attribute(b_mesh, data, element, [&](int i) { + BL::Array v = b_color_attribute.data[i].color(); + return make_float4(v[0], v[1], v[2], v[3]); + }); + break; + } + case BL::Attribute::data_type_FLOAT2: { + BL::Float2Attribute b_float2_attribute{b_attribute}; + Attribute *attr = attributes.add(name, TypeFloat2, element); + float2 *data = attr->data_float2(); + fill_generic_attribute(b_mesh, data, element, [&](int i) { + BL::Array v = b_float2_attribute.data[i].vector(); + return make_float2(v[0], v[1]); + }); + break; + } + default: + /* Not supported. */ + break; + } + } +} + +/* Create vertex color attributes. */ +static void attr_create_vertex_color(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivision) +{ + for (BL::MeshLoopColorLayer &l : b_mesh.vertex_colors) { + const bool active_render = l.active_render(); + AttributeStandard vcol_std = (active_render) ? 
ATTR_STD_VERTEX_COLOR : ATTR_STD_NONE; + ustring vcol_name = ustring(l.name().c_str()); + + const bool need_vcol = mesh->need_attribute(scene, vcol_name) || + mesh->need_attribute(scene, vcol_std); + + if (!need_vcol) { + continue; + } + + Attribute *vcol_attr = NULL; + + if (subdivision) { + if (active_render) { + vcol_attr = mesh->subd_attributes.add(vcol_std, vcol_name); + } + else { + vcol_attr = mesh->subd_attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE); + } + + uchar4 *cdata = vcol_attr->data_uchar4(); + + for (BL::MeshPolygon &p : b_mesh.polygons) { + int n = p.loop_total(); + for (int i = 0; i < n; i++) { + float4 color = get_float4(l.data[p.loop_start() + i].color()); + /* Compress/encode vertex color using the sRGB curve. */ + *(cdata++) = color_float4_to_uchar4(color); + } + } + } + else { + if (active_render) { + vcol_attr = mesh->attributes.add(vcol_std, vcol_name); + } + else { + vcol_attr = mesh->attributes.add(vcol_name, TypeRGBA, ATTR_ELEMENT_CORNER_BYTE); + } + + uchar4 *cdata = vcol_attr->data_uchar4(); + + for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { + int3 li = get_int3(t.loops()); + float4 c1 = get_float4(l.data[li[0]].color()); + float4 c2 = get_float4(l.data[li[1]].color()); + float4 c3 = get_float4(l.data[li[2]].color()); + + /* Compress/encode vertex color using the sRGB curve. */ + cdata[0] = color_float4_to_uchar4(c1); + cdata[1] = color_float4_to_uchar4(c2); + cdata[2] = color_float4_to_uchar4(c3); + + cdata += 3; + } + } + } +} + +/* Create uv map attributes. */ +static void attr_create_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh) +{ + if (b_mesh.uv_layers.length() != 0) { + for (BL::MeshUVLoopLayer &l : b_mesh.uv_layers) { + const bool active_render = l.active_render(); + AttributeStandard uv_std = (active_render) ? ATTR_STD_UV : ATTR_STD_NONE; + ustring uv_name = ustring(l.name().c_str()); + AttributeStandard tangent_std = (active_render) ? ATTR_STD_UV_TANGENT : ATTR_STD_NONE; + ustring tangent_name = ustring((string(l.name().c_str()) + ".tangent").c_str()); + + /* Denotes whether UV map was requested directly. */ + const bool need_uv = mesh->need_attribute(scene, uv_name) || + mesh->need_attribute(scene, uv_std); + /* Denotes whether tangent was requested directly. */ + const bool need_tangent = mesh->need_attribute(scene, tangent_name) || + (active_render && mesh->need_attribute(scene, tangent_std)); + + /* UV map */ + /* NOTE: We create temporary UV layer if its needed for tangent but + * wasn't requested by other nodes in shaders. + */ + Attribute *uv_attr = NULL; + if (need_uv || need_tangent) { + if (active_render) { + uv_attr = mesh->attributes.add(uv_std, uv_name); + } + else { + uv_attr = mesh->attributes.add(uv_name, TypeFloat2, ATTR_ELEMENT_CORNER); + } + + float2 *fdata = uv_attr->data_float2(); + + for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { + int3 li = get_int3(t.loops()); + fdata[0] = get_float2(l.data[li[0]].uv()); + fdata[1] = get_float2(l.data[li[1]].uv()); + fdata[2] = get_float2(l.data[li[2]].uv()); + fdata += 3; + } + } + + /* UV tangent */ + if (need_tangent) { + AttributeStandard sign_std = (active_render) ? ATTR_STD_UV_TANGENT_SIGN : ATTR_STD_NONE; + ustring sign_name = ustring((string(l.name().c_str()) + ".tangent_sign").c_str()); + bool need_sign = (mesh->need_attribute(scene, sign_name) || + mesh->need_attribute(scene, sign_std)); + mikk_compute_tangents(b_mesh, l.name().c_str(), mesh, need_sign, active_render); + } + /* Remove temporarily created UV attribute. 
*/ + if (!need_uv && uv_attr != NULL) { + mesh->attributes.remove(uv_attr); + } + } + } + else if (mesh->need_attribute(scene, ATTR_STD_UV_TANGENT)) { + bool need_sign = mesh->need_attribute(scene, ATTR_STD_UV_TANGENT_SIGN); + mikk_compute_tangents(b_mesh, NULL, mesh, need_sign, true); + if (!mesh->need_attribute(scene, ATTR_STD_GENERATED)) { + mesh->attributes.remove(ATTR_STD_GENERATED); + } + } +} + +static void attr_create_subd_uv_map(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivide_uvs) +{ + if (b_mesh.uv_layers.length() != 0) { + BL::Mesh::uv_layers_iterator l; + int i = 0; + + for (b_mesh.uv_layers.begin(l); l != b_mesh.uv_layers.end(); ++l, ++i) { + bool active_render = l->active_render(); + AttributeStandard uv_std = (active_render) ? ATTR_STD_UV : ATTR_STD_NONE; + ustring uv_name = ustring(l->name().c_str()); + AttributeStandard tangent_std = (active_render) ? ATTR_STD_UV_TANGENT : ATTR_STD_NONE; + ustring tangent_name = ustring((string(l->name().c_str()) + ".tangent").c_str()); + + /* Denotes whether UV map was requested directly. */ + const bool need_uv = mesh->need_attribute(scene, uv_name) || + mesh->need_attribute(scene, uv_std); + /* Denotes whether tangent was requested directly. */ + const bool need_tangent = mesh->need_attribute(scene, tangent_name) || + (active_render && mesh->need_attribute(scene, tangent_std)); + + Attribute *uv_attr = NULL; + + /* UV map */ + if (need_uv || need_tangent) { + if (active_render) + uv_attr = mesh->subd_attributes.add(uv_std, uv_name); + else + uv_attr = mesh->subd_attributes.add(uv_name, TypeFloat2, ATTR_ELEMENT_CORNER); + + if (subdivide_uvs) { + uv_attr->flags |= ATTR_SUBDIVIDED; + } + + float2 *fdata = uv_attr->data_float2(); + + for (BL::MeshPolygon &p : b_mesh.polygons) { + int n = p.loop_total(); + for (int j = 0; j < n; j++) { + *(fdata++) = get_float2(l->data[p.loop_start() + j].uv()); + } + } + } + + /* UV tangent */ + if (need_tangent) { + AttributeStandard sign_std = (active_render) ? ATTR_STD_UV_TANGENT_SIGN : ATTR_STD_NONE; + ustring sign_name = ustring((string(l->name().c_str()) + ".tangent_sign").c_str()); + bool need_sign = (mesh->need_attribute(scene, sign_name) || + mesh->need_attribute(scene, sign_std)); + mikk_compute_tangents(b_mesh, l->name().c_str(), mesh, need_sign, active_render); + } + /* Remove temporarily created UV attribute. */ + if (!need_uv && uv_attr != NULL) { + mesh->subd_attributes.remove(uv_attr); + } + } + } + else if (mesh->need_attribute(scene, ATTR_STD_UV_TANGENT)) { + bool need_sign = mesh->need_attribute(scene, ATTR_STD_UV_TANGENT_SIGN); + mikk_compute_tangents(b_mesh, NULL, mesh, need_sign, true); + if (!mesh->need_attribute(scene, ATTR_STD_GENERATED)) { + mesh->subd_attributes.remove(ATTR_STD_GENERATED); + } + } +} + +/* Create vertex pointiness attributes. */ + +/* Compare vertices by sum of their coordinates. */ +class VertexAverageComparator { + public: + VertexAverageComparator(const array &verts) : verts_(verts) + { + } + + bool operator()(const int &vert_idx_a, const int &vert_idx_b) + { + const float3 &vert_a = verts_[vert_idx_a]; + const float3 &vert_b = verts_[vert_idx_b]; + if (vert_a == vert_b) { + /* Special case for doubles, so we ensure ordering. 
*/ + return vert_idx_a > vert_idx_b; + } + const float x1 = vert_a.x + vert_a.y + vert_a.z; + const float x2 = vert_b.x + vert_b.y + vert_b.z; + return x1 < x2; + } + + protected: + const array &verts_; +}; + +static void attr_create_pointiness(Scene *scene, Mesh *mesh, BL::Mesh &b_mesh, bool subdivision) +{ + if (!mesh->need_attribute(scene, ATTR_STD_POINTINESS)) { + return; + } + const int num_verts = b_mesh.vertices.length(); + if (num_verts == 0) { + return; + } + /* STEP 1: Find out duplicated vertices and point duplicates to a single + * original vertex. + */ + vector sorted_vert_indeices(num_verts); + for (int vert_index = 0; vert_index < num_verts; ++vert_index) { + sorted_vert_indeices[vert_index] = vert_index; + } + VertexAverageComparator compare(mesh->get_verts()); + sort(sorted_vert_indeices.begin(), sorted_vert_indeices.end(), compare); + /* This array stores index of the original vertex for the given vertex + * index. + */ + vector vert_orig_index(num_verts); + for (int sorted_vert_index = 0; sorted_vert_index < num_verts; ++sorted_vert_index) { + const int vert_index = sorted_vert_indeices[sorted_vert_index]; + const float3 &vert_co = mesh->get_verts()[vert_index]; + bool found = false; + for (int other_sorted_vert_index = sorted_vert_index + 1; other_sorted_vert_index < num_verts; + ++other_sorted_vert_index) { + const int other_vert_index = sorted_vert_indeices[other_sorted_vert_index]; + const float3 &other_vert_co = mesh->get_verts()[other_vert_index]; + /* We are too far away now, we wouldn't have duplicate. */ + if ((other_vert_co.x + other_vert_co.y + other_vert_co.z) - + (vert_co.x + vert_co.y + vert_co.z) > + 3 * FLT_EPSILON) { + break; + } + /* Found duplicate. */ + if (len_squared(other_vert_co - vert_co) < FLT_EPSILON) { + found = true; + vert_orig_index[vert_index] = other_vert_index; + break; + } + } + if (!found) { + vert_orig_index[vert_index] = vert_index; + } + } + /* Make sure we always points to the very first orig vertex. */ + for (int vert_index = 0; vert_index < num_verts; ++vert_index) { + int orig_index = vert_orig_index[vert_index]; + while (orig_index != vert_orig_index[orig_index]) { + orig_index = vert_orig_index[orig_index]; + } + vert_orig_index[vert_index] = orig_index; + } + sorted_vert_indeices.free_memory(); + /* STEP 2: Calculate vertex normals taking into account their possible + * duplicates which gets "welded" together. + */ + vector vert_normal(num_verts, zero_float3()); + /* First we accumulate all vertex normals in the original index. */ + for (int vert_index = 0; vert_index < num_verts; ++vert_index) { + const float3 normal = get_float3(b_mesh.vertices[vert_index].normal()); + const int orig_index = vert_orig_index[vert_index]; + vert_normal[orig_index] += normal; + } + /* Then we normalize the accumulated result and flush it to all duplicates + * as well. + */ + for (int vert_index = 0; vert_index < num_verts; ++vert_index) { + const int orig_index = vert_orig_index[vert_index]; + vert_normal[vert_index] = normalize(vert_normal[orig_index]); + } + /* STEP 3: Calculate pointiness using single ring neighborhood. 
*/ + vector counter(num_verts, 0); + vector raw_data(num_verts, 0.0f); + vector edge_accum(num_verts, zero_float3()); + BL::Mesh::edges_iterator e; + EdgeMap visited_edges; + int edge_index = 0; + memset(&counter[0], 0, sizeof(int) * counter.size()); + for (b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) { + const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]], + v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]]; + if (visited_edges.exists(v0, v1)) { + continue; + } + visited_edges.insert(v0, v1); + float3 co0 = get_float3(b_mesh.vertices[v0].co()), co1 = get_float3(b_mesh.vertices[v1].co()); + float3 edge = normalize(co1 - co0); + edge_accum[v0] += edge; + edge_accum[v1] += -edge; + ++counter[v0]; + ++counter[v1]; + } + for (int vert_index = 0; vert_index < num_verts; ++vert_index) { + const int orig_index = vert_orig_index[vert_index]; + if (orig_index != vert_index) { + /* Skip duplicates, they'll be overwritten later on. */ + continue; + } + if (counter[vert_index] > 0) { + const float3 normal = vert_normal[vert_index]; + const float angle = safe_acosf(dot(normal, edge_accum[vert_index] / counter[vert_index])); + raw_data[vert_index] = angle * M_1_PI_F; + } + else { + raw_data[vert_index] = 0.0f; + } + } + /* STEP 3: Blur vertices to approximate 2 ring neighborhood. */ + AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes; + Attribute *attr = attributes.add(ATTR_STD_POINTINESS); + float *data = attr->data_float(); + memcpy(data, &raw_data[0], sizeof(float) * raw_data.size()); + memset(&counter[0], 0, sizeof(int) * counter.size()); + edge_index = 0; + visited_edges.clear(); + for (b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e, ++edge_index) { + const int v0 = vert_orig_index[b_mesh.edges[edge_index].vertices()[0]], + v1 = vert_orig_index[b_mesh.edges[edge_index].vertices()[1]]; + if (visited_edges.exists(v0, v1)) { + continue; + } + visited_edges.insert(v0, v1); + data[v0] += raw_data[v1]; + data[v1] += raw_data[v0]; + ++counter[v0]; + ++counter[v1]; + } + for (int vert_index = 0; vert_index < num_verts; ++vert_index) { + data[vert_index] /= counter[vert_index] + 1; + } + /* STEP 4: Copy attribute to the duplicated vertices. */ + for (int vert_index = 0; vert_index < num_verts; ++vert_index) { + const int orig_index = vert_orig_index[vert_index]; + data[vert_index] = data[orig_index]; + } +} + +/* The Random Per Island attribute is a random float associated with each + * connected component (island) of the mesh. The attribute is computed by + * first classifying the vertices into different sets using a Disjoint Set + * data structure. Then the index of the root of each vertex (Which is the + * representative of the set the vertex belongs to) is hashed and stored. + * + * We are using a face attribute to avoid interpolation during rendering, + * allowing the user to safely hash the output further. Had we used vertex + * attribute, the interpolation will introduce very slight variations, + * making the output unsafe to hash. 
*/ +static void attr_create_random_per_island(Scene *scene, + Mesh *mesh, + BL::Mesh &b_mesh, + bool subdivision) +{ + if (!mesh->need_attribute(scene, ATTR_STD_RANDOM_PER_ISLAND)) { + return; + } + + int number_of_vertices = b_mesh.vertices.length(); + if (number_of_vertices == 0) { + return; + } + + DisjointSet vertices_sets(number_of_vertices); + + for (BL::MeshEdge &e : b_mesh.edges) { + vertices_sets.join(e.vertices()[0], e.vertices()[1]); + } + + AttributeSet &attributes = (subdivision) ? mesh->subd_attributes : mesh->attributes; + Attribute *attribute = attributes.add(ATTR_STD_RANDOM_PER_ISLAND); + float *data = attribute->data_float(); + + if (!subdivision) { + for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { + data[t.index()] = hash_uint_to_float(vertices_sets.find(t.vertices()[0])); + } + } + else { + for (BL::MeshPolygon &p : b_mesh.polygons) { + data[p.index()] = hash_uint_to_float(vertices_sets.find(p.vertices()[0])); + } + } +} + +/* Create Mesh */ + +static void create_mesh(Scene *scene, + Mesh *mesh, + BL::Mesh &b_mesh, + const array &used_shaders, + const bool need_motion, + const float motion_scale, + const bool subdivision = false, + const bool subdivide_uvs = true) +{ + /* count vertices and faces */ + int numverts = b_mesh.vertices.length(); + int numfaces = (!subdivision) ? b_mesh.loop_triangles.length() : b_mesh.polygons.length(); + int numtris = 0; + int numcorners = 0; + int numngons = 0; + bool use_loop_normals = b_mesh.use_auto_smooth() && + (mesh->get_subdivision_type() != Mesh::SUBDIVISION_CATMULL_CLARK); + + /* If no faces, create empty mesh. */ + if (numfaces == 0) { + return; + } + + if (!subdivision) { + numtris = numfaces; + } + else { + for (BL::MeshPolygon &p : b_mesh.polygons) { + numngons += (p.loop_total() == 4) ? 0 : 1; + numcorners += p.loop_total(); + } + } + + /* allocate memory */ + if (subdivision) { + mesh->reserve_subd_faces(numfaces, numngons, numcorners); + } + + mesh->reserve_mesh(numverts, numtris); + + /* create vertex coordinates and normals */ + BL::Mesh::vertices_iterator v; + for (b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v) + mesh->add_vertex(get_float3(v->co())); + + AttributeSet &attributes = (subdivision) ? 
mesh->subd_attributes : mesh->attributes; + Attribute *attr_N = attributes.add(ATTR_STD_VERTEX_NORMAL); + float3 *N = attr_N->data_float3(); + + for (b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v, ++N) + *N = get_float3(v->normal()); + N = attr_N->data_float3(); + + /* create generated coordinates from undeformed coordinates */ + const bool need_default_tangent = (subdivision == false) && (b_mesh.uv_layers.length() == 0) && + (mesh->need_attribute(scene, ATTR_STD_UV_TANGENT)); + if (mesh->need_attribute(scene, ATTR_STD_GENERATED) || need_default_tangent) { + Attribute *attr = attributes.add(ATTR_STD_GENERATED); + attr->flags |= ATTR_SUBDIVIDED; + + float3 loc, size; + mesh_texture_space(b_mesh, loc, size); + + float3 *generated = attr->data_float3(); + size_t i = 0; + + for (b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v) { + generated[i++] = get_float3(v->undeformed_co()) * size - loc; + } + } + + /* create faces */ + if (!subdivision) { + for (BL::MeshLoopTriangle &t : b_mesh.loop_triangles) { + BL::MeshPolygon p = b_mesh.polygons[t.polygon_index()]; + int3 vi = get_int3(t.vertices()); + + int shader = clamp(p.material_index(), 0, used_shaders.size() - 1); + bool smooth = p.use_smooth() || use_loop_normals; + + if (use_loop_normals) { + BL::Array loop_normals = t.split_normals(); + for (int i = 0; i < 3; i++) { + N[vi[i]] = make_float3( + loop_normals[i * 3], loop_normals[i * 3 + 1], loop_normals[i * 3 + 2]); + } + } + + /* Create triangles. + * + * NOTE: Autosmooth is already taken care about. + */ + mesh->add_triangle(vi[0], vi[1], vi[2], shader, smooth); + } + } + else { + vector vi; + + for (BL::MeshPolygon &p : b_mesh.polygons) { + int n = p.loop_total(); + int shader = clamp(p.material_index(), 0, used_shaders.size() - 1); + bool smooth = p.use_smooth() || use_loop_normals; + + vi.resize(n); + for (int i = 0; i < n; i++) { + /* NOTE: Autosmooth is already taken care about. */ + vi[i] = b_mesh.loops[p.loop_start() + i].vertex_index(); + } + + /* create subd faces */ + mesh->add_subd_face(&vi[0], n, shader, smooth); + } + } + + /* Create all needed attributes. + * The calculate functions will check whether they're needed or not. + */ + attr_create_pointiness(scene, mesh, b_mesh, subdivision); + attr_create_vertex_color(scene, mesh, b_mesh, subdivision); + attr_create_sculpt_vertex_color(scene, mesh, b_mesh, subdivision); + attr_create_random_per_island(scene, mesh, b_mesh, subdivision); + attr_create_generic(scene, mesh, b_mesh, subdivision, need_motion, motion_scale); + + if (subdivision) { + attr_create_subd_uv_map(scene, mesh, b_mesh, subdivide_uvs); + } + else { + attr_create_uv_map(scene, mesh, b_mesh); + } + + /* For volume objects, create a matrix to transform from object space to + * mesh texture space. this does not work with deformations but that can + * probably only be done well with a volume grid mapping of coordinates. 
*/ + if (mesh->need_attribute(scene, ATTR_STD_GENERATED_TRANSFORM)) { + Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED_TRANSFORM); + Transform *tfm = attr->data_transform(); + + float3 loc, size; + mesh_texture_space(b_mesh, loc, size); + + *tfm = transform_translate(-loc) * transform_scale(size); + } +} + +static void create_subd_mesh(Scene *scene, + Mesh *mesh, + BObjectInfo &b_ob_info, + BL::Mesh &b_mesh, + const array &used_shaders, + const bool need_motion, + const float motion_scale, + float dicing_rate, + int max_subdivisions) +{ + BL::Object b_ob = b_ob_info.real_object; + + BL::SubsurfModifier subsurf_mod(b_ob.modifiers[b_ob.modifiers.length() - 1]); + bool subdivide_uvs = subsurf_mod.uv_smooth() != BL::SubsurfModifier::uv_smooth_NONE; + + create_mesh(scene, mesh, b_mesh, used_shaders, need_motion, motion_scale, true, subdivide_uvs); + + /* export creases */ + size_t num_creases = 0; + + for (BL::MeshEdge &e : b_mesh.edges) { + if (e.crease() != 0.0f) { + num_creases++; + } + } + + mesh->reserve_subd_creases(num_creases); + + for (BL::MeshEdge &e : b_mesh.edges) { + if (e.crease() != 0.0f) { + mesh->add_crease(e.vertices()[0], e.vertices()[1], e.crease()); + } + } + + /* set subd params */ + PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles"); + float subd_dicing_rate = max(0.1f, RNA_float_get(&cobj, "dicing_rate") * dicing_rate); + + mesh->set_subd_dicing_rate(subd_dicing_rate); + mesh->set_subd_max_level(max_subdivisions); + mesh->set_subd_objecttoworld(get_transform(b_ob.matrix_world())); +} + +/* Sync */ + +/* Check whether some of "built-in" motion-related attributes are needed to be exported (includes + * things like velocity from cache modifier, fluid simulation). + * + * NOTE: This code is run prior to object motion blur initialization. so can not access properties + * set by `sync_object_motion_init()`. */ +static bool mesh_need_motion_attribute(BObjectInfo &b_ob_info, Scene *scene) +{ + const Scene::MotionType need_motion = scene->need_motion(); + if (need_motion == Scene::MOTION_NONE) { + /* Simple case: neither motion pass nor motion blur is needed, no need in the motion related + * attributes. */ + return false; + } + + if (need_motion == Scene::MOTION_BLUR) { + /* A bit tricky and implicit case: + * - Motion blur is enabled in the scene, which implies specific number of time steps for + * objects. + * - If the object has motion blur disabled on it, it will have 0 time steps. + * - Motion attribute expects non-zero time steps. + * + * Avoid adding motion attributes if the motion blur will enforce 0 motion steps. */ + PointerRNA cobject = RNA_pointer_get(&b_ob_info.real_object.ptr, "cycles"); + const bool use_motion = get_boolean(cobject, "use_motion_blur"); + if (!use_motion) { + return false; + } + } + + /* Motion pass which implies 3 motion steps, or motion blur which is not disabled on object + * level. */ + return true; +} + +void BlenderSync::sync_mesh(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Mesh *mesh) +{ + /* make a copy of the shaders as the caller in the main thread still need them for syncing the + * attributes */ + array used_shaders = mesh->get_used_shaders(); + + Mesh new_mesh; + new_mesh.set_used_shaders(used_shaders); + + if (view_layer.use_surfaces) { + /* Adaptive subdivision setup. Not for baking since that requires + * exact mapping to the Blender mesh. 
*/ + if (!scene->bake_manager->get_baking()) { + new_mesh.set_subdivision_type( + object_subdivision_type(b_ob_info.real_object, preview, experimental)); + } + + /* For some reason, meshes do not need this... */ + bool need_undeformed = new_mesh.need_attribute(scene, ATTR_STD_GENERATED); + BL::Mesh b_mesh = object_to_mesh( + b_data, b_ob_info, b_depsgraph, need_undeformed, new_mesh.get_subdivision_type()); + + if (b_mesh) { + /* Motion blur attribute is relative to seconds, we need it relative to frames. */ + const bool need_motion = mesh_need_motion_attribute(b_ob_info, scene); + const float motion_scale = (need_motion) ? + scene->motion_shutter_time() / + (b_scene.render().fps() / b_scene.render().fps_base()) : + 0.0f; + + /* Sync mesh itself. */ + if (new_mesh.get_subdivision_type() != Mesh::SUBDIVISION_NONE) + create_subd_mesh(scene, + &new_mesh, + b_ob_info, + b_mesh, + new_mesh.get_used_shaders(), + need_motion, + motion_scale, + dicing_rate, + max_subdivisions); + else + create_mesh(scene, + &new_mesh, + b_mesh, + new_mesh.get_used_shaders(), + need_motion, + motion_scale, + false); + + free_object_to_mesh(b_data, b_ob_info, b_mesh); + } + } + + /* update original sockets */ + + mesh->clear_non_sockets(); + + for (const SocketType &socket : new_mesh.type->inputs) { + /* Those sockets are updated in sync_object, so do not modify them. */ + if (socket.name == "use_motion_blur" || socket.name == "motion_steps" || + socket.name == "used_shaders") { + continue; + } + mesh->set_value(socket, new_mesh, socket); + } + + mesh->attributes.update(std::move(new_mesh.attributes)); + mesh->subd_attributes.update(std::move(new_mesh.subd_attributes)); + + mesh->set_num_subd_faces(new_mesh.get_num_subd_faces()); + + /* tag update */ + bool rebuild = (mesh->triangles_is_modified()) || (mesh->subd_num_corners_is_modified()) || + (mesh->subd_shader_is_modified()) || (mesh->subd_smooth_is_modified()) || + (mesh->subd_ptex_offset_is_modified()) || + (mesh->subd_start_corner_is_modified()) || + (mesh->subd_face_corners_is_modified()); + + mesh->tag_update(scene, rebuild); +} + +void BlenderSync::sync_mesh_motion(BL::Depsgraph b_depsgraph, + BObjectInfo &b_ob_info, + Mesh *mesh, + int motion_step) +{ + /* Skip if no vertices were exported. */ + size_t numverts = mesh->get_verts().size(); + if (numverts == 0) { + return; + } + + /* Skip objects without deforming modifiers. this is not totally reliable, + * would need a more extensive check to see which objects are animated. */ + BL::Mesh b_mesh(PointerRNA_NULL); + if (ccl::BKE_object_is_deform_modified(b_ob_info, b_scene, preview)) { + /* get derived mesh */ + b_mesh = object_to_mesh(b_data, b_ob_info, b_depsgraph, false, Mesh::SUBDIVISION_NONE); + } + + const std::string ob_name = b_ob_info.real_object.name(); + + /* TODO(sergey): Perform preliminary check for number of vertices. */ + if (b_mesh) { + /* Export deformed coordinates. */ + /* Find attributes. */ + Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); + Attribute *attr_N = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL); + bool new_attribute = false; + /* Add new attributes if they don't exist already. */ + if (!attr_mP) { + attr_mP = mesh->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); + if (attr_N) + attr_mN = mesh->attributes.add(ATTR_STD_MOTION_VERTEX_NORMAL); + + new_attribute = true; + } + /* Load vertex data from mesh. 
*/ + float3 *mP = attr_mP->data_float3() + motion_step * numverts; + float3 *mN = (attr_mN) ? attr_mN->data_float3() + motion_step * numverts : NULL; + /* NOTE: We don't copy more that existing amount of vertices to prevent + * possible memory corruption. + */ + BL::Mesh::vertices_iterator v; + int i = 0; + for (b_mesh.vertices.begin(v); v != b_mesh.vertices.end() && i < numverts; ++v, ++i) { + mP[i] = get_float3(v->co()); + if (mN) + mN[i] = get_float3(v->normal()); + } + if (new_attribute) { + /* In case of new attribute, we verify if there really was any motion. */ + if (b_mesh.vertices.length() != numverts || + memcmp(mP, &mesh->get_verts()[0], sizeof(float3) * numverts) == 0) { + /* no motion, remove attributes again */ + if (b_mesh.vertices.length() != numverts) { + VLOG(1) << "Topology differs, disabling motion blur for object " << ob_name; + } + else { + VLOG(1) << "No actual deformation motion for object " << ob_name; + } + mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION); + if (attr_mN) + mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_NORMAL); + } + else if (motion_step > 0) { + VLOG(1) << "Filling deformation motion for object " << ob_name; + /* motion, fill up previous steps that we might have skipped because + * they had no motion, but we need them anyway now */ + float3 *P = &mesh->get_verts()[0]; + float3 *N = (attr_N) ? attr_N->data_float3() : NULL; + for (int step = 0; step < motion_step; step++) { + memcpy(attr_mP->data_float3() + step * numverts, P, sizeof(float3) * numverts); + if (attr_mN) + memcpy(attr_mN->data_float3() + step * numverts, N, sizeof(float3) * numverts); + } + } + } + else { + if (b_mesh.vertices.length() != numverts) { + VLOG(1) << "Topology differs, discarding motion blur for object " << ob_name << " at time " + << motion_step; + memcpy(mP, &mesh->get_verts()[0], sizeof(float3) * numverts); + if (mN != NULL) { + memcpy(mN, attr_N->data_float3(), sizeof(float3) * numverts); + } + } + } + + free_object_to_mesh(b_data, b_ob_info, b_mesh); + return; + } + + /* No deformation on this frame, copy coordinates if other frames did have it. */ + mesh->copy_center_to_motion_step(motion_step); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/object.cpp b/intern/cycles/blender/object.cpp new file mode 100644 index 00000000000..9919b9d1836 --- /dev/null +++ b/intern/cycles/blender/object.cpp @@ -0,0 +1,769 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "blender/object_cull.h" +#include "blender/sync.h" +#include "blender/util.h" + +#include "scene/alembic.h" +#include "scene/camera.h" +#include "scene/integrator.h" +#include "scene/light.h" +#include "scene/mesh.h" +#include "scene/object.h" +#include "scene/particles.h" +#include "scene/scene.h" +#include "scene/shader.h" +#include "scene/shader_graph.h" +#include "scene/shader_nodes.h" + +#include "util/foreach.h" +#include "util/hash.h" +#include "util/log.h" +#include "util/task.h" + +CCL_NAMESPACE_BEGIN + +/* Utilities */ + +bool BlenderSync::BKE_object_is_modified(BL::Object &b_ob) +{ + /* test if we can instance or if the object is modified */ + if (b_ob.type() == BL::Object::type_META) { + /* multi-user and dupli metaballs are fused, can't instance */ + return true; + } + else if (ccl::BKE_object_is_modified(b_ob, b_scene, preview)) { + /* modifiers */ + return true; + } + else { + /* object level material links */ + for (BL::MaterialSlot &b_slot : b_ob.material_slots) { + if (b_slot.link() == BL::MaterialSlot::link_OBJECT) { + return true; + } + } + } + + return false; +} + +bool BlenderSync::object_is_geometry(BL::Object &b_ob) +{ + BL::ID b_ob_data = b_ob.data(); + + if (!b_ob_data) { + return false; + } + + BL::Object::type_enum type = b_ob.type(); + + if (type == BL::Object::type_VOLUME || type == BL::Object::type_HAIR) { + /* Will be exported attached to mesh. */ + return true; + } + else if (type == BL::Object::type_CURVE) { + /* Skip exporting curves without faces, overhead can be + * significant if there are many for path animation. */ + BL::Curve b_curve(b_ob_data); + + return (b_curve.bevel_object() || b_curve.extrude() != 0.0f || b_curve.bevel_depth() != 0.0f || + b_curve.dimensions() == BL::Curve::dimensions_2D || b_ob.modifiers.length()); + } + else { + return (b_ob_data.is_a(&RNA_Mesh) || b_ob_data.is_a(&RNA_Curve) || + b_ob_data.is_a(&RNA_MetaBall)); + } +} + +bool BlenderSync::object_is_light(BL::Object &b_ob) +{ + BL::ID b_ob_data = b_ob.data(); + + return (b_ob_data && b_ob_data.is_a(&RNA_Light)); +} + +void BlenderSync::sync_object_motion_init(BL::Object &b_parent, BL::Object &b_ob, Object *object) +{ + /* Initialize motion blur for object, detecting if it's enabled and creating motion + * steps array if so. 
*/ + array motion; + object->set_motion(motion); + + Geometry *geom = object->get_geometry(); + if (!geom) { + return; + } + + int motion_steps = 0; + bool use_motion_blur = false; + + Scene::MotionType need_motion = scene->need_motion(); + if (need_motion == Scene::MOTION_BLUR) { + motion_steps = object_motion_steps(b_parent, b_ob, Object::MAX_MOTION_STEPS); + if (motion_steps && object_use_deform_motion(b_parent, b_ob)) { + use_motion_blur = true; + } + } + else if (need_motion != Scene::MOTION_NONE) { + motion_steps = 3; + } + + geom->set_use_motion_blur(use_motion_blur); + geom->set_motion_steps(motion_steps); + + motion.resize(motion_steps, transform_empty()); + + if (motion_steps) { + motion[motion_steps / 2] = object->get_tfm(); + + /* update motion socket before trying to access object->motion_time */ + object->set_motion(motion); + + for (size_t step = 0; step < motion_steps; step++) { + motion_times.insert(object->motion_time(step)); + } + } +} + +Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph, + BL::ViewLayer &b_view_layer, + BL::DepsgraphObjectInstance &b_instance, + float motion_time, + bool use_particle_hair, + bool show_lights, + BlenderObjectCulling &culling, + bool *use_portal, + TaskPool *geom_task_pool) +{ + const bool is_instance = b_instance.is_instance(); + BL::Object b_ob = b_instance.object(); + BL::Object b_parent = is_instance ? b_instance.parent() : b_instance.object(); + BObjectInfo b_ob_info{b_ob, is_instance ? b_instance.instance_object() : b_ob, b_ob.data()}; + const bool motion = motion_time != 0.0f; + /*const*/ Transform tfm = get_transform(b_ob.matrix_world()); + int *persistent_id = NULL; + BL::Array persistent_id_array; + if (is_instance) { + persistent_id_array = b_instance.persistent_id(); + persistent_id = persistent_id_array.data; + } + + /* light is handled separately */ + if (!motion && object_is_light(b_ob)) { + if (!show_lights) { + return NULL; + } + + /* TODO: don't use lights for excluded layers used as mask layer, + * when dynamic overrides are back. */ +#if 0 + if (!((layer_flag & view_layer.holdout_layer) && (layer_flag & view_layer.exclude_layer))) +#endif + { + sync_light(b_parent, + persistent_id, + b_ob_info, + is_instance ? b_instance.random_id() : 0, + tfm, + use_portal); + } + + return NULL; + } + + /* only interested in object that we can create meshes from */ + if (!object_is_geometry(b_ob)) { + return NULL; + } + + /* Perform object culling. */ + if (culling.test(scene, b_ob, tfm)) { + return NULL; + } + + /* Visibility flags for both parent and child. */ + PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); + bool use_holdout = b_parent.holdout_get(PointerRNA_NULL, b_view_layer); + uint visibility = object_ray_visibility(b_ob) & PATH_RAY_ALL_VISIBILITY; + + if (b_parent.ptr.data != b_ob.ptr.data) { + visibility &= object_ray_visibility(b_parent); + } + + /* TODO: make holdout objects on excluded layer invisible for non-camera rays. */ +#if 0 + if (use_holdout && (layer_flag & view_layer.exclude_layer)) { + visibility &= ~(PATH_RAY_ALL_VISIBILITY - PATH_RAY_CAMERA); + } +#endif + + /* Clear camera visibility for indirect only objects. */ + bool use_indirect_only = !use_holdout && + b_parent.indirect_only_get(PointerRNA_NULL, b_view_layer); + if (use_indirect_only) { + visibility &= ~PATH_RAY_CAMERA; + } + + /* Don't export completely invisible objects. */ + if (visibility == 0) { + return NULL; + } + + /* Use task pool only for non-instances, since sync_dupli_particle accesses + * geometry. 
This restriction should be removed for better performance. */ + TaskPool *object_geom_task_pool = (is_instance) ? NULL : geom_task_pool; + + /* key to lookup object */ + ObjectKey key(b_parent, persistent_id, b_ob_info.real_object, use_particle_hair); + Object *object; + + /* motion vector case */ + if (motion) { + object = object_map.find(key); + + if (object && object->use_motion()) { + /* Set transform at matching motion time step. */ + int time_index = object->motion_step(motion_time); + if (time_index >= 0) { + array motion = object->get_motion(); + motion[time_index] = tfm; + object->set_motion(motion); + } + + /* mesh deformation */ + if (object->get_geometry()) + sync_geometry_motion( + b_depsgraph, b_ob_info, object, motion_time, use_particle_hair, object_geom_task_pool); + } + + return object; + } + + /* test if we need to sync */ + bool object_updated = object_map.add_or_update(&object, b_ob, b_parent, key) || + (tfm != object->get_tfm()); + + /* mesh sync */ + Geometry *geometry = sync_geometry( + b_depsgraph, b_ob_info, object_updated, use_particle_hair, object_geom_task_pool); + object->set_geometry(geometry); + + /* special case not tracked by object update flags */ + + if (sync_object_attributes(b_instance, object)) { + object_updated = true; + } + + /* holdout */ + object->set_use_holdout(use_holdout); + + object->set_visibility(visibility); + + object->set_is_shadow_catcher(b_ob.is_shadow_catcher()); + + float shadow_terminator_shading_offset = get_float(cobject, "shadow_terminator_offset"); + object->set_shadow_terminator_shading_offset(shadow_terminator_shading_offset); + + float shadow_terminator_geometry_offset = get_float(cobject, + "shadow_terminator_geometry_offset"); + object->set_shadow_terminator_geometry_offset(shadow_terminator_geometry_offset); + + float ao_distance = get_float(cobject, "ao_distance"); + if (ao_distance == 0.0f && b_parent.ptr.data != b_ob.ptr.data) { + PointerRNA cparent = RNA_pointer_get(&b_parent.ptr, "cycles"); + ao_distance = get_float(cparent, "ao_distance"); + } + object->set_ao_distance(ao_distance); + + /* sync the asset name for Cryptomatte */ + BL::Object parent = b_ob.parent(); + ustring parent_name; + if (parent) { + while (parent.parent()) { + parent = parent.parent(); + } + parent_name = parent.name(); + } + else { + parent_name = b_ob.name(); + } + object->set_asset_name(parent_name); + + /* object sync + * transform comparison should not be needed, but duplis don't work perfect + * in the depsgraph and may not signal changes, so this is a workaround */ + if (object->is_modified() || object_updated || + (object->get_geometry() && object->get_geometry()->is_modified())) { + object->name = b_ob.name().c_str(); + object->set_pass_id(b_ob.pass_index()); + object->set_color(get_float3(b_ob.color())); + object->set_tfm(tfm); + + /* dupli texture coordinates and random_id */ + if (is_instance) { + object->set_dupli_generated(0.5f * get_float3(b_instance.orco()) - + make_float3(0.5f, 0.5f, 0.5f)); + object->set_dupli_uv(get_float2(b_instance.uv())); + object->set_random_id(b_instance.random_id()); + } + else { + object->set_dupli_generated(zero_float3()); + object->set_dupli_uv(zero_float2()); + object->set_random_id(hash_uint2(hash_string(object->name.c_str()), 0)); + } + + object->tag_update(scene); + } + + sync_object_motion_init(b_parent, b_ob, object); + + if (is_instance) { + /* Sync possible particle data. 
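+ * sync_dupli_particle() below copies the particle index, age, size, location, rotation and velocity to the object's particle system.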
*/
+ sync_dupli_particle(b_parent, b_instance, object);
+ }
+
+ return object;
+}
+
+/* This function mirrors drw_uniform_property_lookup in draw_instance_data.cpp */
+static bool lookup_property(BL::ID b_id, const string &name, float4 *r_value)
+{
+ PointerRNA ptr;
+ PropertyRNA *prop;
+
+ if (!RNA_path_resolve(&b_id.ptr, name.c_str(), &ptr, &prop)) {
+ return false;
+ }
+
+ if (prop == NULL) {
+ return false;
+ }
+
+ PropertyType type = RNA_property_type(prop);
+ int arraylen = RNA_property_array_length(&ptr, prop);
+
+ if (arraylen == 0) {
+ float value;
+
+ if (type == PROP_FLOAT)
+ value = RNA_property_float_get(&ptr, prop);
+ else if (type == PROP_INT)
+ value = static_cast<float>(RNA_property_int_get(&ptr, prop));
+ else
+ return false;
+
+ *r_value = make_float4(value, value, value, 1.0f);
+ return true;
+ }
+ else if (type == PROP_FLOAT && arraylen <= 4) {
+ *r_value = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
+ RNA_property_float_get_array(&ptr, prop, &r_value->x);
+ return true;
+ }
+
+ return false;
+}
+
+/* This function mirrors drw_uniform_attribute_lookup in draw_instance_data.cpp */
+static float4 lookup_instance_property(BL::DepsgraphObjectInstance &b_instance,
+ const string &name,
+ bool use_instancer)
+{
+ string idprop_name = string_printf("[\"%s\"]", name.c_str());
+ float4 value;
+
+ /* If requesting instance data, check the parent particle system and object. */
+ if (use_instancer && b_instance.is_instance()) {
+ BL::ParticleSystem b_psys = b_instance.particle_system();
+
+ if (b_psys) {
+ if (lookup_property(b_psys.settings(), idprop_name, &value) ||
+ lookup_property(b_psys.settings(), name, &value)) {
+ return value;
+ }
+ }
+ if (lookup_property(b_instance.parent(), idprop_name, &value) ||
+ lookup_property(b_instance.parent(), name, &value)) {
+ return value;
+ }
+ }
+
+ /* Check the object and mesh. */
+ BL::Object b_ob = b_instance.object();
+ BL::ID b_data = b_ob.data();
+
+ if (lookup_property(b_ob, idprop_name, &value) || lookup_property(b_ob, name, &value) ||
+ lookup_property(b_data, idprop_name, &value) || lookup_property(b_data, name, &value)) {
+ return value;
+ }
+
+ return make_float4(0.0f);
+}
+
+bool BlenderSync::sync_object_attributes(BL::DepsgraphObjectInstance &b_instance, Object *object)
+{
+ /* Find which attributes are needed. */
+ AttributeRequestSet requests = object->get_geometry()->needed_attributes();
+
+ /* Delete attributes that became unnecessary. */
+ vector<ParamValue> &attributes = object->attributes;
+ bool changed = false;
+
+ for (int i = attributes.size() - 1; i >= 0; i--) {
+ if (!requests.find(attributes[i].name())) {
+ attributes.erase(attributes.begin() + i);
+ changed = true;
+ }
+ }
+
+ /* Update attribute values. */
+ foreach (AttributeRequest &req, requests.requests) {
+ ustring name = req.name;
+
+ std::string real_name;
+ BlenderAttributeType type = blender_attribute_name_split_type(name, &real_name);
+
+ if (type != BL::ShaderNodeAttribute::attribute_type_GEOMETRY) {
+ bool use_instancer = (type == BL::ShaderNodeAttribute::attribute_type_INSTANCER);
+ float4 value = lookup_instance_property(b_instance, real_name, use_instancer);
+
+ /* Try finding the existing attribute value. */
+ ParamValue *param = NULL;
+
+ for (size_t i = 0; i < attributes.size(); i++) {
+ if (attributes[i].name() == name) {
+ param = &attributes[i];
+ break;
+ }
+ }
+
+ /* Replace or add the value.
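+ * Values are stored as float4 ParamValue entries, so a changed value is detected with a plain memcmp against the existing data.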
*/
+ ParamValue new_param(name, TypeDesc::TypeFloat4, 1, &value);
+ assert(new_param.datasize() == sizeof(value));
+
+ if (!param) {
+ changed = true;
+ attributes.push_back(new_param);
+ }
+ else if (memcmp(param->data(), &value, sizeof(value)) != 0) {
+ changed = true;
+ *param = new_param;
+ }
+ }
+ }
+
+ return changed;
+}
+
+/* Object Loop */
+
+void BlenderSync::sync_procedural(BL::Object &b_ob,
+ BL::MeshSequenceCacheModifier &b_mesh_cache,
+ bool has_subdivision_modifier)
+{
+#ifdef WITH_ALEMBIC
+ BL::CacheFile cache_file = b_mesh_cache.cache_file();
+ void *cache_file_key = cache_file.ptr.data;
+
+ AlembicProcedural *procedural = static_cast<AlembicProcedural *>(
+ procedural_map.find(cache_file_key));
+
+ if (procedural == nullptr) {
+ procedural = scene->create_node<AlembicProcedural>();
+ procedural_map.add(cache_file_key, procedural);
+ }
+ else {
+ procedural_map.used(procedural);
+ }
+
+ float current_frame = static_cast<float>(b_scene.frame_current());
+ if (cache_file.override_frame()) {
+ current_frame = cache_file.frame();
+ }
+
+ if (!cache_file.override_frame()) {
+ procedural->set_start_frame(static_cast<float>(b_scene.frame_start()));
+ procedural->set_end_frame(static_cast<float>(b_scene.frame_end()));
+ }
+
+ procedural->set_frame(current_frame);
+ procedural->set_frame_rate(b_scene.render().fps() / b_scene.render().fps_base());
+ procedural->set_frame_offset(cache_file.frame_offset());
+
+ string absolute_path = blender_absolute_path(b_data, b_ob, b_mesh_cache.cache_file().filepath());
+ procedural->set_filepath(ustring(absolute_path));
+
+ procedural->set_scale(cache_file.scale());
+
+ procedural->set_use_prefetch(cache_file.use_prefetch());
+ procedural->set_prefetch_cache_size(cache_file.prefetch_cache_size());
+
+ /* create or update existing AlembicObjects */
+ ustring object_path = ustring(b_mesh_cache.object_path());
+
+ AlembicObject *abc_object = procedural->get_or_create_object(object_path);
+
+ array<Node *> used_shaders = find_used_shaders(b_ob);
+ abc_object->set_used_shaders(used_shaders);
+
+ PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles");
+ const float subd_dicing_rate = max(0.1f, RNA_float_get(&cobj, "dicing_rate") * dicing_rate);
+ abc_object->set_subd_dicing_rate(subd_dicing_rate);
+ abc_object->set_subd_max_level(max_subdivisions);
+
+ abc_object->set_ignore_subdivision(!has_subdivision_modifier);
+
+ if (abc_object->is_modified() || procedural->is_modified()) {
+ procedural->tag_update(scene);
+ }
+#else
+ (void)b_ob;
+ (void)b_mesh_cache;
+ (void)has_subdivision_modifier;
+#endif
+}
+
+void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
+ BL::SpaceView3D &b_v3d,
+ float motion_time)
+{
+ /* Task pool for multithreaded geometry sync.
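+ * Geometry sync jobs are pushed to this pool from sync_object() and waited on with wait_work() once the object loop is done.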
*/ + TaskPool geom_task_pool; + + /* layer data */ + bool motion = motion_time != 0.0f; + + if (!motion) { + /* prepare for sync */ + light_map.pre_sync(); + geometry_map.pre_sync(); + object_map.pre_sync(); + procedural_map.pre_sync(); + particle_system_map.pre_sync(); + motion_times.clear(); + } + else { + geometry_motion_synced.clear(); + } + + /* initialize culling */ + BlenderObjectCulling culling(scene, b_scene); + + /* object loop */ + bool cancel = false; + bool use_portal = false; + const bool show_lights = BlenderViewportParameters(b_v3d, use_developer_ui).use_scene_lights; + + BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval(); + BL::Depsgraph::object_instances_iterator b_instance_iter; + + for (b_depsgraph.object_instances.begin(b_instance_iter); + b_instance_iter != b_depsgraph.object_instances.end() && !cancel; + ++b_instance_iter) { + BL::DepsgraphObjectInstance b_instance = *b_instance_iter; + BL::Object b_ob = b_instance.object(); + + /* Viewport visibility. */ + const bool show_in_viewport = !b_v3d || b_ob.visible_in_viewport_get(b_v3d); + if (show_in_viewport == false) { + continue; + } + + /* Load per-object culling data. */ + culling.init_object(scene, b_ob); + + /* Ensure the object geom supporting the hair is processed before adding + * the hair processing task to the task pool, calling .to_mesh() on the + * same object in parallel does not work. */ + const bool sync_hair = b_instance.show_particles() && object_has_particle_hair(b_ob); + + /* Object itself. */ + if (b_instance.show_self()) { +#ifdef WITH_ALEMBIC + bool use_procedural = false; + bool has_subdivision_modifier = false; + BL::MeshSequenceCacheModifier b_mesh_cache(PointerRNA_NULL); + + /* Experimental as Blender does not have good support for procedurals at the moment, also + * only available in preview renders since currently do not have a good cache policy, the + * data being loaded at once for all the frames. */ + if (experimental && b_v3d) { + b_mesh_cache = object_mesh_cache_find(b_ob, &has_subdivision_modifier); + use_procedural = b_mesh_cache && b_mesh_cache.cache_file().use_render_procedural(); + } + + if (use_procedural) { + /* Skip in the motion case, as generating motion blur data will be handled in the + * procedural. */ + if (!motion) { + sync_procedural(b_ob, b_mesh_cache, has_subdivision_modifier); + } + } + else +#endif + { + sync_object(b_depsgraph, + b_view_layer, + b_instance, + motion_time, + false, + show_lights, + culling, + &use_portal, + sync_hair ? NULL : &geom_task_pool); + } + } + + /* Particle hair as separate object. */ + if (sync_hair) { + sync_object(b_depsgraph, + b_view_layer, + b_instance, + motion_time, + true, + show_lights, + culling, + &use_portal, + &geom_task_pool); + } + + cancel = progress.get_cancel(); + } + + geom_task_pool.wait_work(); + + progress.set_sync_status(""); + + if (!cancel && !motion) { + sync_background_light(b_v3d, use_portal); + + /* Handle removed data and modified pointers, as this may free memory, delete Nodes in the + * right order to ensure that dependent data is freed after their users. Objects should be + * freed before particle systems and geometries. 
*/ + light_map.post_sync(); + object_map.post_sync(); + geometry_map.post_sync(); + particle_system_map.post_sync(); + procedural_map.post_sync(); + } + + if (motion) + geometry_motion_synced.clear(); +} + +void BlenderSync::sync_motion(BL::RenderSettings &b_render, + BL::Depsgraph &b_depsgraph, + BL::SpaceView3D &b_v3d, + BL::Object &b_override, + int width, + int height, + void **python_thread_state) +{ + if (scene->need_motion() == Scene::MOTION_NONE) + return; + + /* get camera object here to deal with camera switch */ + BL::Object b_cam = b_scene.camera(); + if (b_override) + b_cam = b_override; + + int frame_center = b_scene.frame_current(); + float subframe_center = b_scene.frame_subframe(); + float frame_center_delta = 0.0f; + + if (scene->need_motion() != Scene::MOTION_PASS && + scene->camera->get_motion_position() != Camera::MOTION_POSITION_CENTER) { + float shuttertime = scene->camera->get_shuttertime(); + if (scene->camera->get_motion_position() == Camera::MOTION_POSITION_END) { + frame_center_delta = -shuttertime * 0.5f; + } + else { + assert(scene->camera->get_motion_position() == Camera::MOTION_POSITION_START); + frame_center_delta = shuttertime * 0.5f; + } + + float time = frame_center + subframe_center + frame_center_delta; + int frame = (int)floorf(time); + float subframe = time - frame; + python_thread_state_restore(python_thread_state); + b_engine.frame_set(frame, subframe); + python_thread_state_save(python_thread_state); + if (b_cam) { + sync_camera_motion(b_render, b_cam, width, height, 0.0f); + } + sync_objects(b_depsgraph, b_v3d); + } + + /* Insert motion times from camera. Motion times from other objects + * have already been added in a sync_objects call. */ + if (b_cam) { + uint camera_motion_steps = object_motion_steps(b_cam, b_cam); + for (size_t step = 0; step < camera_motion_steps; step++) { + motion_times.insert(scene->camera->motion_time(step)); + } + } + + /* Check which geometry already has motion blur so it can be skipped. */ + geometry_motion_attribute_synced.clear(); + for (Geometry *geom : scene->geometry) { + if (geom->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { + geometry_motion_attribute_synced.insert(geom); + } + } + + /* note iteration over motion_times set happens in sorted order */ + foreach (float relative_time, motion_times) { + /* center time is already handled. */ + if (relative_time == 0.0f) { + continue; + } + + VLOG(1) << "Synchronizing motion for the relative time " << relative_time << "."; + + /* fixed shutter time to get previous and next frame for motion pass */ + float shuttertime = scene->motion_shutter_time(); + + /* compute frame and subframe time */ + float time = frame_center + subframe_center + frame_center_delta + + relative_time * shuttertime * 0.5f; + int frame = (int)floorf(time); + float subframe = time - frame; + + /* change frame */ + python_thread_state_restore(python_thread_state); + b_engine.frame_set(frame, subframe); + python_thread_state_save(python_thread_state); + + /* Syncs camera motion if relative_time is one of the camera's motion times. 
*/ + sync_camera_motion(b_render, b_cam, width, height, relative_time); + + /* sync object */ + sync_objects(b_depsgraph, b_v3d, relative_time); + } + + geometry_motion_attribute_synced.clear(); + + /* we need to set the python thread state again because this + * function assumes it is being executed from python and will + * try to save the thread state */ + python_thread_state_restore(python_thread_state); + b_engine.frame_set(frame_center, subframe_center); + python_thread_state_save(python_thread_state); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/object_cull.cpp b/intern/cycles/blender/object_cull.cpp new file mode 100644 index 00000000000..c2493be26dd --- /dev/null +++ b/intern/cycles/blender/object_cull.cpp @@ -0,0 +1,142 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "scene/camera.h" + +#include "blender/object_cull.h" +#include "blender/util.h" + +CCL_NAMESPACE_BEGIN + +BlenderObjectCulling::BlenderObjectCulling(Scene *scene, BL::Scene &b_scene) + : use_scene_camera_cull_(false), + use_camera_cull_(false), + camera_cull_margin_(0.0f), + use_scene_distance_cull_(false), + use_distance_cull_(false), + distance_cull_margin_(0.0f) +{ + if (b_scene.render().use_simplify()) { + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + + use_scene_camera_cull_ = scene->camera->get_camera_type() != CAMERA_PANORAMA && + !b_scene.render().use_multiview() && + get_boolean(cscene, "use_camera_cull"); + use_scene_distance_cull_ = scene->camera->get_camera_type() != CAMERA_PANORAMA && + !b_scene.render().use_multiview() && + get_boolean(cscene, "use_distance_cull"); + + camera_cull_margin_ = get_float(cscene, "camera_cull_margin"); + distance_cull_margin_ = get_float(cscene, "distance_cull_margin"); + + if (distance_cull_margin_ == 0.0f) { + use_scene_distance_cull_ = false; + } + } +} + +void BlenderObjectCulling::init_object(Scene *scene, BL::Object &b_ob) +{ + if (!use_scene_camera_cull_ && !use_scene_distance_cull_) { + return; + } + + PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); + + use_camera_cull_ = use_scene_camera_cull_ && get_boolean(cobject, "use_camera_cull"); + use_distance_cull_ = use_scene_distance_cull_ && get_boolean(cobject, "use_distance_cull"); + + if (use_camera_cull_ || use_distance_cull_) { + /* Need to have proper projection matrix. */ + scene->camera->update(scene); + } +} + +bool BlenderObjectCulling::test(Scene *scene, BL::Object &b_ob, Transform &tfm) +{ + if (!use_camera_cull_ && !use_distance_cull_) { + return false; + } + + /* Compute world space bounding box corners. 
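+ * b_ob.bound_box() provides the eight local-space corners, which are transformed into world space with the object transform.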
*/ + float3 bb[8]; + BL::Array boundbox = b_ob.bound_box(); + for (int i = 0; i < 8; ++i) { + float3 p = make_float3(boundbox[3 * i + 0], boundbox[3 * i + 1], boundbox[3 * i + 2]); + bb[i] = transform_point(&tfm, p); + } + + bool camera_culled = use_camera_cull_ && test_camera(scene, bb); + bool distance_culled = use_distance_cull_ && test_distance(scene, bb); + + return ((camera_culled && distance_culled) || (camera_culled && !use_distance_cull_) || + (distance_culled && !use_camera_cull_)); +} + +/* TODO(sergey): Not really optimal, consider approaches based on k-DOP in order + * to reduce number of objects which are wrongly considered visible. + */ +bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8]) +{ + Camera *cam = scene->camera; + const ProjectionTransform &worldtondc = cam->worldtondc; + float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX), + bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX); + bool all_behind = true; + for (int i = 0; i < 8; ++i) { + float3 p = bb[i]; + float4 b = make_float4(p.x, p.y, p.z, 1.0f); + float4 c = make_float4( + dot(worldtondc.x, b), dot(worldtondc.y, b), dot(worldtondc.z, b), dot(worldtondc.w, b)); + p = float4_to_float3(c / c.w); + if (c.z < 0.0f) { + p.x = 1.0f - p.x; + p.y = 1.0f - p.y; + } + if (c.z >= -camera_cull_margin_) { + all_behind = false; + } + bb_min = min(bb_min, p); + bb_max = max(bb_max, p); + } + if (all_behind) { + return true; + } + return (bb_min.x >= 1.0f + camera_cull_margin_ || bb_min.y >= 1.0f + camera_cull_margin_ || + bb_max.x <= -camera_cull_margin_ || bb_max.y <= -camera_cull_margin_); +} + +bool BlenderObjectCulling::test_distance(Scene *scene, float3 bb[8]) +{ + float3 camera_position = transform_get_column(&scene->camera->get_matrix(), 3); + float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX), + bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX); + + /* Find min & max points for x & y & z on bounding box */ + for (int i = 0; i < 8; ++i) { + float3 p = bb[i]; + bb_min = min(bb_min, p); + bb_max = max(bb_max, p); + } + + float3 closest_point = max(min(bb_max, camera_position), bb_min); + return (len_squared(camera_position - closest_point) > + distance_cull_margin_ * distance_cull_margin_); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/object_cull.h b/intern/cycles/blender/object_cull.h new file mode 100644 index 00000000000..be3068ef4e7 --- /dev/null +++ b/intern/cycles/blender/object_cull.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __BLENDER_OBJECT_CULL_H__
+#define __BLENDER_OBJECT_CULL_H__
+
+#include "blender/sync.h"
+#include "util/types.h"
+
+CCL_NAMESPACE_BEGIN
+
+class Scene;
+
+class BlenderObjectCulling {
+ public:
+ BlenderObjectCulling(Scene *scene, BL::Scene &b_scene);
+
+ void init_object(Scene *scene, BL::Object &b_ob);
+ bool test(Scene *scene, BL::Object &b_ob, Transform &tfm);
+
+ private:
+ bool test_camera(Scene *scene, float3 bb[8]);
+ bool test_distance(Scene *scene, float3 bb[8]);
+
+ bool use_scene_camera_cull_;
+ bool use_camera_cull_;
+ float camera_cull_margin_;
+ bool use_scene_distance_cull_;
+ bool use_distance_cull_;
+ float distance_cull_margin_;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __BLENDER_OBJECT_CULL_H__ */
diff --git a/intern/cycles/blender/output_driver.cpp b/intern/cycles/blender/output_driver.cpp
new file mode 100644
index 00000000000..2b3586af668
--- /dev/null
+++ b/intern/cycles/blender/output_driver.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "blender/output_driver.h"
+
+CCL_NAMESPACE_BEGIN
+
+BlenderOutputDriver::BlenderOutputDriver(BL::RenderEngine &b_engine) : b_engine_(b_engine)
+{
+}
+
+BlenderOutputDriver::~BlenderOutputDriver()
+{
+}
+
+bool BlenderOutputDriver::read_render_tile(const Tile &tile)
+{
+ /* Get render result. */
+ BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
+ tile.offset.y,
+ tile.size.x,
+ tile.size.y,
+ tile.layer.c_str(),
+ tile.view.c_str());
+
+ /* Can happen if the intersected rectangle gives 0 width or height. */
+ if (b_rr.ptr.data == NULL) {
+ return false;
+ }
+
+ BL::RenderResult::layers_iterator b_single_rlay;
+ b_rr.layers.begin(b_single_rlay);
+
+ /* layer will be missing if it was disabled in the UI */
+ if (b_single_rlay == b_rr.layers.end()) {
+ return false;
+ }
+
+ BL::RenderLayer b_rlay = *b_single_rlay;
+
+ vector<float> pixels(tile.size.x * tile.size.y * 4);
+
+ /* Copy each pass.
+ * TODO: copy only the required ones for better performance? */
+ for (BL::RenderPass &b_pass : b_rlay.passes) {
+ tile.set_pass_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
+ }
+
+ b_engine_.end_result(b_rr, false, false, false);
+
+ return true;
+}
+
+bool BlenderOutputDriver::update_render_tile(const Tile &tile)
+{
+ /* Use final write for preview renders, otherwise render result wouldn't be updated
+ * quickly on Blender side. For all other cases we use the display driver. */
+ if (b_engine_.is_preview()) {
+ write_render_tile(tile);
+ return true;
+ }
+
+ /* Don't highlight full-frame tile. */
+ if (!(tile.size == tile.full_size)) {
+ b_engine_.tile_highlight_clear_all();
+ b_engine_.tile_highlight_set(tile.offset.x, tile.offset.y, tile.size.x, tile.size.y, true);
+ }
+
+ return false;
+}
+
+void BlenderOutputDriver::write_render_tile(const Tile &tile)
+{
+ b_engine_.tile_highlight_clear_all();
+
+ /* Get render result.
*/ + BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x, + tile.offset.y, + tile.size.x, + tile.size.y, + tile.layer.c_str(), + tile.view.c_str()); + + /* Can happen if the intersected rectangle gives 0 width or height. */ + if (b_rr.ptr.data == NULL) { + return; + } + + BL::RenderResult::layers_iterator b_single_rlay; + b_rr.layers.begin(b_single_rlay); + + /* Layer will be missing if it was disabled in the UI. */ + if (b_single_rlay == b_rr.layers.end()) { + return; + } + + BL::RenderLayer b_rlay = *b_single_rlay; + + vector pixels(tile.size.x * tile.size.y * 4); + + /* Copy each pass. */ + for (BL::RenderPass &b_pass : b_rlay.passes) { + if (!tile.get_pass_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) { + memset(&pixels[0], 0, pixels.size() * sizeof(float)); + } + + b_pass.rect(&pixels[0]); + } + + b_engine_.end_result(b_rr, true, false, true); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/output_driver.h b/intern/cycles/blender/output_driver.h new file mode 100644 index 00000000000..1d016f8bcb9 --- /dev/null +++ b/intern/cycles/blender/output_driver.h @@ -0,0 +1,40 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "RNA_blender_cpp.h" + +#include "session/output_driver.h" + +CCL_NAMESPACE_BEGIN + +class BlenderOutputDriver : public OutputDriver { + public: + explicit BlenderOutputDriver(BL::RenderEngine &b_engine); + ~BlenderOutputDriver(); + + virtual void write_render_tile(const Tile &tile) override; + virtual bool update_render_tile(const Tile &tile) override; + virtual bool read_render_tile(const Tile &tile) override; + + protected: + BL::RenderEngine b_engine_; +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/particles.cpp b/intern/cycles/blender/particles.cpp new file mode 100644 index 00000000000..3a2c1b0ecf9 --- /dev/null +++ b/intern/cycles/blender/particles.cpp @@ -0,0 +1,94 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "scene/particles.h" +#include "scene/mesh.h" +#include "scene/object.h" + +#include "blender/sync.h" +#include "blender/util.h" + +#include "util/foreach.h" + +CCL_NAMESPACE_BEGIN + +/* Utilities */ + +bool BlenderSync::sync_dupli_particle(BL::Object &b_ob, + BL::DepsgraphObjectInstance &b_instance, + Object *object) +{ + /* Test if this dupli was generated from a particle system. 
*/ + BL::ParticleSystem b_psys = b_instance.particle_system(); + if (!b_psys) + return false; + + object->set_hide_on_missing_motion(true); + + /* test if we need particle data */ + if (!object->get_geometry()->need_attribute(scene, ATTR_STD_PARTICLE)) + return false; + + /* don't handle child particles yet */ + BL::Array persistent_id = b_instance.persistent_id(); + + if (persistent_id[0] >= b_psys.particles.length()) + return false; + + /* find particle system */ + ParticleSystemKey key(b_ob, persistent_id); + ParticleSystem *psys; + + bool first_use = !particle_system_map.is_used(key); + bool need_update = particle_system_map.add_or_update(&psys, b_ob, b_instance.object(), key); + + /* no update needed? */ + if (!need_update && !object->get_geometry()->is_modified() && + !scene->object_manager->need_update()) + return true; + + /* first time used in this sync loop? clear and tag update */ + if (first_use) { + psys->particles.clear(); + psys->tag_update(scene); + } + + /* add particle */ + BL::Particle b_pa = b_psys.particles[persistent_id[0]]; + Particle pa; + + pa.index = persistent_id[0]; + pa.age = b_scene.frame_current_final() - b_pa.birth_time(); + pa.lifetime = b_pa.lifetime(); + pa.location = get_float3(b_pa.location()); + pa.rotation = get_float4(b_pa.rotation()); + pa.size = b_pa.size(); + pa.velocity = get_float3(b_pa.velocity()); + pa.angular_velocity = get_float3(b_pa.angular_velocity()); + + psys->particles.push_back_slow(pa); + + object->set_particle_system(psys); + object->set_particle_index(psys->particles.size() - 1); + + if (object->particle_index_is_modified()) + scene->object_manager->tag_update(scene, ObjectManager::PARTICLE_MODIFIED); + + /* return that this object has particle data */ + return true; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/python.cpp b/intern/cycles/blender/python.cpp new file mode 100644 index 00000000000..20bf6385999 --- /dev/null +++ b/intern/cycles/blender/python.cpp @@ -0,0 +1,1063 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "blender/CCL_api.h" + +#include "blender/device.h" +#include "blender/session.h" +#include "blender/sync.h" +#include "blender/util.h" + +#include "session/denoising.h" +#include "session/merge.h" + +#include "util/debug.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/md5.h" +#include "util/opengl.h" +#include "util/openimagedenoise.h" +#include "util/path.h" +#include "util/string.h" +#include "util/task.h" +#include "util/tbb.h" +#include "util/types.h" + +#ifdef WITH_OSL +# include "scene/osl.h" + +# include +# include +#endif + +CCL_NAMESPACE_BEGIN + +namespace { + +/* Flag describing whether debug flags were synchronized from scene. 
*/ +bool debug_flags_set = false; + +void *pylong_as_voidptr_typesafe(PyObject *object) +{ + if (object == Py_None) + return NULL; + return PyLong_AsVoidPtr(object); +} + +PyObject *pyunicode_from_string(const char *str) +{ + /* Ignore errors if device API returns invalid UTF-8 strings. */ + return PyUnicode_DecodeUTF8(str, strlen(str), "ignore"); +} + +/* Synchronize debug flags from a given Blender scene. + * Return truth when device list needs invalidation. + */ +static void debug_flags_sync_from_scene(BL::Scene b_scene) +{ + DebugFlagsRef flags = DebugFlags(); + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + /* Synchronize shared flags. */ + flags.viewport_static_bvh = get_enum(cscene, "debug_bvh_type"); + /* Synchronize CPU flags. */ + flags.cpu.avx2 = get_boolean(cscene, "debug_use_cpu_avx2"); + flags.cpu.avx = get_boolean(cscene, "debug_use_cpu_avx"); + flags.cpu.sse41 = get_boolean(cscene, "debug_use_cpu_sse41"); + flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3"); + flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2"); + flags.cpu.bvh_layout = (BVHLayout)get_enum(cscene, "debug_bvh_layout"); + /* Synchronize CUDA flags. */ + flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile"); + /* Synchronize OptiX flags. */ + flags.optix.use_debug = get_boolean(cscene, "debug_use_optix_debug"); +} + +/* Reset debug flags to default values. + * Return truth when device list needs invalidation. + */ +static void debug_flags_reset() +{ + DebugFlagsRef flags = DebugFlags(); + flags.reset(); +} + +} /* namespace */ + +void python_thread_state_save(void **python_thread_state) +{ + *python_thread_state = (void *)PyEval_SaveThread(); +} + +void python_thread_state_restore(void **python_thread_state) +{ + PyEval_RestoreThread((PyThreadState *)*python_thread_state); + *python_thread_state = NULL; +} + +static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce) +{ + const char *result = PyUnicode_AsUTF8(py_str); + if (result) { + /* 99% of the time this is enough but we better support non unicode + * chars since blender doesn't limit this. 
+ */ + return result; + } + else { + PyErr_Clear(); + if (PyBytes_Check(py_str)) { + return PyBytes_AS_STRING(py_str); + } + else if ((*coerce = PyUnicode_EncodeFSDefault(py_str))) { + return PyBytes_AS_STRING(*coerce); + } + else { + /* Clear the error, so Cycles can be at least used without + * GPU and OSL support, + */ + PyErr_Clear(); + return ""; + } + } +} + +static PyObject *init_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *path, *user_path, *temp_path; + int headless; + + if (!PyArg_ParseTuple(args, "OOOi", &path, &user_path, &temp_path, &headless)) { + return nullptr; + } + + PyObject *path_coerce = nullptr, *user_path_coerce = nullptr, *temp_path_coerce = nullptr; + path_init(PyC_UnicodeAsByte(path, &path_coerce), + PyC_UnicodeAsByte(user_path, &user_path_coerce), + PyC_UnicodeAsByte(temp_path, &temp_path_coerce)); + Py_XDECREF(path_coerce); + Py_XDECREF(user_path_coerce); + Py_XDECREF(temp_path_coerce); + + BlenderSession::headless = headless; + + DebugFlags().running_inside_blender = true; + + VLOG(2) << "Debug flags initialized to:\n" << DebugFlags(); + + Py_RETURN_NONE; +} + +static PyObject *exit_func(PyObject * /*self*/, PyObject * /*args*/) +{ + ShaderManager::free_memory(); + TaskScheduler::free_memory(); + Device::free_memory(); + Py_RETURN_NONE; +} + +static PyObject *create_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *pyengine, *pypreferences, *pydata, *pyscreen, *pyregion, *pyv3d, *pyrv3d; + int preview_osl; + + if (!PyArg_ParseTuple(args, + "OOOOOOOi", + &pyengine, + &pypreferences, + &pydata, + &pyscreen, + &pyregion, + &pyv3d, + &pyrv3d, + &preview_osl)) { + return NULL; + } + + /* RNA */ + ID *bScreen = (ID *)PyLong_AsVoidPtr(pyscreen); + + PointerRNA engineptr; + RNA_pointer_create(NULL, &RNA_RenderEngine, (void *)PyLong_AsVoidPtr(pyengine), &engineptr); + BL::RenderEngine engine(engineptr); + + PointerRNA preferencesptr; + RNA_pointer_create( + NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences), &preferencesptr); + BL::Preferences preferences(preferencesptr); + + PointerRNA dataptr; + RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr); + BL::BlendData data(dataptr); + + PointerRNA regionptr; + RNA_pointer_create(bScreen, &RNA_Region, pylong_as_voidptr_typesafe(pyregion), ®ionptr); + BL::Region region(regionptr); + + PointerRNA v3dptr; + RNA_pointer_create(bScreen, &RNA_SpaceView3D, pylong_as_voidptr_typesafe(pyv3d), &v3dptr); + BL::SpaceView3D v3d(v3dptr); + + PointerRNA rv3dptr; + RNA_pointer_create(bScreen, &RNA_RegionView3D, pylong_as_voidptr_typesafe(pyrv3d), &rv3dptr); + BL::RegionView3D rv3d(rv3dptr); + + /* create session */ + BlenderSession *session; + + if (rv3d) { + /* interactive viewport session */ + int width = region.width(); + int height = region.height(); + + session = new BlenderSession(engine, preferences, data, v3d, rv3d, width, height); + } + else { + /* offline session or preview render */ + session = new BlenderSession(engine, preferences, data, preview_osl); + } + + return PyLong_FromVoidPtr(session); +} + +static PyObject *free_func(PyObject * /*self*/, PyObject *value) +{ + delete (BlenderSession *)PyLong_AsVoidPtr(value); + + Py_RETURN_NONE; +} + +static PyObject *render_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *pysession, *pydepsgraph; + + if (!PyArg_ParseTuple(args, "OO", &pysession, &pydepsgraph)) + return NULL; + + BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); + + PointerRNA depsgraphptr; + RNA_pointer_create(NULL, &RNA_Depsgraph, (ID 
*)PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr); + BL::Depsgraph b_depsgraph(depsgraphptr); + + /* Allow Blender to execute other Python scripts. */ + python_thread_state_save(&session->python_thread_state); + + session->render(b_depsgraph); + + python_thread_state_restore(&session->python_thread_state); + + Py_RETURN_NONE; +} + +static PyObject *render_frame_finish_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *pysession; + + if (!PyArg_ParseTuple(args, "O", &pysession)) { + return nullptr; + } + + BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); + + /* Allow Blender to execute other Python scripts. */ + python_thread_state_save(&session->python_thread_state); + + session->render_frame_finish(); + + python_thread_state_restore(&session->python_thread_state); + + Py_RETURN_NONE; +} + +static PyObject *draw_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *py_session, *py_graph, *py_screen, *py_space_image; + + if (!PyArg_ParseTuple(args, "OOOO", &py_session, &py_graph, &py_screen, &py_space_image)) { + return nullptr; + } + + BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(py_session); + + ID *b_screen = (ID *)PyLong_AsVoidPtr(py_screen); + + PointerRNA b_space_image_ptr; + RNA_pointer_create(b_screen, + &RNA_SpaceImageEditor, + pylong_as_voidptr_typesafe(py_space_image), + &b_space_image_ptr); + BL::SpaceImageEditor b_space_image(b_space_image_ptr); + + session->draw(b_space_image); + + Py_RETURN_NONE; +} + +/* pixel_array and result passed as pointers */ +static PyObject *bake_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *pysession, *pydepsgraph, *pyobject; + const char *pass_type; + int pass_filter, width, height; + + if (!PyArg_ParseTuple(args, + "OOOsiii", + &pysession, + &pydepsgraph, + &pyobject, + &pass_type, + &pass_filter, + &width, + &height)) + return NULL; + + BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); + + PointerRNA depsgraphptr; + RNA_pointer_create(NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr); + BL::Depsgraph b_depsgraph(depsgraphptr); + + PointerRNA objectptr; + RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyobject), &objectptr); + BL::Object b_object(objectptr); + + python_thread_state_save(&session->python_thread_state); + + session->bake(b_depsgraph, b_object, pass_type, pass_filter, width, height); + + python_thread_state_restore(&session->python_thread_state); + + Py_RETURN_NONE; +} + +static PyObject *view_draw_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *pysession, *pygraph, *pyv3d, *pyrv3d; + + if (!PyArg_ParseTuple(args, "OOOO", &pysession, &pygraph, &pyv3d, &pyrv3d)) + return NULL; + + BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); + + if (PyLong_AsVoidPtr(pyrv3d)) { + /* 3d view drawing */ + int viewport[4]; + glGetIntegerv(GL_VIEWPORT, viewport); + + session->view_draw(viewport[2], viewport[3]); + } + + Py_RETURN_NONE; +} + +static PyObject *reset_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *pysession, *pydata, *pydepsgraph; + + if (!PyArg_ParseTuple(args, "OOO", &pysession, &pydata, &pydepsgraph)) + return NULL; + + BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); + + PointerRNA dataptr; + RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr); + BL::BlendData b_data(dataptr); + + PointerRNA depsgraphptr; + RNA_pointer_create(NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr); + BL::Depsgraph b_depsgraph(depsgraphptr); + + 
python_thread_state_save(&session->python_thread_state); + + session->reset_session(b_data, b_depsgraph); + + python_thread_state_restore(&session->python_thread_state); + + Py_RETURN_NONE; +} + +static PyObject *sync_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *pysession, *pydepsgraph; + + if (!PyArg_ParseTuple(args, "OO", &pysession, &pydepsgraph)) + return NULL; + + BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession); + + PointerRNA depsgraphptr; + RNA_pointer_create(NULL, &RNA_Depsgraph, PyLong_AsVoidPtr(pydepsgraph), &depsgraphptr); + BL::Depsgraph b_depsgraph(depsgraphptr); + + python_thread_state_save(&session->python_thread_state); + + session->synchronize(b_depsgraph); + + python_thread_state_restore(&session->python_thread_state); + + Py_RETURN_NONE; +} + +static PyObject *available_devices_func(PyObject * /*self*/, PyObject *args) +{ + const char *type_name; + if (!PyArg_ParseTuple(args, "s", &type_name)) { + return NULL; + } + + DeviceType type = Device::type_from_string(type_name); + /* "NONE" is defined by the add-on, see: `CyclesPreferences.get_device_types`. */ + if ((type == DEVICE_NONE) && (strcmp(type_name, "NONE") != 0)) { + PyErr_Format(PyExc_ValueError, "Device \"%s\" not known.", type_name); + return NULL; + } + + uint mask = (type == DEVICE_NONE) ? DEVICE_MASK_ALL : DEVICE_MASK(type); + mask |= DEVICE_MASK_CPU; + + vector devices = Device::available_devices(mask); + PyObject *ret = PyTuple_New(devices.size()); + + for (size_t i = 0; i < devices.size(); i++) { + DeviceInfo &device = devices[i]; + string type_name = Device::string_from_type(device.type); + PyObject *device_tuple = PyTuple_New(4); + PyTuple_SET_ITEM(device_tuple, 0, pyunicode_from_string(device.description.c_str())); + PyTuple_SET_ITEM(device_tuple, 1, pyunicode_from_string(type_name.c_str())); + PyTuple_SET_ITEM(device_tuple, 2, pyunicode_from_string(device.id.c_str())); + PyTuple_SET_ITEM(device_tuple, 3, PyBool_FromLong(device.has_peer_memory)); + PyTuple_SET_ITEM(ret, i, device_tuple); + } + + return ret; +} + +#ifdef WITH_OSL + +static PyObject *osl_update_node_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *pydata, *pynodegroup, *pynode; + const char *filepath = NULL; + + if (!PyArg_ParseTuple(args, "OOOs", &pydata, &pynodegroup, &pynode, &filepath)) + return NULL; + + /* RNA */ + PointerRNA dataptr; + RNA_main_pointer_create((Main *)PyLong_AsVoidPtr(pydata), &dataptr); + BL::BlendData b_data(dataptr); + + PointerRNA nodeptr; + RNA_pointer_create((ID *)PyLong_AsVoidPtr(pynodegroup), + &RNA_ShaderNodeScript, + (void *)PyLong_AsVoidPtr(pynode), + &nodeptr); + BL::ShaderNodeScript b_node(nodeptr); + + /* update bytecode hash */ + string bytecode = b_node.bytecode(); + + if (!bytecode.empty()) { + MD5Hash md5; + md5.append((const uint8_t *)bytecode.c_str(), bytecode.size()); + b_node.bytecode_hash(md5.get_hex().c_str()); + } + else + b_node.bytecode_hash(""); + + /* query from file path */ + OSL::OSLQuery query; + + if (!OSLShaderManager::osl_query(query, filepath)) + Py_RETURN_FALSE; + + /* add new sockets from parameters */ + set used_sockets; + + for (int i = 0; i < query.nparams(); i++) { + const OSL::OSLQuery::Parameter *param = query.getparam(i); + + /* skip unsupported types */ + if (param->varlenarray || param->isstruct || param->type.arraylen > 1) + continue; + + /* Read metadata. 
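+ * The OSL parameter metadata supplies the widget type (used to detect boolean parameters) and an optional UI label for the socket.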
*/ + bool is_bool_param = false; + ustring param_label = param->name; + + for (const OSL::OSLQuery::Parameter &metadata : param->metadata) { + if (metadata.type == TypeDesc::STRING) { + if (metadata.name == "widget") { + /* Boolean socket. */ + if (metadata.sdefault[0] == "boolean" || metadata.sdefault[0] == "checkBox") { + is_bool_param = true; + } + } + else if (metadata.name == "label") { + /* Socket label. */ + param_label = metadata.sdefault[0]; + } + } + } + /* determine socket type */ + string socket_type; + BL::NodeSocket::type_enum data_type = BL::NodeSocket::type_VALUE; + float4 default_float4 = make_float4(0.0f, 0.0f, 0.0f, 1.0f); + float default_float = 0.0f; + int default_int = 0; + string default_string = ""; + bool default_boolean = false; + + if (param->isclosure) { + socket_type = "NodeSocketShader"; + data_type = BL::NodeSocket::type_SHADER; + } + else if (param->type.vecsemantics == TypeDesc::COLOR) { + socket_type = "NodeSocketColor"; + data_type = BL::NodeSocket::type_RGBA; + + if (param->validdefault) { + default_float4[0] = param->fdefault[0]; + default_float4[1] = param->fdefault[1]; + default_float4[2] = param->fdefault[2]; + } + } + else if (param->type.vecsemantics == TypeDesc::POINT || + param->type.vecsemantics == TypeDesc::VECTOR || + param->type.vecsemantics == TypeDesc::NORMAL) { + socket_type = "NodeSocketVector"; + data_type = BL::NodeSocket::type_VECTOR; + + if (param->validdefault) { + default_float4[0] = param->fdefault[0]; + default_float4[1] = param->fdefault[1]; + default_float4[2] = param->fdefault[2]; + } + } + else if (param->type.aggregate == TypeDesc::SCALAR) { + if (param->type.basetype == TypeDesc::INT) { + if (is_bool_param) { + socket_type = "NodeSocketBool"; + data_type = BL::NodeSocket::type_BOOLEAN; + if (param->validdefault) { + default_boolean = (bool)param->idefault[0]; + } + } + else { + socket_type = "NodeSocketInt"; + data_type = BL::NodeSocket::type_INT; + if (param->validdefault) + default_int = param->idefault[0]; + } + } + else if (param->type.basetype == TypeDesc::FLOAT) { + socket_type = "NodeSocketFloat"; + data_type = BL::NodeSocket::type_VALUE; + if (param->validdefault) + default_float = param->fdefault[0]; + } + else if (param->type.basetype == TypeDesc::STRING) { + socket_type = "NodeSocketString"; + data_type = BL::NodeSocket::type_STRING; + if (param->validdefault) + default_string = param->sdefault[0].string(); + } + else + continue; + } + else + continue; + + /* Update existing socket. */ + bool found_existing = false; + if (param->isoutput) { + for (BL::NodeSocket &b_sock : b_node.outputs) { + if (b_sock.identifier() == param->name) { + if (b_sock.bl_idname() != socket_type) { + /* Remove if type no longer matches. */ + b_node.outputs.remove(b_data, b_sock); + } + else { + /* Reuse and update label. */ + if (b_sock.name() != param_label) { + b_sock.name(param_label.string()); + } + used_sockets.insert(b_sock.ptr.data); + found_existing = true; + } + break; + } + } + } + else { + for (BL::NodeSocket &b_sock : b_node.inputs) { + if (b_sock.identifier() == param->name) { + if (b_sock.bl_idname() != socket_type) { + /* Remove if type no longer matches. */ + b_node.inputs.remove(b_data, b_sock); + } + else { + /* Reuse and update label. */ + if (b_sock.name() != param_label) { + b_sock.name(param_label.string()); + } + used_sockets.insert(b_sock.ptr.data); + found_existing = true; + } + break; + } + } + } + + if (!found_existing) { + /* Create new socket. */ + BL::NodeSocket b_sock = (param->isoutput) ? 
b_node.outputs.create(b_data, + socket_type.c_str(), + param_label.c_str(), + param->name.c_str()) : + b_node.inputs.create(b_data, + socket_type.c_str(), + param_label.c_str(), + param->name.c_str()); + + /* set default value */ + if (data_type == BL::NodeSocket::type_VALUE) { + set_float(b_sock.ptr, "default_value", default_float); + } + else if (data_type == BL::NodeSocket::type_INT) { + set_int(b_sock.ptr, "default_value", default_int); + } + else if (data_type == BL::NodeSocket::type_RGBA) { + set_float4(b_sock.ptr, "default_value", default_float4); + } + else if (data_type == BL::NodeSocket::type_VECTOR) { + set_float3(b_sock.ptr, "default_value", float4_to_float3(default_float4)); + } + else if (data_type == BL::NodeSocket::type_STRING) { + set_string(b_sock.ptr, "default_value", default_string); + } + else if (data_type == BL::NodeSocket::type_BOOLEAN) { + set_boolean(b_sock.ptr, "default_value", default_boolean); + } + + used_sockets.insert(b_sock.ptr.data); + } + } + + /* remove unused parameters */ + bool removed; + + do { + removed = false; + + for (BL::NodeSocket &b_input : b_node.inputs) { + if (used_sockets.find(b_input.ptr.data) == used_sockets.end()) { + b_node.inputs.remove(b_data, b_input); + removed = true; + break; + } + } + + for (BL::NodeSocket &b_output : b_node.outputs) { + if (used_sockets.find(b_output.ptr.data) == used_sockets.end()) { + b_node.outputs.remove(b_data, b_output); + removed = true; + break; + } + } + } while (removed); + + Py_RETURN_TRUE; +} + +static PyObject *osl_compile_func(PyObject * /*self*/, PyObject *args) +{ + const char *inputfile = NULL, *outputfile = NULL; + + if (!PyArg_ParseTuple(args, "ss", &inputfile, &outputfile)) + return NULL; + + /* return */ + if (!OSLShaderManager::osl_compile(inputfile, outputfile)) + Py_RETURN_FALSE; + + Py_RETURN_TRUE; +} +#endif + +static PyObject *system_info_func(PyObject * /*self*/, PyObject * /*value*/) +{ + string system_info = Device::device_capabilities(); + return pyunicode_from_string(system_info.c_str()); +} + +static bool image_parse_filepaths(PyObject *pyfilepaths, vector &filepaths) +{ + if (PyUnicode_Check(pyfilepaths)) { + const char *filepath = PyUnicode_AsUTF8(pyfilepaths); + filepaths.push_back(filepath); + return true; + } + + PyObject *sequence = PySequence_Fast(pyfilepaths, + "File paths must be a string or sequence of strings"); + if (sequence == NULL) { + return false; + } + + for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) { + PyObject *item = PySequence_Fast_GET_ITEM(sequence, i); + const char *filepath = PyUnicode_AsUTF8(item); + if (filepath == NULL) { + PyErr_SetString(PyExc_ValueError, "File paths must be a string or sequence of strings."); + Py_DECREF(sequence); + return false; + } + filepaths.push_back(filepath); + } + Py_DECREF(sequence); + + return true; +} + +static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *keywords) +{ +#if 1 + (void)args; + (void)keywords; +#else + static const char *keyword_list[] = { + "preferences", "scene", "view_layer", "input", "output", "tile_size", "samples", NULL}; + PyObject *pypreferences, *pyscene, *pyviewlayer; + PyObject *pyinput, *pyoutput = NULL; + int tile_size = 0, samples = 0; + + if (!PyArg_ParseTupleAndKeywords(args, + keywords, + "OOOO|Oii", + (char **)keyword_list, + &pypreferences, + &pyscene, + &pyviewlayer, + &pyinput, + &pyoutput, + &tile_size, + &samples)) { + return NULL; + } + + /* Get device specification from preferences and scene. 
*/
+ PointerRNA preferencesptr;
+ RNA_pointer_create(
+ NULL, &RNA_Preferences, (void *)PyLong_AsVoidPtr(pypreferences), &preferencesptr);
+ BL::Preferences b_preferences(preferencesptr);
+
+ PointerRNA sceneptr;
+ RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene), &sceneptr);
+ BL::Scene b_scene(sceneptr);
+
+ DeviceInfo device = blender_device_info(b_preferences, b_scene, true);
+
+ /* Get denoising parameters from view layer. */
+ PointerRNA viewlayerptr;
+ RNA_pointer_create((ID *)PyLong_AsVoidPtr(pyscene),
+ &RNA_ViewLayer,
+ PyLong_AsVoidPtr(pyviewlayer),
+ &viewlayerptr);
+ PointerRNA cviewlayer = RNA_pointer_get(&viewlayerptr, "cycles");
+
+ DenoiseParams params;
+ params.radius = get_int(cviewlayer, "denoising_radius");
+ params.strength = get_float(cviewlayer, "denoising_strength");
+ params.feature_strength = get_float(cviewlayer, "denoising_feature_strength");
+ params.relative_pca = get_boolean(cviewlayer, "denoising_relative_pca");
+ params.neighbor_frames = get_int(cviewlayer, "denoising_neighbor_frames");
+
+ /* Parse file paths list. */
+ vector<string> input, output;
+
+ if (!image_parse_filepaths(pyinput, input)) {
+ return NULL;
+ }
+
+ if (pyoutput) {
+ if (!image_parse_filepaths(pyoutput, output)) {
+ return NULL;
+ }
+ }
+ else {
+ output = input;
+ }
+
+ if (input.empty()) {
+ PyErr_SetString(PyExc_ValueError, "No input file paths specified.");
+ return NULL;
+ }
+ if (input.size() != output.size()) {
+ PyErr_SetString(PyExc_ValueError, "Number of input and output file paths does not match.");
+ return NULL;
+ }
+
+ /* Create denoiser. */
+ DenoiserPipeline denoiser(device);
+ denoiser.params = params;
+ denoiser.input = input;
+ denoiser.output = output;
+
+ if (tile_size > 0) {
+ denoiser.tile_size = make_int2(tile_size, tile_size);
+ }
+ if (samples > 0) {
+ denoiser.samples_override = samples;
+ }
+
+ /* Run denoiser. */
+ if (!denoiser.run()) {
+ PyErr_SetString(PyExc_ValueError, denoiser.error.c_str());
+ return NULL;
+ }
+#endif
+
+ Py_RETURN_NONE;
+}
+
+static PyObject *merge_func(PyObject * /*self*/, PyObject *args, PyObject *keywords)
+{
+ static const char *keyword_list[] = {"input", "output", NULL};
+ PyObject *pyinput, *pyoutput = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(
+ args, keywords, "OO", (char **)keyword_list, &pyinput, &pyoutput)) {
+ return NULL;
+ }
+
+ /* Parse input list. */
+ vector<string> input;
+ if (!image_parse_filepaths(pyinput, input)) {
+ return NULL;
+ }
+
+ /* Parse output string. */
+ if (!PyUnicode_Check(pyoutput)) {
+ PyErr_SetString(PyExc_ValueError, "Output must be a string.");
+ return NULL;
+ }
+ string output = PyUnicode_AsUTF8(pyoutput);
+
+ /* Merge.
*/ + ImageMerger merger; + merger.input = input; + merger.output = output; + + if (!merger.run()) { + PyErr_SetString(PyExc_ValueError, merger.error.c_str()); + return NULL; + } + + Py_RETURN_NONE; +} + +static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args) +{ + PyObject *pyscene; + if (!PyArg_ParseTuple(args, "O", &pyscene)) { + return NULL; + } + + PointerRNA sceneptr; + RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene), &sceneptr); + BL::Scene b_scene(sceneptr); + + debug_flags_sync_from_scene(b_scene); + + VLOG(2) << "Debug flags set to:\n" << DebugFlags(); + + debug_flags_set = true; + + Py_RETURN_NONE; +} + +static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/) +{ + debug_flags_reset(); + if (debug_flags_set) { + VLOG(2) << "Debug flags reset to:\n" << DebugFlags(); + debug_flags_set = false; + } + Py_RETURN_NONE; +} + +static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*/) +{ + BlenderSession::print_render_stats = true; + Py_RETURN_NONE; +} + +static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/) +{ + vector device_types = Device::available_types(); + bool has_cuda = false, has_optix = false, has_hip = false; + foreach (DeviceType device_type, device_types) { + has_cuda |= (device_type == DEVICE_CUDA); + has_optix |= (device_type == DEVICE_OPTIX); + has_hip |= (device_type == DEVICE_HIP); + } + PyObject *list = PyTuple_New(3); + PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda)); + PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix)); + PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip)); + return list; +} + +static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg) +{ + PyObject *override_string = PyObject_Str(arg); + string override = PyUnicode_AsUTF8(override_string); + Py_DECREF(override_string); + + bool include_cpu = false; + const string cpu_suffix = "+CPU"; + if (string_endswith(override, cpu_suffix)) { + include_cpu = true; + override = override.substr(0, override.length() - cpu_suffix.length()); + } + + if (override == "CPU") { + BlenderSession::device_override = DEVICE_MASK_CPU; + } + else if (override == "CUDA") { + BlenderSession::device_override = DEVICE_MASK_CUDA; + } + else if (override == "OPTIX") { + BlenderSession::device_override = DEVICE_MASK_OPTIX; + } + else if (override == "HIP") { + BlenderSession::device_override = DEVICE_MASK_HIP; + } + else { + printf("\nError: %s is not a valid Cycles device.\n", override.c_str()); + Py_RETURN_FALSE; + } + + if (include_cpu) { + BlenderSession::device_override = (DeviceTypeMask)(BlenderSession::device_override | + DEVICE_MASK_CPU); + } + + Py_RETURN_TRUE; +} + +static PyMethodDef methods[] = { + {"init", init_func, METH_VARARGS, ""}, + {"exit", exit_func, METH_VARARGS, ""}, + {"create", create_func, METH_VARARGS, ""}, + {"free", free_func, METH_O, ""}, + {"render", render_func, METH_VARARGS, ""}, + {"render_frame_finish", render_frame_finish_func, METH_VARARGS, ""}, + {"draw", draw_func, METH_VARARGS, ""}, + {"bake", bake_func, METH_VARARGS, ""}, + {"view_draw", view_draw_func, METH_VARARGS, ""}, + {"sync", sync_func, METH_VARARGS, ""}, + {"reset", reset_func, METH_VARARGS, ""}, +#ifdef WITH_OSL + {"osl_update_node", osl_update_node_func, METH_VARARGS, ""}, + {"osl_compile", osl_compile_func, METH_VARARGS, ""}, +#endif + {"available_devices", available_devices_func, METH_VARARGS, ""}, + {"system_info", system_info_func, METH_NOARGS, ""}, + + /* Standalone denoising */ + {"denoise", 
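/* Entries that accept keyword arguments are registered with METH_VARARGS | METH_KEYWORDS,
 * which is why they carry the explicit PyCFunction cast that follows; e.g. (illustrative):
 *   {"merge", (PyCFunction)merge_func, METH_VARARGS | METH_KEYWORDS, ""},
 */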
(PyCFunction)denoise_func, METH_VARARGS | METH_KEYWORDS, ""}, + {"merge", (PyCFunction)merge_func, METH_VARARGS | METH_KEYWORDS, ""}, + + /* Debugging routines */ + {"debug_flags_update", debug_flags_update_func, METH_VARARGS, ""}, + {"debug_flags_reset", debug_flags_reset_func, METH_NOARGS, ""}, + + /* Statistics. */ + {"enable_print_stats", enable_print_stats_func, METH_NOARGS, ""}, + + /* Compute Device selection */ + {"get_device_types", get_device_types_func, METH_VARARGS, ""}, + {"set_device_override", set_device_override_func, METH_O, ""}, + + {NULL, NULL, 0, NULL}, +}; + +static struct PyModuleDef module = { + PyModuleDef_HEAD_INIT, + "_cycles", + "Blender cycles render integration", + -1, + methods, + NULL, + NULL, + NULL, + NULL, +}; + +CCL_NAMESPACE_END + +void *CCL_python_module_init() +{ + PyObject *mod = PyModule_Create(&ccl::module); + +#ifdef WITH_OSL + /* TODO(sergey): This gives us library we've been linking against. + * In theory with dynamic OSL library it might not be + * accurate, but there's nothing in OSL API which we + * might use to get version in runtime. + */ + int curversion = OSL_LIBRARY_VERSION_CODE; + PyModule_AddObject(mod, "with_osl", Py_True); + Py_INCREF(Py_True); + PyModule_AddObject( + mod, + "osl_version", + Py_BuildValue("(iii)", curversion / 10000, (curversion / 100) % 100, curversion % 100)); + PyModule_AddObject( + mod, + "osl_version_string", + PyUnicode_FromFormat( + "%2d, %2d, %2d", curversion / 10000, (curversion / 100) % 100, curversion % 100)); +#else + PyModule_AddObject(mod, "with_osl", Py_False); + Py_INCREF(Py_False); + PyModule_AddStringConstant(mod, "osl_version", "unknown"); + PyModule_AddStringConstant(mod, "osl_version_string", "unknown"); +#endif + +#ifdef WITH_EMBREE + PyModule_AddObject(mod, "with_embree", Py_True); + Py_INCREF(Py_True); +#else /* WITH_EMBREE */ + PyModule_AddObject(mod, "with_embree", Py_False); + Py_INCREF(Py_False); +#endif /* WITH_EMBREE */ + + if (ccl::openimagedenoise_supported()) { + PyModule_AddObject(mod, "with_openimagedenoise", Py_True); + Py_INCREF(Py_True); + } + else { + PyModule_AddObject(mod, "with_openimagedenoise", Py_False); + Py_INCREF(Py_False); + } + + return (void *)mod; +} diff --git a/intern/cycles/blender/session.cpp b/intern/cycles/blender/session.cpp new file mode 100644 index 00000000000..d9a2d3d3029 --- /dev/null +++ b/intern/cycles/blender/session.cpp @@ -0,0 +1,1003 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "device/device.h" +#include "scene/background.h" +#include "scene/camera.h" +#include "scene/colorspace.h" +#include "scene/film.h" +#include "scene/integrator.h" +#include "scene/light.h" +#include "scene/mesh.h" +#include "scene/object.h" +#include "scene/scene.h" +#include "scene/shader.h" +#include "scene/stats.h" +#include "session/buffers.h" +#include "session/session.h" + +#include "util/algorithm.h" +#include "util/color.h" +#include "util/foreach.h" +#include "util/function.h" +#include "util/hash.h" +#include "util/log.h" +#include "util/murmurhash.h" +#include "util/path.h" +#include "util/progress.h" +#include "util/time.h" + +#include "blender/display_driver.h" +#include "blender/output_driver.h" +#include "blender/session.h" +#include "blender/sync.h" +#include "blender/util.h" + +CCL_NAMESPACE_BEGIN + +DeviceTypeMask BlenderSession::device_override = DEVICE_MASK_ALL; +bool BlenderSession::headless = false; +bool BlenderSession::print_render_stats = false; + +BlenderSession::BlenderSession(BL::RenderEngine &b_engine, + BL::Preferences &b_userpref, + BL::BlendData &b_data, + bool preview_osl) + : session(NULL), + scene(NULL), + sync(NULL), + b_engine(b_engine), + b_userpref(b_userpref), + b_data(b_data), + b_render(b_engine.render()), + b_depsgraph(PointerRNA_NULL), + b_scene(PointerRNA_NULL), + b_v3d(PointerRNA_NULL), + b_rv3d(PointerRNA_NULL), + width(0), + height(0), + preview_osl(preview_osl), + python_thread_state(NULL), + use_developer_ui(false) +{ + /* offline render */ + background = true; + last_redraw_time = 0.0; + start_resize_time = 0.0; + last_status_time = 0.0; +} + +BlenderSession::BlenderSession(BL::RenderEngine &b_engine, + BL::Preferences &b_userpref, + BL::BlendData &b_data, + BL::SpaceView3D &b_v3d, + BL::RegionView3D &b_rv3d, + int width, + int height) + : session(NULL), + scene(NULL), + sync(NULL), + b_engine(b_engine), + b_userpref(b_userpref), + b_data(b_data), + b_render(b_engine.render()), + b_depsgraph(PointerRNA_NULL), + b_scene(PointerRNA_NULL), + b_v3d(b_v3d), + b_rv3d(b_rv3d), + width(width), + height(height), + preview_osl(false), + python_thread_state(NULL), + use_developer_ui(b_userpref.experimental().use_cycles_debug() && + b_userpref.view().show_developer_ui()) +{ + /* 3d view render */ + background = false; + last_redraw_time = 0.0; + start_resize_time = 0.0; + last_status_time = 0.0; +} + +BlenderSession::~BlenderSession() +{ + free_session(); +} + +void BlenderSession::create_session() +{ + const SessionParams session_params = BlenderSync::get_session_params( + b_engine, b_userpref, b_scene, background); + const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); + const bool session_pause = BlenderSync::get_session_pause(b_scene, background); + + /* reset status/progress */ + last_status = ""; + last_error = ""; + last_progress = -1.0f; + start_resize_time = 0.0; + + /* create session */ + session = new Session(session_params, scene_params); + session->progress.set_update_callback(function_bind(&BlenderSession::tag_redraw, this)); + session->progress.set_cancel_callback(function_bind(&BlenderSession::test_cancel, this)); + session->set_pause(session_pause); + + /* create scene */ + scene = session->scene; + scene->name = b_scene.name(); + + /* create sync */ + sync = new BlenderSync( + b_engine, b_data, b_scene, scene, !background, use_developer_ui, session->progress); + BL::Object b_camera_override(b_engine.camera_override()); + if (b_v3d) { + sync->sync_view(b_v3d, b_rv3d, width, 
height); + } + else { + sync->sync_camera(b_render, b_camera_override, width, height, ""); + } + + /* set buffer parameters */ + const BufferParams buffer_params = BlenderSync::get_buffer_params( + b_v3d, b_rv3d, scene->camera, width, height); + session->reset(session_params, buffer_params); + + /* Viewport and preview (as in, material preview) does not do tiled rendering, so can inform + * engine that no tracking of the tiles state is needed. + * The offline rendering will make a decision when tile is being written. The penalty of asking + * the engine to keep track of tiles state is minimal, so there is nothing to worry about here + * about possible single-tiled final render. */ + if (!b_engine.is_preview() && !b_v3d) { + b_engine.use_highlight_tiles(true); + } +} + +void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph) +{ + /* Update data, scene and depsgraph pointers. These can change after undo. */ + this->b_data = b_data; + this->b_depsgraph = b_depsgraph; + this->b_scene = b_depsgraph.scene_eval(); + if (sync) { + sync->reset(this->b_data, this->b_scene); + } + + if (preview_osl) { + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + RNA_boolean_set(&cscene, "shading_system", preview_osl); + } + + if (b_v3d) { + this->b_render = b_scene.render(); + } + else { + this->b_render = b_engine.render(); + width = render_resolution_x(b_render); + height = render_resolution_y(b_render); + } + + bool is_new_session = (session == NULL); + if (is_new_session) { + /* Initialize session and remember it was just created so not to + * re-create it below. + */ + create_session(); + } + + if (b_v3d) { + /* NOTE: We need to create session, but all the code from below + * will make viewport render to stuck on initialization. + */ + return; + } + + const SessionParams session_params = BlenderSync::get_session_params( + b_engine, b_userpref, b_scene, background); + const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); + + if (scene->params.modified(scene_params) || session->params.modified(session_params) || + !this->b_render.use_persistent_data()) { + /* if scene or session parameters changed, it's easier to simply re-create + * them rather than trying to distinguish which settings need to be updated + */ + if (!is_new_session) { + free_session(); + create_session(); + } + return; + } + + session->progress.reset(); + + /* peak memory usage should show current render peak, not peak for all renders + * made by this render session + */ + session->stats.mem_peak = session->stats.mem_used; + + if (is_new_session) { + /* Sync object should be re-created for new scene. */ + delete sync; + sync = new BlenderSync( + b_engine, b_data, b_scene, scene, !background, use_developer_ui, session->progress); + } + else { + /* Sync recalculations to do just the required updates. 
*/ + sync->sync_recalc(b_depsgraph, b_v3d); + } + + BL::Object b_camera_override(b_engine.camera_override()); + sync->sync_camera(b_render, b_camera_override, width, height, ""); + + BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL); + BL::RegionView3D b_null_region_view3d(PointerRNA_NULL); + const BufferParams buffer_params = BlenderSync::get_buffer_params( + b_null_space_view3d, b_null_region_view3d, scene->camera, width, height); + session->reset(session_params, buffer_params); + + /* reset time */ + start_resize_time = 0.0; + + { + thread_scoped_lock lock(draw_state_.mutex); + draw_state_.last_pass_index = -1; + } +} + +void BlenderSession::free_session() +{ + if (session) { + session->cancel(true); + } + + delete sync; + sync = nullptr; + + delete session; + session = nullptr; + + display_driver_ = nullptr; +} + +void BlenderSession::full_buffer_written(string_view filename) +{ + full_buffer_files_.emplace_back(filename); +} + +static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest) +{ + string identifier = string_printf("%08x", util_murmur_hash3(name.c_str(), name.length(), 0)); + string prefix = "cryptomatte/" + identifier.substr(0, 7) + "/"; + + render_add_metadata(b_rr, prefix + "name", name); + render_add_metadata(b_rr, prefix + "hash", "MurmurHash3_32"); + render_add_metadata(b_rr, prefix + "conversion", "uint32_to_float32"); + render_add_metadata(b_rr, prefix + "manifest", manifest); +} + +void BlenderSession::stamp_view_layer_metadata(Scene *scene, const string &view_layer_name) +{ + BL::RenderResult b_rr = b_engine.get_result(); + string prefix = "cycles." + view_layer_name + "."; + + /* Configured number of samples for the view layer. */ + b_rr.stamp_data_add_field((prefix + "samples").c_str(), + to_string(session->params.samples).c_str()); + + /* Store ranged samples information. */ + /* TODO(sergey): Need to bring this information back. */ +#if 0 + if (session->tile_manager.range_num_samples != -1) { + b_rr.stamp_data_add_field((prefix + "range_start_sample").c_str(), + to_string(session->tile_manager.range_start_sample).c_str()); + b_rr.stamp_data_add_field((prefix + "range_num_samples").c_str(), + to_string(session->tile_manager.range_num_samples).c_str()); + } +#endif + + /* Write cryptomatte metadata. */ + if (scene->film->get_cryptomatte_passes() & CRYPT_OBJECT) { + add_cryptomatte_layer(b_rr, + view_layer_name + ".CryptoObject", + scene->object_manager->get_cryptomatte_objects(scene)); + } + if (scene->film->get_cryptomatte_passes() & CRYPT_MATERIAL) { + add_cryptomatte_layer(b_rr, + view_layer_name + ".CryptoMaterial", + scene->shader_manager->get_cryptomatte_materials(scene)); + } + if (scene->film->get_cryptomatte_passes() & CRYPT_ASSET) { + add_cryptomatte_layer(b_rr, + view_layer_name + ".CryptoAsset", + scene->object_manager->get_cryptomatte_assets(scene)); + } + + /* Store synchronization and bare-render times. 
*/ + double total_time, render_time; + session->progress.get_time(total_time, render_time); + b_rr.stamp_data_add_field((prefix + "total_time").c_str(), + time_human_readable_from_seconds(total_time).c_str()); + b_rr.stamp_data_add_field((prefix + "render_time").c_str(), + time_human_readable_from_seconds(render_time).c_str()); + b_rr.stamp_data_add_field((prefix + "synchronization_time").c_str(), + time_human_readable_from_seconds(total_time - render_time).c_str()); +} + +void BlenderSession::render(BL::Depsgraph &b_depsgraph_) +{ + b_depsgraph = b_depsgraph_; + + if (session->progress.get_cancel()) { + update_status_progress(); + return; + } + + /* Create driver to write out render results. */ + ensure_display_driver_if_needed(); + session->set_output_driver(make_unique(b_engine)); + + session->full_buffer_written_cb = [&](string_view filename) { full_buffer_written(filename); }; + + BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval(); + + /* get buffer parameters */ + const SessionParams session_params = BlenderSync::get_session_params( + b_engine, b_userpref, b_scene, background); + BufferParams buffer_params = BlenderSync::get_buffer_params( + b_v3d, b_rv3d, scene->camera, width, height); + + /* temporary render result to find needed passes and views */ + BL::RenderResult b_rr = b_engine.begin_result(0, 0, 1, 1, b_view_layer.name().c_str(), NULL); + BL::RenderResult::layers_iterator b_single_rlay; + b_rr.layers.begin(b_single_rlay); + BL::RenderLayer b_rlay = *b_single_rlay; + + { + thread_scoped_lock lock(draw_state_.mutex); + b_rlay_name = b_view_layer.name(); + + /* Signal that the display pass is to be updated. */ + draw_state_.last_pass_index = -1; + } + + /* Compute render passes and film settings. */ + sync->sync_render_passes(b_rlay, b_view_layer); + + BL::RenderResult::views_iterator b_view_iter; + + int num_views = 0; + for (b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end(); ++b_view_iter) { + num_views++; + } + + int view_index = 0; + for (b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end(); + ++b_view_iter, ++view_index) { + b_rview_name = b_view_iter->name(); + + buffer_params.layer = b_view_layer.name(); + buffer_params.view = b_rview_name; + + /* set the current view */ + b_engine.active_view_set(b_rview_name.c_str()); + + /* update scene */ + BL::Object b_camera_override(b_engine.camera_override()); + sync->sync_camera(b_render, b_camera_override, width, height, b_rview_name.c_str()); + sync->sync_data( + b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state); + builtin_images_load(); + + /* Attempt to free all data which is held by Blender side, since at this + * point we know that we've got everything to render current view layer. + */ + /* At the moment we only free if we are not doing multi-view + * (or if we are rendering the last view). See T58142/D4239 for discussion. + */ + if (view_index == num_views - 1) { + free_blender_memory_if_possible(); + } + + /* Make sure all views have different noise patterns. - hardcoded value just to make it random + */ + if (view_index != 0) { + int seed = scene->integrator->get_seed(); + seed += hash_uint2(seed, hash_uint2(view_index * 0xdeadbeef, 0)); + scene->integrator->set_seed(seed); + } + + /* Update number of samples per layer. 
*/ + const int samples = sync->get_layer_samples(); + const bool bound_samples = sync->get_layer_bound_samples(); + + SessionParams effective_session_params = session_params; + if (samples != 0 && (!bound_samples || (samples < session_params.samples))) { + effective_session_params.samples = samples; + } + + /* Update session itself. */ + session->reset(effective_session_params, buffer_params); + + /* render */ + if (!b_engine.is_preview() && background && print_render_stats) { + scene->enable_update_stats(); + } + + session->start(); + session->wait(); + + if (!b_engine.is_preview() && background && print_render_stats) { + RenderStats stats; + session->collect_statistics(&stats); + printf("Render statistics:\n%s\n", stats.full_report().c_str()); + } + + if (session->progress.get_cancel()) + break; + } + + /* add metadata */ + stamp_view_layer_metadata(scene, b_rlay_name); + + /* free result without merging */ + b_engine.end_result(b_rr, true, false, false); + + /* When tiled rendering is used there will be no "write" done for the tile. Forcefully clear + * highlighted tiles now, so that the highlight will be removed while processing full frame from + * file. */ + b_engine.tile_highlight_clear_all(); + + double total_time, render_time; + session->progress.get_time(total_time, render_time); + VLOG(1) << "Total render time: " << total_time; + VLOG(1) << "Render time (without synchronization): " << render_time; +} + +void BlenderSession::render_frame_finish() +{ + /* Processing of all layers and views is done. Clear the strings so that we can communicate + * progress about reading files and denoising them. */ + b_rlay_name = ""; + b_rview_name = ""; + + if (!b_render.use_persistent_data()) { + /* Free the sync object so that it can properly dereference nodes from the scene graph before + * the graph is freed. */ + delete sync; + sync = nullptr; + + session->device_free(); + } + + for (string_view filename : full_buffer_files_) { + session->process_full_buffer_from_disk(filename); + if (check_and_report_session_error()) { + break; + } + } + + for (string_view filename : full_buffer_files_) { + path_remove(filename); + } + + /* Clear driver. */ + session->set_output_driver(nullptr); + session->full_buffer_written_cb = function_null; + + /* All the files are handled. + * Clear the list so that this session can be re-used by Persistent Data. 
*/ + full_buffer_files_.clear(); +} + +static PassType bake_type_to_pass(const string &bake_type_str, const int bake_filter) +{ + const char *bake_type = bake_type_str.c_str(); + + /* data passes */ + if (strcmp(bake_type, "POSITION") == 0) { + return PASS_POSITION; + } + else if (strcmp(bake_type, "NORMAL") == 0) { + return PASS_NORMAL; + } + else if (strcmp(bake_type, "UV") == 0) { + return PASS_UV; + } + else if (strcmp(bake_type, "ROUGHNESS") == 0) { + return PASS_ROUGHNESS; + } + else if (strcmp(bake_type, "EMIT") == 0) { + return PASS_EMISSION; + } + /* light passes */ + else if (strcmp(bake_type, "AO") == 0) { + return PASS_AO; + } + else if (strcmp(bake_type, "COMBINED") == 0) { + return PASS_COMBINED; + } + else if (strcmp(bake_type, "SHADOW") == 0) { + return PASS_SHADOW; + } + else if (strcmp(bake_type, "DIFFUSE") == 0) { + if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) && + bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { + return PASS_DIFFUSE; + } + else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) { + return PASS_DIFFUSE_DIRECT; + } + else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { + return PASS_DIFFUSE_INDIRECT; + } + else { + return PASS_DIFFUSE_COLOR; + } + } + else if (strcmp(bake_type, "GLOSSY") == 0) { + if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) && + bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { + return PASS_GLOSSY; + } + else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) { + return PASS_GLOSSY_DIRECT; + } + else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { + return PASS_GLOSSY_INDIRECT; + } + else { + return PASS_GLOSSY_COLOR; + } + } + else if (strcmp(bake_type, "TRANSMISSION") == 0) { + if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) && + bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { + return PASS_TRANSMISSION; + } + else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) { + return PASS_TRANSMISSION_DIRECT; + } + else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) { + return PASS_TRANSMISSION_INDIRECT; + } + else { + return PASS_TRANSMISSION_COLOR; + } + } + /* extra */ + else if (strcmp(bake_type, "ENVIRONMENT") == 0) { + return PASS_BACKGROUND; + } + + return PASS_COMBINED; +} + +void BlenderSession::bake(BL::Depsgraph &b_depsgraph_, + BL::Object &b_object, + const string &bake_type, + const int bake_filter, + const int bake_width, + const int bake_height) +{ + b_depsgraph = b_depsgraph_; + + /* Initialize bake manager, before we load the baking kernels. */ + scene->bake_manager->set(scene, b_object.name()); + + /* Add render pass that we want to bake, and name it Combined so that it is + * used as that on the Blender side. */ + Pass *pass = scene->create_node(); + pass->set_name(ustring("Combined")); + pass->set_type(bake_type_to_pass(bake_type, bake_filter)); + pass->set_include_albedo((bake_filter & BL::BakeSettings::pass_filter_COLOR)); + + session->set_display_driver(nullptr); + session->set_output_driver(make_unique(b_engine)); + + if (!session->progress.get_cancel()) { + /* Sync scene. */ + BL::Object b_camera_override(b_engine.camera_override()); + sync->sync_camera(b_render, b_camera_override, width, height, ""); + sync->sync_data( + b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state); + builtin_images_load(); + } + + /* Object might have been disabled for rendering or excluded in some + * other way, in that case Blender will report a warning afterwards. 
*/ + bool object_found = false; + foreach (Object *ob, scene->objects) { + if (ob->name == b_object.name()) { + object_found = true; + break; + } + } + + if (object_found && !session->progress.get_cancel()) { + /* Get session and buffer parameters. */ + const SessionParams session_params = BlenderSync::get_session_params( + b_engine, b_userpref, b_scene, background); + + BufferParams buffer_params; + buffer_params.width = bake_width; + buffer_params.height = bake_height; + + /* Update session. */ + session->reset(session_params, buffer_params); + + session->progress.set_update_callback( + function_bind(&BlenderSession::update_bake_progress, this)); + } + + /* Perform bake. Check cancel to avoid crash with incomplete scene data. */ + if (object_found && !session->progress.get_cancel()) { + session->start(); + session->wait(); + } + + session->set_output_driver(nullptr); +} + +void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_) +{ + /* only used for viewport render */ + if (!b_v3d) + return; + + /* on session/scene parameter changes, we recreate session entirely */ + const SessionParams session_params = BlenderSync::get_session_params( + b_engine, b_userpref, b_scene, background); + const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); + const bool session_pause = BlenderSync::get_session_pause(b_scene, background); + + if (session->params.modified(session_params) || scene->params.modified(scene_params)) { + free_session(); + create_session(); + } + + ensure_display_driver_if_needed(); + + /* increase samples and render time, but never decrease */ + session->set_samples(session_params.samples); + session->set_time_limit(session_params.time_limit); + session->set_pause(session_pause); + + /* copy recalc flags, outside of mutex so we can decide to do the real + * synchronization at a later time to not block on running updates */ + sync->sync_recalc(b_depsgraph_, b_v3d); + + /* don't do synchronization if on pause */ + if (session_pause) { + tag_update(); + return; + } + + /* try to acquire mutex. if we don't want to or can't, come back later */ + if (!session->ready_to_reset() || !session->scene->mutex.try_lock()) { + tag_update(); + return; + } + + /* data and camera synchronize */ + b_depsgraph = b_depsgraph_; + + BL::Object b_camera_override(b_engine.camera_override()); + sync->sync_data( + b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state); + + if (b_rv3d) + sync->sync_view(b_v3d, b_rv3d, width, height); + else + sync->sync_camera(b_render, b_camera_override, width, height, ""); + + /* get buffer parameters */ + const BufferParams buffer_params = BlenderSync::get_buffer_params( + b_v3d, b_rv3d, scene->camera, width, height); + + /* reset if needed */ + if (scene->need_reset()) { + session->reset(session_params, buffer_params); + + /* After session reset, so device is not accessing image data anymore. */ + builtin_images_load(); + + /* reset time */ + start_resize_time = 0.0; + } + + /* unlock */ + session->scene->mutex.unlock(); + + /* Start rendering thread, if it's not running already. Do this + * after all scene data has been synced at least once. */ + session->start(); +} + +void BlenderSession::draw(BL::SpaceImageEditor &space_image) +{ + if (!session || !session->scene) { + /* Offline render drawing does not force the render engine update, which means it's possible + * that the Session is not created yet. 
*/ + return; + } + + thread_scoped_lock lock(draw_state_.mutex); + + const int pass_index = space_image.image_user().multilayer_pass(); + if (pass_index != draw_state_.last_pass_index) { + BL::RenderPass b_display_pass(b_engine.pass_by_index_get(b_rlay_name.c_str(), pass_index)); + if (!b_display_pass) { + return; + } + + Scene *scene = session->scene; + + thread_scoped_lock lock(scene->mutex); + + const Pass *pass = Pass::find(scene->passes, b_display_pass.name()); + if (!pass) { + return; + } + + scene->film->set_display_pass(pass->get_type()); + + draw_state_.last_pass_index = pass_index; + } + + if (display_driver_) { + BL::Array zoom = space_image.zoom(); + display_driver_->set_zoom(zoom[0], zoom[1]); + } + + session->draw(); +} + +void BlenderSession::view_draw(int w, int h) +{ + /* pause in redraw in case update is not being called due to final render */ + session->set_pause(BlenderSync::get_session_pause(b_scene, background)); + + /* before drawing, we verify camera and viewport size changes, because + * we do not get update callbacks for those, we must detect them here */ + if (session->ready_to_reset()) { + bool reset = false; + + /* if dimensions changed, reset */ + if (width != w || height != h) { + if (start_resize_time == 0.0) { + /* don't react immediately to resizes to avoid flickery resizing + * of the viewport, and some window managers changing the window + * size temporarily on unminimize */ + start_resize_time = time_dt(); + tag_redraw(); + } + else if (time_dt() - start_resize_time < 0.2) { + tag_redraw(); + } + else { + width = w; + height = h; + reset = true; + } + } + + /* try to acquire mutex. if we can't, come back later */ + if (!session->scene->mutex.try_lock()) { + tag_update(); + } + else { + /* update camera from 3d view */ + + sync->sync_view(b_v3d, b_rv3d, width, height); + + if (scene->camera->is_modified()) + reset = true; + + session->scene->mutex.unlock(); + } + + /* reset if requested */ + if (reset) { + const SessionParams session_params = BlenderSync::get_session_params( + b_engine, b_userpref, b_scene, background); + const BufferParams buffer_params = BlenderSync::get_buffer_params( + b_v3d, b_rv3d, scene->camera, width, height); + const bool session_pause = BlenderSync::get_session_pause(b_scene, background); + + if (session_pause == false) { + session->reset(session_params, buffer_params); + start_resize_time = 0.0; + } + } + } + else { + tag_update(); + } + + /* update status and progress for 3d view draw */ + update_status_progress(); + + /* draw */ + session->draw(); +} + +void BlenderSession::get_status(string &status, string &substatus) +{ + session->progress.get_status(status, substatus); +} + +void BlenderSession::get_progress(float &progress, double &total_time, double &render_time) +{ + session->progress.get_time(total_time, render_time); + progress = session->progress.get_progress(); +} + +void BlenderSession::update_bake_progress() +{ + float progress = session->progress.get_progress(); + + if (progress != last_progress) { + b_engine.update_progress(progress); + last_progress = progress; + } +} + +void BlenderSession::update_status_progress() +{ + string timestatus, status, substatus; + string scene_status = ""; + float progress; + double total_time, remaining_time = 0, render_time; + float mem_used = (float)session->stats.mem_used / 1024.0f / 1024.0f; + float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f; + + get_status(status, substatus); + get_progress(progress, total_time, render_time); + + if (progress > 0) { + 
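/* Only query an estimate once some progress has been made. A typical estimate simply
 * extrapolates the elapsed time (illustrative formula; the actual value is computed
 * inside Session):
 *   remaining ~= elapsed * (1 - progress) / progress
 */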
remaining_time = session->get_estimated_remaining_time(); + } + + if (background) { + if (scene) + scene_status += " | " + scene->name; + if (b_rlay_name != "") + scene_status += ", " + b_rlay_name; + + if (b_rview_name != "") + scene_status += ", " + b_rview_name; + + if (remaining_time > 0) { + timestatus += "Remaining:" + time_human_readable_from_seconds(remaining_time) + " | "; + } + + timestatus += string_printf("Mem:%.2fM, Peak:%.2fM", (double)mem_used, (double)mem_peak); + + if (status.size() > 0) + status = " | " + status; + if (substatus.size() > 0) + status += " | " + substatus; + } + + double current_time = time_dt(); + /* When rendering in a window, redraw the status at least once per second to keep the elapsed + * and remaining time up-to-date. For headless rendering, only report when something + * significant changes to keep the console output readable. */ + if (status != last_status || (!headless && (current_time - last_status_time) > 1.0)) { + b_engine.update_stats("", (timestatus + scene_status + status).c_str()); + b_engine.update_memory_stats(mem_used, mem_peak); + last_status = status; + last_status_time = current_time; + } + if (progress != last_progress) { + b_engine.update_progress(progress); + last_progress = progress; + } + + check_and_report_session_error(); +} + +bool BlenderSession::check_and_report_session_error() +{ + if (!session->progress.get_error()) { + return false; + } + + const string error = session->progress.get_error_message(); + if (error != last_error) { + /* TODO(sergey): Currently C++ RNA API doesn't let us to use mnemonic name for the variable. + * Would be nice to have this figured out. + * + * For until then, 1 << 5 means RPT_ERROR. */ + b_engine.report(1 << 5, error.c_str()); + b_engine.error_set(error.c_str()); + last_error = error; + } + + return true; +} + +void BlenderSession::tag_update() +{ + /* tell blender that we want to get another update callback */ + b_engine.tag_update(); +} + +void BlenderSession::tag_redraw() +{ + if (background) { + /* update stats and progress, only for background here because + * in 3d view we do it in draw for thread safety reasons */ + update_status_progress(); + + /* offline render, redraw if timeout passed */ + if (time_dt() - last_redraw_time > 1.0) { + b_engine.tag_redraw(); + last_redraw_time = time_dt(); + } + } + else { + /* tell blender that we want to redraw */ + b_engine.tag_redraw(); + } +} + +void BlenderSession::test_cancel() +{ + /* test if we need to cancel rendering */ + if (background) + if (b_engine.test_break()) + session->progress.set_cancel("Cancelled"); +} + +void BlenderSession::free_blender_memory_if_possible() +{ + if (!background) { + /* During interactive render we can not free anything: attempts to save + * memory would cause things to be allocated and evaluated for every + * updated sample. + */ + return; + } + b_engine.free_blender_memory(); +} + +void BlenderSession::ensure_display_driver_if_needed() +{ + if (display_driver_) { + /* Driver is already created. */ + return; + } + + if (headless) { + /* No display needed for headless. */ + return; + } + + if (b_engine.is_preview()) { + /* TODO(sergey): Investigate whether DisplayDriver can be used for the preview as well. 
*/ + return; + } + + unique_ptr display_driver = make_unique(b_engine, + b_scene); + display_driver_ = display_driver.get(); + session->set_display_driver(move(display_driver)); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/session.h b/intern/cycles/blender/session.h new file mode 100644 index 00000000000..fa24b5f7467 --- /dev/null +++ b/intern/cycles/blender/session.h @@ -0,0 +1,166 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BLENDER_SESSION_H__ +#define __BLENDER_SESSION_H__ + +#include "MEM_guardedalloc.h" + +#include "RNA_blender_cpp.h" + +#include "device/device.h" + +#include "scene/bake.h" +#include "scene/scene.h" +#include "session/session.h" + +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +class BlenderDisplayDriver; +class BlenderSync; +class ImageMetaData; +class Scene; +class Session; + +class BlenderSession { + public: + BlenderSession(BL::RenderEngine &b_engine, + BL::Preferences &b_userpref, + BL::BlendData &b_data, + bool preview_osl); + + BlenderSession(BL::RenderEngine &b_engine, + BL::Preferences &b_userpref, + BL::BlendData &b_data, + BL::SpaceView3D &b_v3d, + BL::RegionView3D &b_rv3d, + int width, + int height); + + ~BlenderSession(); + + /* session */ + void create_session(); + void free_session(); + + void reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph); + + /* offline render */ + void render(BL::Depsgraph &b_depsgraph); + + void render_frame_finish(); + + void bake(BL::Depsgraph &b_depsgrah, + BL::Object &b_object, + const string &pass_type, + const int custom_flag, + const int bake_width, + const int bake_height); + + void full_buffer_written(string_view filename); + /* interactive updates */ + void synchronize(BL::Depsgraph &b_depsgraph); + + /* drawing */ + void draw(BL::SpaceImageEditor &space_image); + void view_draw(int w, int h); + void tag_redraw(); + void tag_update(); + void get_status(string &status, string &substatus); + void get_progress(float &progress, double &total_time, double &render_time); + void test_cancel(); + void update_status_progress(); + void update_bake_progress(); + + bool background; + Session *session; + Scene *scene; + BlenderSync *sync; + double last_redraw_time; + + BL::RenderEngine b_engine; + BL::Preferences b_userpref; + BL::BlendData b_data; + BL::RenderSettings b_render; + BL::Depsgraph b_depsgraph; + /* NOTE: Blender's scene might become invalid after call + * #free_blender_memory_if_possible(). */ + BL::Scene b_scene; + BL::SpaceView3D b_v3d; + BL::RegionView3D b_rv3d; + string b_rlay_name; + string b_rview_name; + + string last_status; + string last_error; + float last_progress; + double last_status_time; + + int width, height; + bool preview_osl; + double start_resize_time; + + void *python_thread_state; + + bool use_developer_ui; + + /* Global state which is common for all render sessions created from Blender. + * Usually denotes command line arguments. 
+ */ + static DeviceTypeMask device_override; + + /* Blender is running from the command line, no windows are shown and some + * extra render optimization is possible (possible to free draw-only data and + * so on. + */ + static bool headless; + + static bool print_render_stats; + + protected: + void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name); + + /* Check whether session error happened. + * If so, it is reported to the render engine and true is returned. + * Otherwise false is returned. */ + bool check_and_report_session_error(); + + void builtin_images_load(); + + /* Is used after each render layer synchronization is done with the goal + * of freeing render engine data which is held from Blender side (for + * example, dependency graph). + */ + void free_blender_memory_if_possible(); + + void ensure_display_driver_if_needed(); + + struct { + thread_mutex mutex; + int last_pass_index = -1; + } draw_state_; + + /* NOTE: The BlenderSession references the display driver. */ + BlenderDisplayDriver *display_driver_ = nullptr; + + vector full_buffer_files_; +}; + +CCL_NAMESPACE_END + +#endif /* __BLENDER_SESSION_H__ */ diff --git a/intern/cycles/blender/shader.cpp b/intern/cycles/blender/shader.cpp new file mode 100644 index 00000000000..0cd9052b47a --- /dev/null +++ b/intern/cycles/blender/shader.cpp @@ -0,0 +1,1589 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "scene/shader.h" +#include "scene/background.h" +#include "scene/colorspace.h" +#include "scene/integrator.h" +#include "scene/light.h" +#include "scene/osl.h" +#include "scene/scene.h" +#include "scene/shader_graph.h" +#include "scene/shader_nodes.h" + +#include "blender/image.h" +#include "blender/sync.h" +#include "blender/texture.h" +#include "blender/util.h" + +#include "util/debug.h" +#include "util/foreach.h" +#include "util/set.h" +#include "util/string.h" +#include "util/task.h" + +CCL_NAMESPACE_BEGIN + +typedef map PtrInputMap; +typedef map PtrOutputMap; +typedef map ProxyMap; + +/* Find */ + +void BlenderSync::find_shader(BL::ID &id, array &used_shaders, Shader *default_shader) +{ + Shader *shader = (id) ? 
shader_map.find(id) : default_shader; + + used_shaders.push_back_slow(shader); + shader->tag_used(scene); +} + +/* RNA translation utilities */ + +static VolumeSampling get_volume_sampling(PointerRNA &ptr) +{ + return (VolumeSampling)get_enum( + ptr, "volume_sampling", VOLUME_NUM_SAMPLING, VOLUME_SAMPLING_DISTANCE); +} + +static VolumeInterpolation get_volume_interpolation(PointerRNA &ptr) +{ + return (VolumeInterpolation)get_enum( + ptr, "volume_interpolation", VOLUME_NUM_INTERPOLATION, VOLUME_INTERPOLATION_LINEAR); +} + +static DisplacementMethod get_displacement_method(PointerRNA &ptr) +{ + return (DisplacementMethod)get_enum( + ptr, "displacement_method", DISPLACE_NUM_METHODS, DISPLACE_BUMP); +} + +static int validate_enum_value(int value, int num_values, int default_value) +{ + if (value >= num_values) { + return default_value; + } + return value; +} + +template static InterpolationType get_image_interpolation(NodeType &b_node) +{ + int value = b_node.interpolation(); + return (InterpolationType)validate_enum_value( + value, INTERPOLATION_NUM_TYPES, INTERPOLATION_LINEAR); +} + +template static ExtensionType get_image_extension(NodeType &b_node) +{ + int value = b_node.extension(); + return (ExtensionType)validate_enum_value(value, EXTENSION_NUM_TYPES, EXTENSION_REPEAT); +} + +static ImageAlphaType get_image_alpha_type(BL::Image &b_image) +{ + int value = b_image.alpha_mode(); + return (ImageAlphaType)validate_enum_value(value, IMAGE_ALPHA_NUM_TYPES, IMAGE_ALPHA_AUTO); +} + +/* Attribute name translation utilities */ + +/* Since Eevee needs to know whether the attribute is uniform or varying + * at the time it compiles the shader for the material, Blender had to + * introduce different namespaces (types) in its attribute node. However, + * Cycles already has object attributes that form a uniform namespace with + * the more common varying attributes. Without completely reworking the + * attribute handling in Cycles to introduce separate namespaces (this could + * be especially hard for OSL which directly uses the name string), the + * space identifier has to be added to the attribute name as a prefix. + * + * The prefixes include a control character to ensure the user specified + * name can't accidentally include a special prefix. 
+ */ + +static const string_view object_attr_prefix("\x01object:"); +static const string_view instancer_attr_prefix("\x01instancer:"); + +static ustring blender_attribute_name_add_type(const string &name, BlenderAttributeType type) +{ + switch (type) { + case BL::ShaderNodeAttribute::attribute_type_OBJECT: + return ustring::concat(object_attr_prefix, name); + case BL::ShaderNodeAttribute::attribute_type_INSTANCER: + return ustring::concat(instancer_attr_prefix, name); + default: + return ustring(name); + } +} + +BlenderAttributeType blender_attribute_name_split_type(ustring name, string *r_real_name) +{ + string_view sname(name); + + if (sname.substr(0, object_attr_prefix.size()) == object_attr_prefix) { + *r_real_name = sname.substr(object_attr_prefix.size()); + return BL::ShaderNodeAttribute::attribute_type_OBJECT; + } + + if (sname.substr(0, instancer_attr_prefix.size()) == instancer_attr_prefix) { + *r_real_name = sname.substr(instancer_attr_prefix.size()); + return BL::ShaderNodeAttribute::attribute_type_INSTANCER; + } + + return BL::ShaderNodeAttribute::attribute_type_GEOMETRY; +} + +/* Graph */ + +static BL::NodeSocket get_node_output(BL::Node &b_node, const string &name) +{ + for (BL::NodeSocket &b_out : b_node.outputs) { + if (b_out.identifier() == name) { + return b_out; + } + } + assert(0); + return *b_node.outputs.begin(); +} + +static float3 get_node_output_rgba(BL::Node &b_node, const string &name) +{ + BL::NodeSocket b_sock = get_node_output(b_node, name); + float value[4]; + RNA_float_get_array(&b_sock.ptr, "default_value", value); + return make_float3(value[0], value[1], value[2]); +} + +static float get_node_output_value(BL::Node &b_node, const string &name) +{ + BL::NodeSocket b_sock = get_node_output(b_node, name); + return RNA_float_get(&b_sock.ptr, "default_value"); +} + +static float3 get_node_output_vector(BL::Node &b_node, const string &name) +{ + BL::NodeSocket b_sock = get_node_output(b_node, name); + float value[3]; + RNA_float_get_array(&b_sock.ptr, "default_value", value); + return make_float3(value[0], value[1], value[2]); +} + +static SocketType::Type convert_socket_type(BL::NodeSocket &b_socket) +{ + switch (b_socket.type()) { + case BL::NodeSocket::type_VALUE: + return SocketType::FLOAT; + case BL::NodeSocket::type_INT: + return SocketType::INT; + case BL::NodeSocket::type_VECTOR: + return SocketType::VECTOR; + case BL::NodeSocket::type_RGBA: + return SocketType::COLOR; + case BL::NodeSocket::type_STRING: + return SocketType::STRING; + case BL::NodeSocket::type_SHADER: + return SocketType::CLOSURE; + + default: + return SocketType::UNDEFINED; + } +} + +static void set_default_value(ShaderInput *input, + BL::NodeSocket &b_sock, + BL::BlendData &b_data, + BL::ID &b_id) +{ + Node *node = input->parent; + const SocketType &socket = input->socket_type; + + /* copy values for non linked inputs */ + switch (input->type()) { + case SocketType::FLOAT: { + node->set(socket, get_float(b_sock.ptr, "default_value")); + break; + } + case SocketType::INT: { + if (b_sock.type() == BL::NodeSocket::type_BOOLEAN) { + node->set(socket, get_boolean(b_sock.ptr, "default_value")); + } + else { + node->set(socket, get_int(b_sock.ptr, "default_value")); + } + break; + } + case SocketType::COLOR: { + node->set(socket, float4_to_float3(get_float4(b_sock.ptr, "default_value"))); + break; + } + case SocketType::NORMAL: + case SocketType::POINT: + case SocketType::VECTOR: { + node->set(socket, get_float3(b_sock.ptr, "default_value")); + break; + } + case SocketType::STRING: { + 
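/* String sockets typically carry file paths, so the value is expanded with
 * blender_absolute_path() from blender/util.h, which resolves Blender's "//"-relative
 * paths against the location of the .blend (or of the library file the datablock
 * comes from). Roughly (illustrative sketch, not the exact implementation):
 *   if path starts with "//":
 *     return path_join(path_dirname(b_data.filepath()), path.substr(2));
 *   return path;
 */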
node->set( + socket, + (ustring)blender_absolute_path(b_data, b_id, get_string(b_sock.ptr, "default_value"))); + break; + } + default: + break; + } +} + +static void get_tex_mapping(TextureNode *mapping, BL::TexMapping &b_mapping) +{ + if (!b_mapping) + return; + + mapping->set_tex_mapping_translation(get_float3(b_mapping.translation())); + mapping->set_tex_mapping_rotation(get_float3(b_mapping.rotation())); + mapping->set_tex_mapping_scale(get_float3(b_mapping.scale())); + mapping->set_tex_mapping_type((TextureMapping::Type)b_mapping.vector_type()); + + mapping->set_tex_mapping_x_mapping((TextureMapping::Mapping)b_mapping.mapping_x()); + mapping->set_tex_mapping_y_mapping((TextureMapping::Mapping)b_mapping.mapping_y()); + mapping->set_tex_mapping_z_mapping((TextureMapping::Mapping)b_mapping.mapping_z()); +} + +static ShaderNode *add_node(Scene *scene, + BL::RenderEngine &b_engine, + BL::BlendData &b_data, + BL::Depsgraph &b_depsgraph, + BL::Scene &b_scene, + ShaderGraph *graph, + BL::ShaderNodeTree &b_ntree, + BL::ShaderNode &b_node) +{ + ShaderNode *node = NULL; + + /* existing blender nodes */ + if (b_node.is_a(&RNA_ShaderNodeRGBCurve)) { + BL::ShaderNodeRGBCurve b_curve_node(b_node); + BL::CurveMapping mapping(b_curve_node.mapping()); + RGBCurvesNode *curves = graph->create_node(); + array curve_mapping_curves; + float min_x, max_x; + curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, true); + curvemapping_minmax(mapping, 4, &min_x, &max_x); + curves->set_min_x(min_x); + curves->set_max_x(max_x); + curves->set_curves(curve_mapping_curves); + node = curves; + } + if (b_node.is_a(&RNA_ShaderNodeVectorCurve)) { + BL::ShaderNodeVectorCurve b_curve_node(b_node); + BL::CurveMapping mapping(b_curve_node.mapping()); + VectorCurvesNode *curves = graph->create_node(); + array curve_mapping_curves; + float min_x, max_x; + curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, false); + curvemapping_minmax(mapping, 3, &min_x, &max_x); + curves->set_min_x(min_x); + curves->set_max_x(max_x); + curves->set_curves(curve_mapping_curves); + node = curves; + } + else if (b_node.is_a(&RNA_ShaderNodeFloatCurve)) { + BL::ShaderNodeFloatCurve b_curve_node(b_node); + BL::CurveMapping mapping(b_curve_node.mapping()); + FloatCurveNode *curve = graph->create_node(); + array curve_mapping_curve; + float min_x, max_x; + curvemapping_float_to_array(mapping, curve_mapping_curve, RAMP_TABLE_SIZE); + curvemapping_minmax(mapping, 1, &min_x, &max_x); + curve->set_min_x(min_x); + curve->set_max_x(max_x); + curve->set_curve(curve_mapping_curve); + node = curve; + } + else if (b_node.is_a(&RNA_ShaderNodeValToRGB)) { + RGBRampNode *ramp = graph->create_node(); + BL::ShaderNodeValToRGB b_ramp_node(b_node); + BL::ColorRamp b_color_ramp(b_ramp_node.color_ramp()); + array ramp_values; + array ramp_alpha; + colorramp_to_array(b_color_ramp, ramp_values, ramp_alpha, RAMP_TABLE_SIZE); + ramp->set_ramp(ramp_values); + ramp->set_ramp_alpha(ramp_alpha); + ramp->set_interpolate(b_color_ramp.interpolation() != BL::ColorRamp::interpolation_CONSTANT); + node = ramp; + } + else if (b_node.is_a(&RNA_ShaderNodeRGB)) { + ColorNode *color = graph->create_node(); + color->set_value(get_node_output_rgba(b_node, "Color")); + node = color; + } + else if (b_node.is_a(&RNA_ShaderNodeValue)) { + ValueNode *value = graph->create_node(); + value->set_value(get_node_output_value(b_node, "Value")); + node = value; + } + else if (b_node.is_a(&RNA_ShaderNodeCameraData)) { + node = graph->create_node(); + } + 
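/* Each branch of this chain follows the same pattern: detect the Blender node type via
 * RNA, create the matching Cycles node with the templated ShaderGraph factory, copy any
 * node-specific settings, and assign the result to `node`. For a node without extra
 * parameters this reduces to (illustrative):
 *   node = graph->create_node<InvertNode>();
 */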
else if (b_node.is_a(&RNA_ShaderNodeInvert)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeGamma)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeBrightContrast)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeMixRGB)) { + BL::ShaderNodeMixRGB b_mix_node(b_node); + MixNode *mix = graph->create_node(); + mix->set_mix_type((NodeMix)b_mix_node.blend_type()); + mix->set_use_clamp(b_mix_node.use_clamp()); + node = mix; + } + else if (b_node.is_a(&RNA_ShaderNodeSeparateRGB)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeCombineRGB)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeSeparateHSV)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeCombineHSV)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeSeparateXYZ)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeCombineXYZ)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeHueSaturation)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeRGBToBW)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeMapRange)) { + BL::ShaderNodeMapRange b_map_range_node(b_node); + MapRangeNode *map_range_node = graph->create_node(); + map_range_node->set_clamp(b_map_range_node.clamp()); + map_range_node->set_range_type((NodeMapRangeType)b_map_range_node.interpolation_type()); + node = map_range_node; + } + else if (b_node.is_a(&RNA_ShaderNodeClamp)) { + BL::ShaderNodeClamp b_clamp_node(b_node); + ClampNode *clamp_node = graph->create_node(); + clamp_node->set_clamp_type((NodeClampType)b_clamp_node.clamp_type()); + node = clamp_node; + } + else if (b_node.is_a(&RNA_ShaderNodeMath)) { + BL::ShaderNodeMath b_math_node(b_node); + MathNode *math_node = graph->create_node(); + math_node->set_math_type((NodeMathType)b_math_node.operation()); + math_node->set_use_clamp(b_math_node.use_clamp()); + node = math_node; + } + else if (b_node.is_a(&RNA_ShaderNodeVectorMath)) { + BL::ShaderNodeVectorMath b_vector_math_node(b_node); + VectorMathNode *vector_math_node = graph->create_node(); + vector_math_node->set_math_type((NodeVectorMathType)b_vector_math_node.operation()); + node = vector_math_node; + } + else if (b_node.is_a(&RNA_ShaderNodeVectorRotate)) { + BL::ShaderNodeVectorRotate b_vector_rotate_node(b_node); + VectorRotateNode *vector_rotate_node = graph->create_node(); + vector_rotate_node->set_rotate_type( + (NodeVectorRotateType)b_vector_rotate_node.rotation_type()); + vector_rotate_node->set_invert(b_vector_rotate_node.invert()); + node = vector_rotate_node; + } + else if (b_node.is_a(&RNA_ShaderNodeVectorTransform)) { + BL::ShaderNodeVectorTransform b_vector_transform_node(b_node); + VectorTransformNode *vtransform = graph->create_node(); + vtransform->set_transform_type((NodeVectorTransformType)b_vector_transform_node.vector_type()); + vtransform->set_convert_from( + (NodeVectorTransformConvertSpace)b_vector_transform_node.convert_from()); + vtransform->set_convert_to( + (NodeVectorTransformConvertSpace)b_vector_transform_node.convert_to()); + node = vtransform; + } + else if (b_node.is_a(&RNA_ShaderNodeNormal)) { + BL::Node::outputs_iterator out_it; + b_node.outputs.begin(out_it); + + NormalNode *norm = graph->create_node(); + norm->set_direction(get_node_output_vector(b_node, "Normal")); + node = norm; + } + else if (b_node.is_a(&RNA_ShaderNodeMapping)) { + 
BL::ShaderNodeMapping b_mapping_node(b_node); + MappingNode *mapping = graph->create_node(); + mapping->set_mapping_type((NodeMappingType)b_mapping_node.vector_type()); + node = mapping; + } + else if (b_node.is_a(&RNA_ShaderNodeFresnel)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeLayerWeight)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeAddShader)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeMixShader)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeAttribute)) { + BL::ShaderNodeAttribute b_attr_node(b_node); + AttributeNode *attr = graph->create_node(); + attr->set_attribute(blender_attribute_name_add_type(b_attr_node.attribute_name(), + b_attr_node.attribute_type())); + node = attr; + } + else if (b_node.is_a(&RNA_ShaderNodeBackground)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeHoldout)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeBsdfAnisotropic)) { + BL::ShaderNodeBsdfAnisotropic b_aniso_node(b_node); + AnisotropicBsdfNode *aniso = graph->create_node(); + + switch (b_aniso_node.distribution()) { + case BL::ShaderNodeBsdfAnisotropic::distribution_BECKMANN: + aniso->set_distribution(CLOSURE_BSDF_MICROFACET_BECKMANN_ID); + break; + case BL::ShaderNodeBsdfAnisotropic::distribution_GGX: + aniso->set_distribution(CLOSURE_BSDF_MICROFACET_GGX_ID); + break; + case BL::ShaderNodeBsdfAnisotropic::distribution_MULTI_GGX: + aniso->set_distribution(CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID); + break; + case BL::ShaderNodeBsdfAnisotropic::distribution_ASHIKHMIN_SHIRLEY: + aniso->set_distribution(CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID); + break; + } + + node = aniso; + } + else if (b_node.is_a(&RNA_ShaderNodeBsdfDiffuse)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeSubsurfaceScattering)) { + BL::ShaderNodeSubsurfaceScattering b_subsurface_node(b_node); + + SubsurfaceScatteringNode *subsurface = graph->create_node(); + + switch (b_subsurface_node.falloff()) { + case BL::ShaderNodeSubsurfaceScattering::falloff_BURLEY: + subsurface->set_method(CLOSURE_BSSRDF_BURLEY_ID); + break; + case BL::ShaderNodeSubsurfaceScattering::falloff_RANDOM_WALK_FIXED_RADIUS: + subsurface->set_method(CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); + break; + case BL::ShaderNodeSubsurfaceScattering::falloff_RANDOM_WALK: + subsurface->set_method(CLOSURE_BSSRDF_RANDOM_WALK_ID); + break; + } + + node = subsurface; + } + else if (b_node.is_a(&RNA_ShaderNodeBsdfGlossy)) { + BL::ShaderNodeBsdfGlossy b_glossy_node(b_node); + GlossyBsdfNode *glossy = graph->create_node(); + + switch (b_glossy_node.distribution()) { + case BL::ShaderNodeBsdfGlossy::distribution_SHARP: + glossy->set_distribution(CLOSURE_BSDF_REFLECTION_ID); + break; + case BL::ShaderNodeBsdfGlossy::distribution_BECKMANN: + glossy->set_distribution(CLOSURE_BSDF_MICROFACET_BECKMANN_ID); + break; + case BL::ShaderNodeBsdfGlossy::distribution_GGX: + glossy->set_distribution(CLOSURE_BSDF_MICROFACET_GGX_ID); + break; + case BL::ShaderNodeBsdfGlossy::distribution_ASHIKHMIN_SHIRLEY: + glossy->set_distribution(CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID); + break; + case BL::ShaderNodeBsdfGlossy::distribution_MULTI_GGX: + glossy->set_distribution(CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID); + break; + } + node = glossy; + } + else if (b_node.is_a(&RNA_ShaderNodeBsdfGlass)) { + BL::ShaderNodeBsdfGlass b_glass_node(b_node); + GlassBsdfNode *glass = graph->create_node(); + switch 
(b_glass_node.distribution()) { + case BL::ShaderNodeBsdfGlass::distribution_SHARP: + glass->set_distribution(CLOSURE_BSDF_SHARP_GLASS_ID); + break; + case BL::ShaderNodeBsdfGlass::distribution_BECKMANN: + glass->set_distribution(CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID); + break; + case BL::ShaderNodeBsdfGlass::distribution_GGX: + glass->set_distribution(CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID); + break; + case BL::ShaderNodeBsdfGlass::distribution_MULTI_GGX: + glass->set_distribution(CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID); + break; + } + node = glass; + } + else if (b_node.is_a(&RNA_ShaderNodeBsdfRefraction)) { + BL::ShaderNodeBsdfRefraction b_refraction_node(b_node); + RefractionBsdfNode *refraction = graph->create_node(); + switch (b_refraction_node.distribution()) { + case BL::ShaderNodeBsdfRefraction::distribution_SHARP: + refraction->set_distribution(CLOSURE_BSDF_REFRACTION_ID); + break; + case BL::ShaderNodeBsdfRefraction::distribution_BECKMANN: + refraction->set_distribution(CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID); + break; + case BL::ShaderNodeBsdfRefraction::distribution_GGX: + refraction->set_distribution(CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID); + break; + } + node = refraction; + } + else if (b_node.is_a(&RNA_ShaderNodeBsdfToon)) { + BL::ShaderNodeBsdfToon b_toon_node(b_node); + ToonBsdfNode *toon = graph->create_node(); + switch (b_toon_node.component()) { + case BL::ShaderNodeBsdfToon::component_DIFFUSE: + toon->set_component(CLOSURE_BSDF_DIFFUSE_TOON_ID); + break; + case BL::ShaderNodeBsdfToon::component_GLOSSY: + toon->set_component(CLOSURE_BSDF_GLOSSY_TOON_ID); + break; + } + node = toon; + } + else if (b_node.is_a(&RNA_ShaderNodeBsdfHair)) { + BL::ShaderNodeBsdfHair b_hair_node(b_node); + HairBsdfNode *hair = graph->create_node(); + switch (b_hair_node.component()) { + case BL::ShaderNodeBsdfHair::component_Reflection: + hair->set_component(CLOSURE_BSDF_HAIR_REFLECTION_ID); + break; + case BL::ShaderNodeBsdfHair::component_Transmission: + hair->set_component(CLOSURE_BSDF_HAIR_TRANSMISSION_ID); + break; + } + node = hair; + } + else if (b_node.is_a(&RNA_ShaderNodeBsdfHairPrincipled)) { + BL::ShaderNodeBsdfHairPrincipled b_principled_hair_node(b_node); + PrincipledHairBsdfNode *principled_hair = graph->create_node(); + principled_hair->set_parametrization( + (NodePrincipledHairParametrization)get_enum(b_principled_hair_node.ptr, + "parametrization", + NODE_PRINCIPLED_HAIR_NUM, + NODE_PRINCIPLED_HAIR_REFLECTANCE)); + node = principled_hair; + } + else if (b_node.is_a(&RNA_ShaderNodeBsdfPrincipled)) { + BL::ShaderNodeBsdfPrincipled b_principled_node(b_node); + PrincipledBsdfNode *principled = graph->create_node(); + switch (b_principled_node.distribution()) { + case BL::ShaderNodeBsdfPrincipled::distribution_GGX: + principled->set_distribution(CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID); + break; + case BL::ShaderNodeBsdfPrincipled::distribution_MULTI_GGX: + principled->set_distribution(CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID); + break; + } + switch (b_principled_node.subsurface_method()) { + case BL::ShaderNodeBsdfPrincipled::subsurface_method_BURLEY: + principled->set_subsurface_method(CLOSURE_BSSRDF_BURLEY_ID); + break; + case BL::ShaderNodeBsdfPrincipled::subsurface_method_RANDOM_WALK_FIXED_RADIUS: + principled->set_subsurface_method(CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); + break; + case BL::ShaderNodeBsdfPrincipled::subsurface_method_RANDOM_WALK: + principled->set_subsurface_method(CLOSURE_BSSRDF_RANDOM_WALK_ID); + break; + } + node = principled; + } + 
else if (b_node.is_a(&RNA_ShaderNodeBsdfTranslucent)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeBsdfTransparent)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeBsdfVelvet)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeEmission)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeAmbientOcclusion)) { + BL::ShaderNodeAmbientOcclusion b_ao_node(b_node); + AmbientOcclusionNode *ao = graph->create_node(); + ao->set_samples(b_ao_node.samples()); + ao->set_inside(b_ao_node.inside()); + ao->set_only_local(b_ao_node.only_local()); + node = ao; + } + else if (b_node.is_a(&RNA_ShaderNodeVolumeScatter)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeVolumeAbsorption)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeVolumePrincipled)) { + PrincipledVolumeNode *principled = graph->create_node(); + node = principled; + } + else if (b_node.is_a(&RNA_ShaderNodeNewGeometry)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeWireframe)) { + BL::ShaderNodeWireframe b_wireframe_node(b_node); + WireframeNode *wire = graph->create_node(); + wire->set_use_pixel_size(b_wireframe_node.use_pixel_size()); + node = wire; + } + else if (b_node.is_a(&RNA_ShaderNodeWavelength)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeBlackbody)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeLightPath)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeLightFalloff)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeObjectInfo)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeParticleInfo)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeHairInfo)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeVolumeInfo)) { + node = graph->create_node(); + } + else if (b_node.is_a(&RNA_ShaderNodeVertexColor)) { + BL::ShaderNodeVertexColor b_vertex_color_node(b_node); + VertexColorNode *vertex_color_node = graph->create_node(); + vertex_color_node->set_layer_name(ustring(b_vertex_color_node.layer_name())); + node = vertex_color_node; + } + else if (b_node.is_a(&RNA_ShaderNodeBump)) { + BL::ShaderNodeBump b_bump_node(b_node); + BumpNode *bump = graph->create_node(); + bump->set_invert(b_bump_node.invert()); + node = bump; + } + else if (b_node.is_a(&RNA_ShaderNodeScript)) { +#ifdef WITH_OSL + if (scene->shader_manager->use_osl()) { + /* create script node */ + BL::ShaderNodeScript b_script_node(b_node); + + ShaderManager *manager = scene->shader_manager; + string bytecode_hash = b_script_node.bytecode_hash(); + + if (!bytecode_hash.empty()) { + node = OSLShaderManager::osl_node( + graph, manager, "", bytecode_hash, b_script_node.bytecode()); + } + else { + string absolute_filepath = blender_absolute_path( + b_data, b_ntree, b_script_node.filepath()); + node = OSLShaderManager::osl_node(graph, manager, absolute_filepath, ""); + } + } +#else + (void)b_data; + (void)b_ntree; +#endif + } + else if (b_node.is_a(&RNA_ShaderNodeTexImage)) { + BL::ShaderNodeTexImage b_image_node(b_node); + BL::Image b_image(b_image_node.image()); + BL::ImageUser b_image_user(b_image_node.image_user()); + ImageTextureNode *image = graph->create_node(); + + image->set_interpolation(get_image_interpolation(b_image_node)); + image->set_extension(get_image_extension(b_image_node)); + 
image->set_projection((NodeImageProjection)b_image_node.projection());
+    image->set_projection_blend(b_image_node.projection_blend());
+    BL::TexMapping b_texture_mapping(b_image_node.texture_mapping());
+    get_tex_mapping(image, b_texture_mapping);
+
+    if (b_image) {
+      PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
+      image->set_colorspace(ustring(get_enum_identifier(colorspace_ptr, "name")));
+
+      image->set_animated(b_image_node.image_user().use_auto_refresh());
+      image->set_alpha_type(get_image_alpha_type(b_image));
+
+      array<int> tiles;
+      for (BL::UDIMTile &b_tile : b_image.tiles) {
+        tiles.push_back_slow(b_tile.number());
+      }
+      image->set_tiles(tiles);
+
+      /* builtin images will use callback-based reading because
+       * they can only be loaded correctly from the blender side
+       */
+      bool is_builtin = b_image.packed_file() || b_image.source() == BL::Image::source_GENERATED ||
+                        b_image.source() == BL::Image::source_MOVIE ||
+                        (b_engine.is_preview() && b_image.source() != BL::Image::source_SEQUENCE);
+
+      if (is_builtin) {
+        /* for builtin images we're using image datablock name to find an image to
+         * read pixels from later
+         *
+         * also store frame number as well, so there's no differences in handling
+         * builtin names for packed images and movies
+         */
+        int scene_frame = b_scene.frame_current();
+        int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
+        image->handle = scene->image_manager->add_image(
+            new BlenderImageLoader(b_image, image_frame), image->image_params());
+      }
+      else {
+        ustring filename = ustring(
+            image_user_file_path(b_image_user, b_image, b_scene.frame_current(), true));
+        image->set_filename(filename);
+      }
+    }
+    node = image;
+  }
+  else if (b_node.is_a(&RNA_ShaderNodeTexEnvironment)) {
+    BL::ShaderNodeTexEnvironment b_env_node(b_node);
+    BL::Image b_image(b_env_node.image());
+    BL::ImageUser b_image_user(b_env_node.image_user());
+    EnvironmentTextureNode *env = graph->create_node<EnvironmentTextureNode>();
+
+    env->set_interpolation(get_image_interpolation(b_env_node));
+    env->set_projection((NodeEnvironmentProjection)b_env_node.projection());
+    BL::TexMapping b_texture_mapping(b_env_node.texture_mapping());
+    get_tex_mapping(env, b_texture_mapping);
+
+    if (b_image) {
+      PointerRNA colorspace_ptr = b_image.colorspace_settings().ptr;
+      env->set_colorspace(ustring(get_enum_identifier(colorspace_ptr, "name")));
+
+      env->set_animated(b_env_node.image_user().use_auto_refresh());
+      env->set_alpha_type(get_image_alpha_type(b_image));
+
+      bool is_builtin = b_image.packed_file() || b_image.source() == BL::Image::source_GENERATED ||
+                        b_image.source() == BL::Image::source_MOVIE ||
+                        (b_engine.is_preview() && b_image.source() != BL::Image::source_SEQUENCE);
+
+      if (is_builtin) {
+        int scene_frame = b_scene.frame_current();
+        int image_frame = image_user_frame_number(b_image_user, b_image, scene_frame);
+        env->handle = scene->image_manager->add_image(new BlenderImageLoader(b_image, image_frame),
+                                                      env->image_params());
+      }
+      else {
+        env->set_filename(
+            ustring(image_user_file_path(b_image_user, b_image, b_scene.frame_current(), false)));
+      }
+    }
+    node = env;
+  }
+  else if (b_node.is_a(&RNA_ShaderNodeTexGradient)) {
+    BL::ShaderNodeTexGradient b_gradient_node(b_node);
+    GradientTextureNode *gradient = graph->create_node<GradientTextureNode>();
+    gradient->set_gradient_type((NodeGradientType)b_gradient_node.gradient_type());
+    BL::TexMapping b_texture_mapping(b_gradient_node.texture_mapping());
+    get_tex_mapping(gradient, b_texture_mapping);
+    node = gradient;
+  }
+  else if
(b_node.is_a(&RNA_ShaderNodeTexVoronoi)) { + BL::ShaderNodeTexVoronoi b_voronoi_node(b_node); + VoronoiTextureNode *voronoi = graph->create_node(); + voronoi->set_dimensions(b_voronoi_node.voronoi_dimensions()); + voronoi->set_feature((NodeVoronoiFeature)b_voronoi_node.feature()); + voronoi->set_metric((NodeVoronoiDistanceMetric)b_voronoi_node.distance()); + BL::TexMapping b_texture_mapping(b_voronoi_node.texture_mapping()); + get_tex_mapping(voronoi, b_texture_mapping); + node = voronoi; + } + else if (b_node.is_a(&RNA_ShaderNodeTexMagic)) { + BL::ShaderNodeTexMagic b_magic_node(b_node); + MagicTextureNode *magic = graph->create_node(); + magic->set_depth(b_magic_node.turbulence_depth()); + BL::TexMapping b_texture_mapping(b_magic_node.texture_mapping()); + get_tex_mapping(magic, b_texture_mapping); + node = magic; + } + else if (b_node.is_a(&RNA_ShaderNodeTexWave)) { + BL::ShaderNodeTexWave b_wave_node(b_node); + WaveTextureNode *wave = graph->create_node(); + wave->set_wave_type((NodeWaveType)b_wave_node.wave_type()); + wave->set_bands_direction((NodeWaveBandsDirection)b_wave_node.bands_direction()); + wave->set_rings_direction((NodeWaveRingsDirection)b_wave_node.rings_direction()); + wave->set_profile((NodeWaveProfile)b_wave_node.wave_profile()); + BL::TexMapping b_texture_mapping(b_wave_node.texture_mapping()); + get_tex_mapping(wave, b_texture_mapping); + node = wave; + } + else if (b_node.is_a(&RNA_ShaderNodeTexChecker)) { + BL::ShaderNodeTexChecker b_checker_node(b_node); + CheckerTextureNode *checker = graph->create_node(); + BL::TexMapping b_texture_mapping(b_checker_node.texture_mapping()); + get_tex_mapping(checker, b_texture_mapping); + node = checker; + } + else if (b_node.is_a(&RNA_ShaderNodeTexBrick)) { + BL::ShaderNodeTexBrick b_brick_node(b_node); + BrickTextureNode *brick = graph->create_node(); + brick->set_offset(b_brick_node.offset()); + brick->set_offset_frequency(b_brick_node.offset_frequency()); + brick->set_squash(b_brick_node.squash()); + brick->set_squash_frequency(b_brick_node.squash_frequency()); + BL::TexMapping b_texture_mapping(b_brick_node.texture_mapping()); + get_tex_mapping(brick, b_texture_mapping); + node = brick; + } + else if (b_node.is_a(&RNA_ShaderNodeTexNoise)) { + BL::ShaderNodeTexNoise b_noise_node(b_node); + NoiseTextureNode *noise = graph->create_node(); + noise->set_dimensions(b_noise_node.noise_dimensions()); + BL::TexMapping b_texture_mapping(b_noise_node.texture_mapping()); + get_tex_mapping(noise, b_texture_mapping); + node = noise; + } + else if (b_node.is_a(&RNA_ShaderNodeTexMusgrave)) { + BL::ShaderNodeTexMusgrave b_musgrave_node(b_node); + MusgraveTextureNode *musgrave_node = graph->create_node(); + musgrave_node->set_musgrave_type((NodeMusgraveType)b_musgrave_node.musgrave_type()); + musgrave_node->set_dimensions(b_musgrave_node.musgrave_dimensions()); + BL::TexMapping b_texture_mapping(b_musgrave_node.texture_mapping()); + get_tex_mapping(musgrave_node, b_texture_mapping); + node = musgrave_node; + } + else if (b_node.is_a(&RNA_ShaderNodeTexCoord)) { + BL::ShaderNodeTexCoord b_tex_coord_node(b_node); + TextureCoordinateNode *tex_coord = graph->create_node(); + tex_coord->set_from_dupli(b_tex_coord_node.from_instancer()); + if (b_tex_coord_node.object()) { + tex_coord->set_use_transform(true); + tex_coord->set_ob_tfm(get_transform(b_tex_coord_node.object().matrix_world())); + } + node = tex_coord; + } + else if (b_node.is_a(&RNA_ShaderNodeTexSky)) { + BL::ShaderNodeTexSky b_sky_node(b_node); + SkyTextureNode *sky = 
graph->create_node(); + sky->set_sky_type((NodeSkyType)b_sky_node.sky_type()); + sky->set_sun_direction(normalize(get_float3(b_sky_node.sun_direction()))); + sky->set_turbidity(b_sky_node.turbidity()); + sky->set_ground_albedo(b_sky_node.ground_albedo()); + sky->set_sun_disc(b_sky_node.sun_disc()); + sky->set_sun_size(b_sky_node.sun_size()); + sky->set_sun_intensity(b_sky_node.sun_intensity()); + sky->set_sun_elevation(b_sky_node.sun_elevation()); + sky->set_sun_rotation(b_sky_node.sun_rotation()); + sky->set_altitude(b_sky_node.altitude()); + sky->set_air_density(b_sky_node.air_density()); + sky->set_dust_density(b_sky_node.dust_density()); + sky->set_ozone_density(b_sky_node.ozone_density()); + BL::TexMapping b_texture_mapping(b_sky_node.texture_mapping()); + get_tex_mapping(sky, b_texture_mapping); + node = sky; + } + else if (b_node.is_a(&RNA_ShaderNodeTexIES)) { + BL::ShaderNodeTexIES b_ies_node(b_node); + IESLightNode *ies = graph->create_node(); + switch (b_ies_node.mode()) { + case BL::ShaderNodeTexIES::mode_EXTERNAL: + ies->set_filename(ustring(blender_absolute_path(b_data, b_ntree, b_ies_node.filepath()))); + break; + case BL::ShaderNodeTexIES::mode_INTERNAL: + ustring ies_content = ustring(get_text_datablock_content(b_ies_node.ies().ptr)); + if (ies_content.empty()) { + ies_content = "\n"; + } + ies->set_ies(ies_content); + break; + } + node = ies; + } + else if (b_node.is_a(&RNA_ShaderNodeTexWhiteNoise)) { + BL::ShaderNodeTexWhiteNoise b_tex_white_noise_node(b_node); + WhiteNoiseTextureNode *white_noise_node = graph->create_node(); + white_noise_node->set_dimensions(b_tex_white_noise_node.noise_dimensions()); + node = white_noise_node; + } + else if (b_node.is_a(&RNA_ShaderNodeNormalMap)) { + BL::ShaderNodeNormalMap b_normal_map_node(b_node); + NormalMapNode *nmap = graph->create_node(); + nmap->set_space((NodeNormalMapSpace)b_normal_map_node.space()); + nmap->set_attribute(ustring(b_normal_map_node.uv_map())); + node = nmap; + } + else if (b_node.is_a(&RNA_ShaderNodeTangent)) { + BL::ShaderNodeTangent b_tangent_node(b_node); + TangentNode *tangent = graph->create_node(); + tangent->set_direction_type((NodeTangentDirectionType)b_tangent_node.direction_type()); + tangent->set_axis((NodeTangentAxis)b_tangent_node.axis()); + tangent->set_attribute(ustring(b_tangent_node.uv_map())); + node = tangent; + } + else if (b_node.is_a(&RNA_ShaderNodeUVMap)) { + BL::ShaderNodeUVMap b_uvmap_node(b_node); + UVMapNode *uvm = graph->create_node(); + uvm->set_attribute(ustring(b_uvmap_node.uv_map())); + uvm->set_from_dupli(b_uvmap_node.from_instancer()); + node = uvm; + } + else if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) { + BL::ShaderNodeTexPointDensity b_point_density_node(b_node); + PointDensityTextureNode *point_density = graph->create_node(); + point_density->set_space((NodeTexVoxelSpace)b_point_density_node.space()); + point_density->set_interpolation(get_image_interpolation(b_point_density_node)); + point_density->handle = scene->image_manager->add_image( + new BlenderPointDensityLoader(b_depsgraph, b_point_density_node), + point_density->image_params()); + + b_point_density_node.cache_point_density(b_depsgraph); + node = point_density; + + /* Transformation form world space to texture space. + * + * NOTE: Do this after the texture is cached, this is because getting + * min/max will need to access this cache. 
+ */ + BL::Object b_ob(b_point_density_node.object()); + if (b_ob) { + float3 loc, size; + point_density_texture_space(b_depsgraph, b_point_density_node, loc, size); + point_density->set_tfm(transform_translate(-loc) * transform_scale(size) * + transform_inverse(get_transform(b_ob.matrix_world()))); + } + } + else if (b_node.is_a(&RNA_ShaderNodeBevel)) { + BL::ShaderNodeBevel b_bevel_node(b_node); + BevelNode *bevel = graph->create_node(); + bevel->set_samples(b_bevel_node.samples()); + node = bevel; + } + else if (b_node.is_a(&RNA_ShaderNodeDisplacement)) { + BL::ShaderNodeDisplacement b_disp_node(b_node); + DisplacementNode *disp = graph->create_node(); + disp->set_space((NodeNormalMapSpace)b_disp_node.space()); + node = disp; + } + else if (b_node.is_a(&RNA_ShaderNodeVectorDisplacement)) { + BL::ShaderNodeVectorDisplacement b_disp_node(b_node); + VectorDisplacementNode *disp = graph->create_node(); + disp->set_space((NodeNormalMapSpace)b_disp_node.space()); + disp->set_attribute(ustring("")); + node = disp; + } + else if (b_node.is_a(&RNA_ShaderNodeOutputAOV)) { + BL::ShaderNodeOutputAOV b_aov_node(b_node); + OutputAOVNode *aov = graph->create_node(); + aov->set_name(ustring(b_aov_node.name())); + node = aov; + } + + if (node) { + node->name = b_node.name(); + graph->add(node); + } + + return node; +} + +static bool node_use_modified_socket_name(ShaderNode *node) +{ + if (node->special_type == SHADER_SPECIAL_TYPE_OSL) + return false; + + return true; +} + +static ShaderInput *node_find_input_by_name(ShaderNode *node, BL::NodeSocket &b_socket) +{ + string name = b_socket.identifier(); + ShaderInput *input = node->input(name.c_str()); + + if (!input && node_use_modified_socket_name(node)) { + /* Different internal name for shader. */ + if (string_startswith(name, "Shader")) { + string_replace(name, "Shader", "Closure"); + } + input = node->input(name.c_str()); + + if (!input) { + /* Different internal numbering of two sockets with same name. + * Note that the Blender convention for unique socket names changed + * from . to _ at some point, so we check both to handle old files. */ + if (string_endswith(name, "_001")) { + string_replace(name, "_001", "2"); + } + else if (string_endswith(name, ".001")) { + string_replace(name, ".001", "2"); + } + else if (string_endswith(name, "_002")) { + string_replace(name, "_002", "3"); + } + else if (string_endswith(name, ".002")) { + string_replace(name, ".002", "3"); + } + else { + name += "1"; + } + + input = node->input(name.c_str()); + } + } + + return input; +} + +static ShaderOutput *node_find_output_by_name(ShaderNode *node, BL::NodeSocket &b_socket) +{ + string name = b_socket.identifier(); + ShaderOutput *output = node->output(name.c_str()); + + if (!output && node_use_modified_socket_name(node)) { + /* Different internal name for shader. 
 */
+    if (name == "Shader") {
+      name = "Closure";
+      output = node->output(name.c_str());
+    }
+  }
+
+  return output;
+}
+
+static void add_nodes(Scene *scene,
+                      BL::RenderEngine &b_engine,
+                      BL::BlendData &b_data,
+                      BL::Depsgraph &b_depsgraph,
+                      BL::Scene &b_scene,
+                      ShaderGraph *graph,
+                      BL::ShaderNodeTree &b_ntree,
+                      const ProxyMap &proxy_input_map,
+                      const ProxyMap &proxy_output_map)
+{
+  /* add nodes */
+  PtrInputMap input_map;
+  PtrOutputMap output_map;
+
+  /* find the node to use for output if there are multiple */
+  BL::ShaderNode output_node = b_ntree.get_output_node(
+      BL::ShaderNodeOutputMaterial::target_CYCLES);
+
+  /* add nodes */
+  for (BL::Node &b_node : b_ntree.nodes) {
+    if (b_node.mute() || b_node.is_a(&RNA_NodeReroute)) {
+      /* replace muted node with internal links */
+      for (BL::NodeLink &b_link : b_node.internal_links) {
+        BL::NodeSocket to_socket(b_link.to_socket());
+        SocketType::Type to_socket_type = convert_socket_type(to_socket);
+        if (to_socket_type == SocketType::UNDEFINED) {
+          continue;
+        }
+
+        ConvertNode *proxy = graph->create_node<ConvertNode>(to_socket_type, to_socket_type, true);
+
+        input_map[b_link.from_socket().ptr.data] = proxy->inputs[0];
+        output_map[b_link.to_socket().ptr.data] = proxy->outputs[0];
+
+        graph->add(proxy);
+      }
+    }
+    else if (b_node.is_a(&RNA_ShaderNodeGroup) || b_node.is_a(&RNA_NodeCustomGroup) ||
+             b_node.is_a(&RNA_ShaderNodeCustomGroup)) {
+
+      BL::ShaderNodeTree b_group_ntree(PointerRNA_NULL);
+      if (b_node.is_a(&RNA_ShaderNodeGroup))
+        b_group_ntree = BL::ShaderNodeTree(((BL::NodeGroup)(b_node)).node_tree());
+      else if (b_node.is_a(&RNA_NodeCustomGroup))
+        b_group_ntree = BL::ShaderNodeTree(((BL::NodeCustomGroup)(b_node)).node_tree());
+      else
+        b_group_ntree = BL::ShaderNodeTree(((BL::ShaderNodeCustomGroup)(b_node)).node_tree());
+
+      ProxyMap group_proxy_input_map, group_proxy_output_map;
+
+      /* Add a proxy node for each socket
+       * Do this even if the node group has no internal tree,
+       * so that links have something to connect to and assert won't fail.
+       */
+      for (BL::NodeSocket &b_input : b_node.inputs) {
+        SocketType::Type input_type = convert_socket_type(b_input);
+        if (input_type == SocketType::UNDEFINED) {
+          continue;
+        }
+
+        ConvertNode *proxy = graph->create_node<ConvertNode>(input_type, input_type, true);
+        graph->add(proxy);
+
+        /* register the proxy node for internal binding */
+        group_proxy_input_map[b_input.identifier()] = proxy;
+
+        input_map[b_input.ptr.data] = proxy->inputs[0];
+
+        set_default_value(proxy->inputs[0], b_input, b_data, b_ntree);
+      }
+      for (BL::NodeSocket &b_output : b_node.outputs) {
+        SocketType::Type output_type = convert_socket_type(b_output);
+        if (output_type == SocketType::UNDEFINED) {
+          continue;
+        }
+
+        ConvertNode *proxy = graph->create_node<ConvertNode>(output_type, output_type, true);
+        graph->add(proxy);
+
+        /* register the proxy node for internal binding */
+        group_proxy_output_map[b_output.identifier()] = proxy;
+
+        output_map[b_output.ptr.data] = proxy->outputs[0];
+      }
+
+      if (b_group_ntree) {
+        add_nodes(scene,
+                  b_engine,
+                  b_data,
+                  b_depsgraph,
+                  b_scene,
+                  graph,
+                  b_group_ntree,
+                  group_proxy_input_map,
+                  group_proxy_output_map);
+      }
+    }
+    else if (b_node.is_a(&RNA_NodeGroupInput)) {
+      /* map each socket to a proxy node */
+      for (BL::NodeSocket &b_output : b_node.outputs) {
+        ProxyMap::const_iterator proxy_it = proxy_input_map.find(b_output.identifier());
+        if (proxy_it != proxy_input_map.end()) {
+          ConvertNode *proxy = proxy_it->second;
+
+          output_map[b_output.ptr.data] = proxy->outputs[0];
+        }
+      }
+    }
+    else if (b_node.is_a(&RNA_NodeGroupOutput)) {
+      BL::NodeGroupOutput b_output_node(b_node);
+      /* only the active group output is used */
+      if (b_output_node.is_active_output()) {
+        /* map each socket to a proxy node */
+        for (BL::NodeSocket &b_input : b_node.inputs) {
+          ProxyMap::const_iterator proxy_it = proxy_output_map.find(b_input.identifier());
+          if (proxy_it != proxy_output_map.end()) {
+            ConvertNode *proxy = proxy_it->second;
+
+            input_map[b_input.ptr.data] = proxy->inputs[0];
+
+            set_default_value(proxy->inputs[0], b_input, b_data, b_ntree);
+          }
+        }
+      }
+    }
+    else {
+      ShaderNode *node = NULL;
+
+      if (b_node.ptr.data == output_node.ptr.data) {
+        node = graph->output();
+      }
+      else {
+        BL::ShaderNode b_shader_node(b_node);
+        node = add_node(
+            scene, b_engine, b_data, b_depsgraph, b_scene, graph, b_ntree, b_shader_node);
+      }
+
+      if (node) {
+        /* map node sockets for linking */
+        for (BL::NodeSocket &b_input : b_node.inputs) {
+          ShaderInput *input = node_find_input_by_name(node, b_input);
+          if (!input) {
+            /* XXX should not happen, report error? */
+            continue;
+          }
+          input_map[b_input.ptr.data] = input;
+
+          set_default_value(input, b_input, b_data, b_ntree);
+        }
+        for (BL::NodeSocket &b_output : b_node.outputs) {
+          ShaderOutput *output = node_find_output_by_name(node, b_output);
+          if (!output) {
+            /* XXX should not happen, report error? */
+            continue;
+          }
+          output_map[b_output.ptr.data] = output;
+        }
+      }
+    }
+  }
+
+  /* connect nodes */
+  for (BL::NodeLink &b_link : b_ntree.links) {
+    /* Ignore invalid links to avoid unwanted cycles created in graph.
+     * Also ignore links with unavailable sockets. */
+    if (!(b_link.is_valid() && b_link.from_socket().enabled() && b_link.to_socket().enabled()) ||
+        b_link.is_muted()) {
+      continue;
+    }
+    /* get blender link data */
+    BL::NodeSocket b_from_sock = b_link.from_socket();
+    BL::NodeSocket b_to_sock = b_link.to_socket();
+
+    ShaderOutput *output = 0;
+    ShaderInput *input = 0;
+
+    PtrOutputMap::iterator output_it = output_map.find(b_from_sock.ptr.data);
+    if (output_it != output_map.end())
+      output = output_it->second;
+    PtrInputMap::iterator input_it = input_map.find(b_to_sock.ptr.data);
+    if (input_it != input_map.end())
+      input = input_it->second;
+
+    /* either node may be NULL when the node was not exported, typically
+     * because the node type is not supported */
+    if (output && input)
+      graph->connect(output, input);
+  }
+}
+
+static void add_nodes(Scene *scene,
+                      BL::RenderEngine &b_engine,
+                      BL::BlendData &b_data,
+                      BL::Depsgraph &b_depsgraph,
+                      BL::Scene &b_scene,
+                      ShaderGraph *graph,
+                      BL::ShaderNodeTree &b_ntree)
+{
+  static const ProxyMap empty_proxy_map;
+  add_nodes(scene,
+            b_engine,
+            b_data,
+            b_depsgraph,
+            b_scene,
+            graph,
+            b_ntree,
+            empty_proxy_map,
+            empty_proxy_map);
+}
+
+/* Sync Materials */
+
+void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)
+{
+  shader_map.set_default(scene->default_surface);
+
+  TaskPool pool;
+  set<Shader *> updated_shaders;
+
+  for (BL::ID &b_id : b_depsgraph.ids) {
+    if (!b_id.is_a(&RNA_Material)) {
+      continue;
+    }
+
+    BL::Material b_mat(b_id);
+    Shader *shader;
+
+    /* test if we need to sync */
+    if (shader_map.add_or_update(&shader, b_mat) || update_all) {
+      ShaderGraph *graph = new ShaderGraph();
+
+      shader->name = b_mat.name().c_str();
+      shader->set_pass_id(b_mat.pass_index());
+
+      /* create nodes */
+      if (b_mat.use_nodes() && b_mat.node_tree()) {
+        BL::ShaderNodeTree b_ntree(b_mat.node_tree());
+
+        add_nodes(scene, b_engine, b_data, b_depsgraph, b_scene, graph, b_ntree);
+      }
+      else {
+        DiffuseBsdfNode *diffuse = graph->create_node<DiffuseBsdfNode>();
+        diffuse->set_color(get_float3(b_mat.diffuse_color()));
+        graph->add(diffuse);
+
+        ShaderNode *out = graph->output();
+        graph->connect(diffuse->output("BSDF"), out->input("Surface"));
+      }
+
+      /* settings */
+      PointerRNA cmat = RNA_pointer_get(&b_mat.ptr, "cycles");
+      shader->set_use_mis(get_boolean(cmat, "sample_as_light"));
+      shader->set_use_transparent_shadow(get_boolean(cmat, "use_transparent_shadow"));
+      shader->set_heterogeneous_volume(!get_boolean(cmat, "homogeneous_volume"));
+      shader->set_volume_sampling_method(get_volume_sampling(cmat));
+      shader->set_volume_interpolation_method(get_volume_interpolation(cmat));
+      shader->set_volume_step_rate(get_float(cmat, "volume_step_rate"));
+      shader->set_displacement_method(get_displacement_method(cmat));
+
+      shader->set_graph(graph);
+
+      /* By simplifying the shader graph as soon as possible, some
+       * redundant shader nodes might be removed which prevents loading
+       * unnecessary attributes later.
+       *
+       * However, since graph simplification also accounts for e.g. mix
+       * weight, this would cause frequent expensive resyncs in interactive
+       * sessions, so for those sessions optimization is only performed
+       * right before compiling.
+       */
+      if (!preview) {
+        pool.push(function_bind(&ShaderGraph::simplify, graph, scene));
+        /* NOTE: Update shaders out of the threads since those routines
+         * are accessing and writing to a global context.
+         */
+        updated_shaders.insert(shader);
+      }
+      else {
+        /* NOTE: Update tagging can access links which are being
+         * optimized out.
+         */
+        shader->tag_update(scene);
+      }
+    }
+  }
+
+  pool.wait_work();
+
+  foreach (Shader *shader, updated_shaders) {
+    shader->tag_update(scene);
+  }
+}
+
+/* Sync World */
+
+void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all)
+{
+  Background *background = scene->background;
+  Integrator *integrator = scene->integrator;
+  PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+
+  BL::World b_world = b_scene.world();
+
+  BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui);
+
+  if (world_recalc || update_all || b_world.ptr.data != world_map ||
+      viewport_parameters.shader_modified(new_viewport_parameters)) {
+    Shader *shader = scene->default_background;
+    ShaderGraph *graph = new ShaderGraph();
+
+    /* create nodes */
+    if (new_viewport_parameters.use_scene_world && b_world && b_world.use_nodes() &&
+        b_world.node_tree()) {
+      BL::ShaderNodeTree b_ntree(b_world.node_tree());
+
+      add_nodes(scene, b_engine, b_data, b_depsgraph, b_scene, graph, b_ntree);
+
+      /* volume */
+      PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles");
+      shader->set_heterogeneous_volume(!get_boolean(cworld, "homogeneous_volume"));
+      shader->set_volume_sampling_method(get_volume_sampling(cworld));
+      shader->set_volume_interpolation_method(get_volume_interpolation(cworld));
+      shader->set_volume_step_rate(get_float(cworld, "volume_step_size"));
+    }
+    else if (new_viewport_parameters.use_scene_world && b_world) {
+      BackgroundNode *background = graph->create_node<BackgroundNode>();
+      background->set_color(get_float3(b_world.color()));
+      graph->add(background);
+
+      ShaderNode *out = graph->output();
+      graph->connect(background->output("Background"), out->input("Surface"));
+    }
+    else if (!new_viewport_parameters.use_scene_world) {
+      float3 world_color;
+      if (b_world) {
+        world_color = get_float3(b_world.color());
+      }
+      else {
+        world_color = zero_float3();
+      }
+
+      BackgroundNode *background = graph->create_node<BackgroundNode>();
+      graph->add(background);
+
+      LightPathNode *light_path = graph->create_node<LightPathNode>();
+      graph->add(light_path);
+
+      MixNode *mix_scene_with_background = graph->create_node<MixNode>();
+      mix_scene_with_background->set_color2(world_color);
+      graph->add(mix_scene_with_background);
+
+      EnvironmentTextureNode *texture_environment = graph->create_node<EnvironmentTextureNode>();
+      texture_environment->set_tex_mapping_type(TextureMapping::VECTOR);
+      float3 rotation_z = texture_environment->get_tex_mapping_rotation();
+      rotation_z[2] = new_viewport_parameters.studiolight_rotate_z;
+      texture_environment->set_tex_mapping_rotation(rotation_z);
+      texture_environment->set_filename(new_viewport_parameters.studiolight_path);
+      graph->add(texture_environment);
+
+      MixNode *mix_intensity = graph->create_node<MixNode>();
+      mix_intensity->set_mix_type(NODE_MIX_MUL);
+      mix_intensity->set_fac(1.0f);
+      mix_intensity->set_color2(make_float3(new_viewport_parameters.studiolight_intensity,
+                                            new_viewport_parameters.studiolight_intensity,
+                                            new_viewport_parameters.studiolight_intensity));
+      graph->add(mix_intensity);
+
+      TextureCoordinateNode *texture_coordinate = graph->create_node<TextureCoordinateNode>();
+      graph->add(texture_coordinate);
+
+      MixNode *mix_background_with_environment = graph->create_node<MixNode>();
+      mix_background_with_environment->set_fac(
+          new_viewport_parameters.studiolight_background_alpha);
+      mix_background_with_environment->set_color1(world_color);
+      graph->add(mix_background_with_environment);
+
+      ShaderNode *out = graph->output();
+
+      graph->connect(texture_coordinate->output("Generated"),
+                     texture_environment->input("Vector"));
+ graph->connect(texture_environment->output("Color"), mix_intensity->input("Color1")); + graph->connect(light_path->output("Is Camera Ray"), mix_scene_with_background->input("Fac")); + graph->connect(mix_intensity->output("Color"), mix_scene_with_background->input("Color1")); + graph->connect(mix_intensity->output("Color"), + mix_background_with_environment->input("Color2")); + graph->connect(mix_background_with_environment->output("Color"), + mix_scene_with_background->input("Color2")); + graph->connect(mix_scene_with_background->output("Color"), background->input("Color")); + graph->connect(background->output("Background"), out->input("Surface")); + } + + /* Visibility */ + if (b_world) { + PointerRNA cvisibility = RNA_pointer_get(&b_world.ptr, "cycles_visibility"); + uint visibility = 0; + + visibility |= get_boolean(cvisibility, "camera") ? PATH_RAY_CAMERA : 0; + visibility |= get_boolean(cvisibility, "diffuse") ? PATH_RAY_DIFFUSE : 0; + visibility |= get_boolean(cvisibility, "glossy") ? PATH_RAY_GLOSSY : 0; + visibility |= get_boolean(cvisibility, "transmission") ? PATH_RAY_TRANSMIT : 0; + visibility |= get_boolean(cvisibility, "scatter") ? PATH_RAY_VOLUME_SCATTER : 0; + + background->set_visibility(visibility); + } + + shader->set_graph(graph); + shader->tag_update(scene); + } + + /* Fast GI */ + if (b_world) { + BL::WorldLighting b_light = b_world.light_settings(); + enum { FAST_GI_METHOD_REPLACE = 0, FAST_GI_METHOD_ADD = 1, FAST_GI_METHOD_NUM }; + + const bool use_fast_gi = get_boolean(cscene, "use_fast_gi"); + if (use_fast_gi) { + const int fast_gi_method = get_enum( + cscene, "fast_gi_method", FAST_GI_METHOD_NUM, FAST_GI_METHOD_REPLACE); + integrator->set_ao_factor((fast_gi_method == FAST_GI_METHOD_REPLACE) ? b_light.ao_factor() : + 0.0f); + integrator->set_ao_additive_factor( + (fast_gi_method == FAST_GI_METHOD_ADD) ? 
b_light.ao_factor() : 0.0f); + } + else { + integrator->set_ao_factor(0.0f); + integrator->set_ao_additive_factor(0.0f); + } + + integrator->set_ao_distance(b_light.distance()); + } + else { + integrator->set_ao_factor(0.0f); + integrator->set_ao_additive_factor(0.0f); + integrator->set_ao_distance(10.0f); + } + + background->set_transparent(b_scene.render().film_transparent()); + + if (background->get_transparent()) { + background->set_transparent_glass(get_boolean(cscene, "film_transparent_glass")); + background->set_transparent_roughness_threshold( + get_float(cscene, "film_transparent_roughness")); + } + else { + background->set_transparent_glass(false); + background->set_transparent_roughness_threshold(0.0f); + } + + background->set_use_shader(view_layer.use_background_shader || + viewport_parameters.use_custom_shader()); + + background->tag_update(scene); +} + +/* Sync Lights */ + +void BlenderSync::sync_lights(BL::Depsgraph &b_depsgraph, bool update_all) +{ + shader_map.set_default(scene->default_light); + + for (BL::ID &b_id : b_depsgraph.ids) { + if (!b_id.is_a(&RNA_Light)) { + continue; + } + + BL::Light b_light(b_id); + Shader *shader; + + /* test if we need to sync */ + if (shader_map.add_or_update(&shader, b_light) || update_all) { + ShaderGraph *graph = new ShaderGraph(); + + /* create nodes */ + if (b_light.use_nodes() && b_light.node_tree()) { + shader->name = b_light.name().c_str(); + + BL::ShaderNodeTree b_ntree(b_light.node_tree()); + + add_nodes(scene, b_engine, b_data, b_depsgraph, b_scene, graph, b_ntree); + } + else { + EmissionNode *emission = graph->create_node(); + emission->set_color(one_float3()); + emission->set_strength(1.0f); + graph->add(emission); + + ShaderNode *out = graph->output(); + graph->connect(emission->output("Emission"), out->input("Surface")); + } + + shader->set_graph(graph); + shader->tag_update(scene); + } + } +} + +void BlenderSync::sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d) +{ + /* for auto refresh images */ + ImageManager *image_manager = scene->image_manager; + const int frame = b_scene.frame_current(); + const bool auto_refresh_update = image_manager->set_animation_frame_update(frame); + + shader_map.pre_sync(); + + sync_world(b_depsgraph, b_v3d, auto_refresh_update); + sync_lights(b_depsgraph, auto_refresh_update); + sync_materials(b_depsgraph, auto_refresh_update); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/sync.cpp b/intern/cycles/blender/sync.cpp new file mode 100644 index 00000000000..73d3a4436b5 --- /dev/null +++ b/intern/cycles/blender/sync.cpp @@ -0,0 +1,949 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "scene/background.h"
+#include "scene/camera.h"
+#include "scene/curves.h"
+#include "scene/film.h"
+#include "scene/integrator.h"
+#include "scene/light.h"
+#include "scene/mesh.h"
+#include "scene/object.h"
+#include "scene/procedural.h"
+#include "scene/scene.h"
+#include "scene/shader.h"
+#include "scene/shader_graph.h"
+#include "scene/shader_nodes.h"
+
+#include "device/device.h"
+
+#include "blender/device.h"
+#include "blender/session.h"
+#include "blender/sync.h"
+#include "blender/util.h"
+
+#include "util/debug.h"
+#include "util/foreach.h"
+#include "util/hash.h"
+#include "util/log.h"
+#include "util/opengl.h"
+#include "util/openimagedenoise.h"
+
+CCL_NAMESPACE_BEGIN
+
+static const char *cryptomatte_prefix = "Crypto";
+
+/* Constructor */
+
+BlenderSync::BlenderSync(BL::RenderEngine &b_engine,
+                         BL::BlendData &b_data,
+                         BL::Scene &b_scene,
+                         Scene *scene,
+                         bool preview,
+                         bool use_developer_ui,
+                         Progress &progress)
+    : b_engine(b_engine),
+      b_data(b_data),
+      b_scene(b_scene),
+      shader_map(scene),
+      object_map(scene),
+      procedural_map(scene),
+      geometry_map(scene),
+      light_map(scene),
+      particle_system_map(scene),
+      world_map(NULL),
+      world_recalc(false),
+      scene(scene),
+      preview(preview),
+      experimental(false),
+      use_developer_ui(use_developer_ui),
+      dicing_rate(1.0f),
+      max_subdivisions(12),
+      progress(progress),
+      has_updates_(true)
+{
+  PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+  dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") :
+                          RNA_float_get(&cscene, "dicing_rate");
+  max_subdivisions = RNA_int_get(&cscene, "max_subdivisions");
+}
+
+BlenderSync::~BlenderSync()
+{
+}
+
+void BlenderSync::reset(BL::BlendData &b_data, BL::Scene &b_scene)
+{
+  /* Update data and scene pointers in case they change in session reset,
+   * for example after undo.
+   * Note that we do not modify the `has_updates_` flag here because the sync
+   * reset is also used during viewport navigation. */
+  this->b_data = b_data;
+  this->b_scene = b_scene;
+}
+
+/* Sync */
+
+void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d)
+{
+  /* Sync recalc flags from blender to cycles. Actual update is done separate,
+   * so we can do it later on if doing it immediate is not suitable. */
+
+  if (experimental) {
+    /* Mark all meshes as needing to be exported again if dicing changed. */
+    PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+    bool dicing_prop_changed = false;
+
+    float updated_dicing_rate = preview ? RNA_float_get(&cscene, "preview_dicing_rate") :
+                                          RNA_float_get(&cscene, "dicing_rate");
+
+    if (dicing_rate != updated_dicing_rate) {
+      dicing_rate = updated_dicing_rate;
+      dicing_prop_changed = true;
+    }
+
+    int updated_max_subdivisions = RNA_int_get(&cscene, "max_subdivisions");
+
+    if (max_subdivisions != updated_max_subdivisions) {
+      max_subdivisions = updated_max_subdivisions;
+      dicing_prop_changed = true;
+    }
+
+    if (dicing_prop_changed) {
+      has_updates_ = true;
+
+      for (const pair<GeometryKey, Geometry *> &iter : geometry_map.key_to_scene_data()) {
+        Geometry *geom = iter.second;
+        if (geom->is_mesh()) {
+          Mesh *mesh = static_cast<Mesh *>(geom);
+          if (mesh->get_subdivision_type() != Mesh::SUBDIVISION_NONE) {
+            PointerRNA id_ptr;
+            RNA_id_pointer_create((::ID *)iter.first.id, &id_ptr);
+            geometry_map.set_recalc(BL::ID(id_ptr));
+          }
+        }
+      }
+    }
+  }
+
+  /* Iterate over all IDs in this depsgraph. */
+  for (BL::DepsgraphUpdate &b_update : b_depsgraph.updates) {
+    /* TODO(sergey): Can do more selective filter here.
For example, ignore changes made to + * screen datablock. Note that sync_data() needs to be called after object deletion, and + * currently this is ensured by the scene ID tagged for update, which sets the `has_updates_` + * flag. */ + has_updates_ = true; + + BL::ID b_id(b_update.id()); + + /* Material */ + if (b_id.is_a(&RNA_Material)) { + BL::Material b_mat(b_id); + shader_map.set_recalc(b_mat); + } + /* Light */ + else if (b_id.is_a(&RNA_Light)) { + BL::Light b_light(b_id); + shader_map.set_recalc(b_light); + } + /* Object */ + else if (b_id.is_a(&RNA_Object)) { + BL::Object b_ob(b_id); + const bool is_geometry = object_is_geometry(b_ob); + const bool is_light = !is_geometry && object_is_light(b_ob); + + if (b_ob.is_instancer() && b_update.is_updated_shading()) { + /* Needed for e.g. object color updates on instancer. */ + object_map.set_recalc(b_ob); + } + + if (is_geometry || is_light) { + const bool updated_geometry = b_update.is_updated_geometry(); + + /* Geometry (mesh, hair, volume). */ + if (is_geometry) { + if (b_update.is_updated_transform() || b_update.is_updated_shading()) { + object_map.set_recalc(b_ob); + } + + if (updated_geometry || + (object_subdivision_type(b_ob, preview, experimental) != Mesh::SUBDIVISION_NONE)) { + BL::ID key = BKE_object_is_modified(b_ob) ? b_ob : b_ob.data(); + geometry_map.set_recalc(key); + } + + if (updated_geometry) { + BL::Object::particle_systems_iterator b_psys; + for (b_ob.particle_systems.begin(b_psys); b_psys != b_ob.particle_systems.end(); + ++b_psys) { + particle_system_map.set_recalc(b_ob); + } + } + } + /* Light */ + else if (is_light) { + if (b_update.is_updated_transform() || b_update.is_updated_shading()) { + object_map.set_recalc(b_ob); + light_map.set_recalc(b_ob); + } + + if (updated_geometry) { + light_map.set_recalc(b_ob); + } + } + } + } + /* Mesh */ + else if (b_id.is_a(&RNA_Mesh)) { + BL::Mesh b_mesh(b_id); + geometry_map.set_recalc(b_mesh); + } + /* World */ + else if (b_id.is_a(&RNA_World)) { + BL::World b_world(b_id); + if (world_map == b_world.ptr.data) { + world_recalc = true; + } + } + /* Volume */ + else if (b_id.is_a(&RNA_Volume)) { + BL::Volume b_volume(b_id); + geometry_map.set_recalc(b_volume); + } + } + + if (b_v3d) { + BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui); + + if (viewport_parameters.shader_modified(new_viewport_parameters)) { + world_recalc = true; + has_updates_ = true; + } + + has_updates_ |= viewport_parameters.modified(new_viewport_parameters); + } +} + +void BlenderSync::sync_data(BL::RenderSettings &b_render, + BL::Depsgraph &b_depsgraph, + BL::SpaceView3D &b_v3d, + BL::Object &b_override, + int width, + int height, + void **python_thread_state) +{ + if (!has_updates_) { + return; + } + + scoped_timer timer; + + BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval(); + + /* TODO(sergey): This feels weak to pass view layer to the integrator, and even weaker to have an + * implicit check on whether it is a background render or not. What is the nicer thing here? 
*/ + const bool background = !b_v3d; + + sync_view_layer(b_view_layer); + sync_integrator(b_view_layer, background); + sync_film(b_view_layer, b_v3d); + sync_shaders(b_depsgraph, b_v3d); + sync_images(); + + geometry_synced.clear(); /* use for objects and motion sync */ + + if (scene->need_motion() == Scene::MOTION_PASS || scene->need_motion() == Scene::MOTION_NONE || + scene->camera->get_motion_position() == Camera::MOTION_POSITION_CENTER) { + sync_objects(b_depsgraph, b_v3d); + } + sync_motion(b_render, b_depsgraph, b_v3d, b_override, width, height, python_thread_state); + + geometry_synced.clear(); + + /* Shader sync done at the end, since object sync uses it. + * false = don't delete unused shaders, not supported. */ + shader_map.post_sync(false); + + free_data_after_sync(b_depsgraph); + + VLOG(1) << "Total time spent synchronizing data: " << timer.get_time(); + + has_updates_ = false; +} + +/* Integrator */ + +void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background) +{ + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + + experimental = (get_enum(cscene, "feature_set") != 0); + + Integrator *integrator = scene->integrator; + + integrator->set_min_bounce(get_int(cscene, "min_light_bounces")); + integrator->set_max_bounce(get_int(cscene, "max_bounces")); + + integrator->set_max_diffuse_bounce(get_int(cscene, "diffuse_bounces")); + integrator->set_max_glossy_bounce(get_int(cscene, "glossy_bounces")); + integrator->set_max_transmission_bounce(get_int(cscene, "transmission_bounces")); + integrator->set_max_volume_bounce(get_int(cscene, "volume_bounces")); + + integrator->set_transparent_min_bounce(get_int(cscene, "min_transparent_bounces")); + integrator->set_transparent_max_bounce(get_int(cscene, "transparent_max_bounces")); + + integrator->set_volume_max_steps(get_int(cscene, "volume_max_steps")); + float volume_step_rate = (preview) ? get_float(cscene, "volume_preview_step_rate") : + get_float(cscene, "volume_step_rate"); + integrator->set_volume_step_rate(volume_step_rate); + + integrator->set_caustics_reflective(get_boolean(cscene, "caustics_reflective")); + integrator->set_caustics_refractive(get_boolean(cscene, "caustics_refractive")); + integrator->set_filter_glossy(get_float(cscene, "blur_glossy")); + + int seed = get_int(cscene, "seed"); + if (get_boolean(cscene, "use_animated_seed")) { + seed = hash_uint2(b_scene.frame_current(), get_int(cscene, "seed")); + if (b_scene.frame_subframe() != 0.0f) { + /* TODO(sergey): Ideally should be some sort of hash_merge, + * but this is good enough for now. 
+ */ + seed += hash_uint2((int)(b_scene.frame_subframe() * (float)INT_MAX), + get_int(cscene, "seed")); + } + } + + integrator->set_seed(seed); + + integrator->set_sample_clamp_direct(get_float(cscene, "sample_clamp_direct")); + integrator->set_sample_clamp_indirect(get_float(cscene, "sample_clamp_indirect")); + if (!preview) { + integrator->set_motion_blur(view_layer.use_motion_blur); + } + + integrator->set_light_sampling_threshold(get_float(cscene, "light_sampling_threshold")); + + SamplingPattern sampling_pattern = (SamplingPattern)get_enum( + cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_SOBOL); + integrator->set_sampling_pattern(sampling_pattern); + + if (preview) { + integrator->set_use_adaptive_sampling( + RNA_boolean_get(&cscene, "use_preview_adaptive_sampling")); + integrator->set_adaptive_threshold(get_float(cscene, "preview_adaptive_threshold")); + integrator->set_adaptive_min_samples(get_int(cscene, "preview_adaptive_min_samples")); + } + else { + integrator->set_use_adaptive_sampling(RNA_boolean_get(&cscene, "use_adaptive_sampling")); + integrator->set_adaptive_threshold(get_float(cscene, "adaptive_threshold")); + integrator->set_adaptive_min_samples(get_int(cscene, "adaptive_min_samples")); + } + + if (get_boolean(cscene, "use_fast_gi")) { + if (preview) { + integrator->set_ao_bounces(get_int(cscene, "ao_bounces")); + } + else { + integrator->set_ao_bounces(get_int(cscene, "ao_bounces_render")); + } + } + else { + integrator->set_ao_bounces(0); + } + + const DenoiseParams denoise_params = get_denoise_params(b_scene, b_view_layer, background); + integrator->set_use_denoise(denoise_params.use); + + /* Only update denoiser parameters if the denoiser is actually used. This allows to tweak + * denoiser parameters before enabling it without render resetting on every change. The downside + * is that the interface and the integrator are technically out of sync. */ + if (denoise_params.use) { + integrator->set_denoiser_type(denoise_params.type); + integrator->set_denoise_start_sample(denoise_params.start_sample); + integrator->set_use_denoise_pass_albedo(denoise_params.use_pass_albedo); + integrator->set_use_denoise_pass_normal(denoise_params.use_pass_normal); + integrator->set_denoiser_prefilter(denoise_params.prefilter); + } + + /* UPDATE_NONE as we don't want to tag the integrator as modified (this was done by the + * set calls above), but we need to make sure that the dependent things are tagged. */ + integrator->tag_update(scene, Integrator::UPDATE_NONE); +} + +/* Film */ + +void BlenderSync::sync_film(BL::ViewLayer &b_view_layer, BL::SpaceView3D &b_v3d) +{ + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles"); + + Film *film = scene->film; + + if (b_v3d) { + const BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui); + film->set_display_pass(new_viewport_parameters.display_pass); + film->set_show_active_pixels(new_viewport_parameters.show_active_pixels); + } + + film->set_exposure(get_float(cscene, "film_exposure")); + film->set_filter_type( + (FilterType)get_enum(cscene, "pixel_filter_type", FILTER_NUM_TYPES, FILTER_BLACKMAN_HARRIS)); + float filter_width = (film->get_filter_type() == FILTER_BOX) ? 
1.0f : + get_float(cscene, "filter_width"); + film->set_filter_width(filter_width); + + if (b_scene.world()) { + BL::WorldMistSettings b_mist = b_scene.world().mist_settings(); + + film->set_mist_start(b_mist.start()); + film->set_mist_depth(b_mist.depth()); + + switch (b_mist.falloff()) { + case BL::WorldMistSettings::falloff_QUADRATIC: + film->set_mist_falloff(2.0f); + break; + case BL::WorldMistSettings::falloff_LINEAR: + film->set_mist_falloff(1.0f); + break; + case BL::WorldMistSettings::falloff_INVERSE_QUADRATIC: + film->set_mist_falloff(0.5f); + break; + } + } + + /* Blender viewport does not support proper shadow catcher compositing, so force an approximate + * mode to improve visual feedback. */ + if (b_v3d) { + film->set_use_approximate_shadow_catcher(true); + } + else { + film->set_use_approximate_shadow_catcher(!get_boolean(crl, "use_pass_shadow_catcher")); + } +} + +/* Render Layer */ + +void BlenderSync::sync_view_layer(BL::ViewLayer &b_view_layer) +{ + view_layer.name = b_view_layer.name(); + + /* Filter. */ + view_layer.use_background_shader = b_view_layer.use_sky(); + /* Always enable surfaces for baking, otherwise there is nothing to bake to. */ + view_layer.use_surfaces = b_view_layer.use_solid() || scene->bake_manager->get_baking(); + view_layer.use_hair = b_view_layer.use_strand(); + view_layer.use_volumes = b_view_layer.use_volumes(); + view_layer.use_motion_blur = b_view_layer.use_motion_blur() && + b_scene.render().use_motion_blur(); + + /* Material override. */ + view_layer.material_override = b_view_layer.material_override(); + + /* Sample override. */ + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + int use_layer_samples = get_enum(cscene, "use_layer_samples"); + + view_layer.bound_samples = (use_layer_samples == 1); + view_layer.samples = 0; + + if (use_layer_samples != 2) { + int samples = b_view_layer.samples(); + view_layer.samples = samples; + } +} + +/* Images */ +void BlenderSync::sync_images() +{ + /* Sync is a convention for this API, but currently it frees unused buffers. */ + + const bool is_interface_locked = b_engine.render() && b_engine.render().use_lock_interface(); + if (is_interface_locked == false && BlenderSession::headless == false) { + /* If interface is not locked, it's possible image is needed for + * the display. + */ + return; + } + /* Free buffers used by images which are not needed for render. */ + for (BL::Image &b_image : b_data.images) { + /* TODO(sergey): Consider making it an utility function to check + * whether image is considered builtin. + */ + const bool is_builtin = b_image.packed_file() || + b_image.source() == BL::Image::source_GENERATED || + b_image.source() == BL::Image::source_MOVIE || b_engine.is_preview(); + if (is_builtin == false) { + b_image.buffers_free(); + } + /* TODO(sergey): Free builtin images not used by any shader. 
 */
+  }
+}
+
+/* Passes */
+
+static PassType get_blender_pass_type(BL::RenderPass &b_pass)
+{
+  string name = b_pass.name();
+#define MAP_PASS(passname, passtype) \
+  if (name == passname) { \
+    return passtype; \
+  } \
+  ((void)0)
+
+  /* NOTE: Keep in sync with defined names from DNA_scene_types.h */
+
+  MAP_PASS("Combined", PASS_COMBINED);
+  MAP_PASS("Noisy Image", PASS_COMBINED);
+
+  MAP_PASS("Depth", PASS_DEPTH);
+  MAP_PASS("Mist", PASS_MIST);
+  MAP_PASS("Position", PASS_POSITION);
+  MAP_PASS("Normal", PASS_NORMAL);
+  MAP_PASS("IndexOB", PASS_OBJECT_ID);
+  MAP_PASS("UV", PASS_UV);
+  MAP_PASS("Vector", PASS_MOTION);
+  MAP_PASS("IndexMA", PASS_MATERIAL_ID);
+
+  MAP_PASS("DiffDir", PASS_DIFFUSE_DIRECT);
+  MAP_PASS("GlossDir", PASS_GLOSSY_DIRECT);
+  MAP_PASS("TransDir", PASS_TRANSMISSION_DIRECT);
+  MAP_PASS("VolumeDir", PASS_VOLUME_DIRECT);
+
+  MAP_PASS("DiffInd", PASS_DIFFUSE_INDIRECT);
+  MAP_PASS("GlossInd", PASS_GLOSSY_INDIRECT);
+  MAP_PASS("TransInd", PASS_TRANSMISSION_INDIRECT);
+  MAP_PASS("VolumeInd", PASS_VOLUME_INDIRECT);
+
+  MAP_PASS("DiffCol", PASS_DIFFUSE_COLOR);
+  MAP_PASS("GlossCol", PASS_GLOSSY_COLOR);
+  MAP_PASS("TransCol", PASS_TRANSMISSION_COLOR);
+
+  MAP_PASS("Emit", PASS_EMISSION);
+  MAP_PASS("Env", PASS_BACKGROUND);
+  MAP_PASS("AO", PASS_AO);
+  MAP_PASS("Shadow", PASS_SHADOW);
+
+  MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE);
+  MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL);
+
+  MAP_PASS("Denoising Normal", PASS_DENOISING_NORMAL);
+  MAP_PASS("Denoising Albedo", PASS_DENOISING_ALBEDO);
+  MAP_PASS("Denoising Depth", PASS_DENOISING_DEPTH);
+
+  MAP_PASS("Shadow Catcher", PASS_SHADOW_CATCHER);
+  MAP_PASS("Noisy Shadow Catcher", PASS_SHADOW_CATCHER);
+
+  MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
+  MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
+
+  if (string_startswith(name, cryptomatte_prefix)) {
+    return PASS_CRYPTOMATTE;
+  }
+
+#undef MAP_PASS
+
+  return PASS_NONE;
+}
+
+static Pass *pass_add(Scene *scene,
+                      PassType type,
+                      const char *name,
+                      PassMode mode = PassMode::DENOISED)
+{
+  Pass *pass = scene->create_node<Pass>();
+
+  pass->set_type(type);
+  pass->set_name(ustring(name));
+  pass->set_mode(mode);
+
+  return pass;
+}
+
+void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer)
+{
+  PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+
+  /* Delete all existing passes. */
+  set<Pass *> clear_passes(scene->passes.begin(), scene->passes.end());
+  scene->delete_nodes(clear_passes);
+
+  /* Always add combined pass. */
+  pass_add(scene, PASS_COMBINED, "Combined");
+
+  /* Blender built-in data and light passes. */
+  for (BL::RenderPass &b_pass : b_rlay.passes) {
+    const PassType pass_type = get_blender_pass_type(b_pass);
+
+    if (pass_type == PASS_NONE) {
+      LOG(ERROR) << "Unknown pass " << b_pass.name();
+      continue;
+    }
+
+    if (pass_type == PASS_MOTION &&
+        (b_view_layer.use_motion_blur() && b_scene.render().use_motion_blur())) {
+      continue;
+    }
+
+    pass_add(scene, pass_type, b_pass.name().c_str());
+  }
+
+  PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
+
+  /* Debug passes. */
+  if (get_boolean(crl, "pass_debug_sample_count")) {
+    b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
+    pass_add(scene, PASS_SAMPLE_COUNT, "Debug Sample Count");
+  }
+
+  /* Cycles specific passes.
*/ + if (get_boolean(crl, "use_pass_volume_direct")) { + b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str()); + pass_add(scene, PASS_VOLUME_DIRECT, "VolumeDir"); + } + if (get_boolean(crl, "use_pass_volume_indirect")) { + b_engine.add_pass("VolumeInd", 3, "RGB", b_view_layer.name().c_str()); + pass_add(scene, PASS_VOLUME_INDIRECT, "VolumeInd"); + } + if (get_boolean(crl, "use_pass_shadow_catcher")) { + b_engine.add_pass("Shadow Catcher", 3, "RGB", b_view_layer.name().c_str()); + pass_add(scene, PASS_SHADOW_CATCHER, "Shadow Catcher"); + } + + /* Cryptomatte stores two ID/weight pairs per RGBA layer. + * User facing parameter is the number of pairs. */ + int crypto_depth = divide_up(min(16, b_view_layer.pass_cryptomatte_depth()), 2); + scene->film->set_cryptomatte_depth(crypto_depth); + CryptomatteType cryptomatte_passes = CRYPT_NONE; + if (b_view_layer.use_pass_cryptomatte_object()) { + for (int i = 0; i < crypto_depth; i++) { + string passname = cryptomatte_prefix + string_printf("Object%02d", i); + b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); + pass_add(scene, PASS_CRYPTOMATTE, passname.c_str()); + } + cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_OBJECT); + } + if (b_view_layer.use_pass_cryptomatte_material()) { + for (int i = 0; i < crypto_depth; i++) { + string passname = cryptomatte_prefix + string_printf("Material%02d", i); + b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); + pass_add(scene, PASS_CRYPTOMATTE, passname.c_str()); + } + cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_MATERIAL); + } + if (b_view_layer.use_pass_cryptomatte_asset()) { + for (int i = 0; i < crypto_depth; i++) { + string passname = cryptomatte_prefix + string_printf("Asset%02d", i); + b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str()); + pass_add(scene, PASS_CRYPTOMATTE, passname.c_str()); + } + cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_ASSET); + } + scene->film->set_cryptomatte_passes(cryptomatte_passes); + + /* Denoising passes. */ + const bool use_denoising = get_boolean(cscene, "use_denoising") && + get_boolean(crl, "use_denoising"); + const bool store_denoising_passes = get_boolean(crl, "denoising_store_passes"); + if (use_denoising) { + b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str()); + pass_add(scene, PASS_COMBINED, "Noisy Image", PassMode::NOISY); + if (get_boolean(crl, "use_pass_shadow_catcher")) { + b_engine.add_pass("Noisy Shadow Catcher", 3, "RGB", b_view_layer.name().c_str()); + pass_add(scene, PASS_SHADOW_CATCHER, "Noisy Shadow Catcher", PassMode::NOISY); + } + } + if (store_denoising_passes) { + b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str()); + pass_add(scene, PASS_DENOISING_NORMAL, "Denoising Normal", PassMode::NOISY); + + b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str()); + pass_add(scene, PASS_DENOISING_ALBEDO, "Denoising Albedo", PassMode::NOISY); + + b_engine.add_pass("Denoising Depth", 1, "Z", b_view_layer.name().c_str()); + pass_add(scene, PASS_DENOISING_DEPTH, "Denoising Depth", PassMode::NOISY); + } + + /* Custom AOV passes. 
*/ + BL::ViewLayer::aovs_iterator b_aov_iter; + for (b_view_layer.aovs.begin(b_aov_iter); b_aov_iter != b_view_layer.aovs.end(); ++b_aov_iter) { + BL::AOV b_aov(*b_aov_iter); + if (!b_aov.is_valid()) { + continue; + } + + string name = b_aov.name(); + bool is_color = b_aov.type() == BL::AOV::type_COLOR; + + if (is_color) { + b_engine.add_pass(name.c_str(), 4, "RGBA", b_view_layer.name().c_str()); + pass_add(scene, PASS_AOV_COLOR, name.c_str()); + } + else { + b_engine.add_pass(name.c_str(), 1, "X", b_view_layer.name().c_str()); + pass_add(scene, PASS_AOV_VALUE, name.c_str()); + } + } + + scene->film->set_pass_alpha_threshold(b_view_layer.pass_alpha_threshold()); +} + +void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph) +{ + /* When viewport display is not needed during render we can force some + * caches to be releases from blender side in order to reduce peak memory + * footprint during synchronization process. + */ + + const bool is_interface_locked = b_engine.render() && b_engine.render().use_lock_interface(); + const bool is_persistent_data = b_engine.render() && b_engine.render().use_persistent_data(); + const bool can_free_caches = + (BlenderSession::headless || is_interface_locked) && + /* Baking re-uses the depsgraph multiple times, clearing crashes + * reading un-evaluated mesh data which isn't aligned with the + * geometry we're baking, see T71012. */ + !scene->bake_manager->get_baking() && + /* Persistent data must main caches for performance and correctness. */ + !is_persistent_data; + + if (!can_free_caches) { + return; + } + /* TODO(sergey): We can actually remove the whole dependency graph, + * but that will need some API support first. + */ + for (BL::Object &b_ob : b_depsgraph.objects) { + b_ob.cache_release(); + } +} + +/* Scene Parameters */ + +SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background) +{ + SceneParams params; + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + const bool shadingsystem = RNA_boolean_get(&cscene, "shading_system"); + + if (shadingsystem == 0) + params.shadingsystem = SHADINGSYSTEM_SVM; + else if (shadingsystem == 1) + params.shadingsystem = SHADINGSYSTEM_OSL; + + if (background || DebugFlags().viewport_static_bvh) + params.bvh_type = BVH_TYPE_STATIC; + else + params.bvh_type = BVH_TYPE_DYNAMIC; + + params.use_bvh_spatial_split = RNA_boolean_get(&cscene, "debug_use_spatial_splits"); + params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh"); + params.num_bvh_time_steps = RNA_int_get(&cscene, "debug_bvh_time_steps"); + + PointerRNA csscene = RNA_pointer_get(&b_scene.ptr, "cycles_curves"); + params.hair_subdivisions = get_int(csscene, "subdivisions"); + params.hair_shape = (CurveShapeType)get_enum( + csscene, "shape", CURVE_NUM_SHAPE_TYPES, CURVE_THICK); + + int texture_limit; + if (background) { + texture_limit = RNA_enum_get(&cscene, "texture_limit_render"); + } + else { + texture_limit = RNA_enum_get(&cscene, "texture_limit"); + } + if (texture_limit > 0 && b_scene.render().use_simplify()) { + params.texture_limit = 1 << (texture_limit + 6); + } + else { + params.texture_limit = 0; + } + + params.bvh_layout = DebugFlags().cpu.bvh_layout; + + params.background = background; + + return params; +} + +/* Session Parameters */ + +bool BlenderSync::get_session_pause(BL::Scene &b_scene, bool background) +{ + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + return (background) ? 
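The simplify texture limit is stored as a small enum value and converted to a pixel size with a bit shift; a quick sketch of the resulting sizes (the UI labels are not part of this patch, so treat the "128, 256, 512, ..." interpretation as following from the shift itself):

#include <cstdio>

int main()
{
  /* 0 means "no limit"; values >= 1 become a maximum texture size of
   * 2^(value + 6) pixels, i.e. 128, 256, 512, ... */
  for (int texture_limit = 0; texture_limit <= 5; texture_limit++) {
    const int limit = (texture_limit > 0) ? 1 << (texture_limit + 6) : 0;
    std::printf("enum value %d -> texture_limit %d\n", texture_limit, limit);
  }
}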
false : get_boolean(cscene, "preview_pause"); +} + +SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine, + BL::Preferences &b_preferences, + BL::Scene &b_scene, + bool background) +{ + SessionParams params; + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + + /* feature set */ + params.experimental = (get_enum(cscene, "feature_set") != 0); + + /* Headless and background rendering. */ + params.headless = BlenderSession::headless; + params.background = background; + + /* Device */ + params.threads = blender_device_threads(b_scene); + params.device = blender_device_info(b_preferences, b_scene, params.background); + + /* samples */ + int samples = get_int(cscene, "samples"); + int preview_samples = get_int(cscene, "preview_samples"); + + if (background) { + params.samples = samples; + } + else { + params.samples = preview_samples; + if (params.samples == 0) + params.samples = INT_MAX; + } + + /* Clamp samples. */ + params.samples = min(params.samples, Integrator::MAX_SAMPLES); + + /* Viewport Performance */ + params.pixel_size = b_engine.get_preview_pixel_size(b_scene); + + if (background) { + params.pixel_size = 1; + } + + /* shading system - scene level needs full refresh */ + const bool shadingsystem = RNA_boolean_get(&cscene, "shading_system"); + + if (shadingsystem == 0) + params.shadingsystem = SHADINGSYSTEM_SVM; + else if (shadingsystem == 1) + params.shadingsystem = SHADINGSYSTEM_OSL; + + /* Time limit. */ + if (background) { + params.time_limit = get_float(cscene, "time_limit"); + } + else { + /* For the viewport it kind of makes more sense to think in terms of the noise floor, which is + * usually higher than acceptable level for the final frame. */ + /* TODO: It might be useful to support time limit in the viewport as well, but needs some + * extra thoughts and input. */ + params.time_limit = 0.0; + } + + /* Profiling. 
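A condensed sketch of how the effective sample count falls out of the branches above; MAX_SAMPLES is a local stand-in for Integrator::MAX_SAMPLES, whose exact value does not matter for the logic:

#include <algorithm>
#include <climits>
#include <cstdio>

/* Stand-in for Integrator::MAX_SAMPLES; only the clamp matters here. */
static const int MAX_SAMPLES = 1 << 24;

static int effective_samples(bool background, int samples, int preview_samples)
{
  /* Final renders use the render sample count; the viewport uses the preview
   * count, where 0 means "keep refining" and maps to INT_MAX before clamping. */
  const int result = background ? samples :
                                  (preview_samples == 0 ? INT_MAX : preview_samples);
  return std::min(result, MAX_SAMPLES);
}

int main()
{
  std::printf("%d\n", effective_samples(true, 1024, 0));   /* 1024 */
  std::printf("%d\n", effective_samples(false, 1024, 0));  /* clamped to MAX_SAMPLES */
  std::printf("%d\n", effective_samples(false, 1024, 32)); /* 32 */
}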
*/ + params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background && + BlenderSession::print_render_stats; + + if (background) { + params.use_auto_tile = RNA_boolean_get(&cscene, "use_auto_tile"); + params.tile_size = max(get_int(cscene, "tile_size"), 8); + } + else { + params.use_auto_tile = false; + } + + return params; +} + +DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene, + BL::ViewLayer &b_view_layer, + bool background) +{ + enum DenoiserInput { + DENOISER_INPUT_RGB = 1, + DENOISER_INPUT_RGB_ALBEDO = 2, + DENOISER_INPUT_RGB_ALBEDO_NORMAL = 3, + + DENOISER_INPUT_NUM, + }; + + DenoiseParams denoising; + PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles"); + + int input_passes = -1; + + if (background) { + /* Final Render Denoising */ + denoising.use = get_boolean(cscene, "use_denoising"); + denoising.type = (DenoiserType)get_enum(cscene, "denoiser", DENOISER_NUM, DENOISER_NONE); + denoising.prefilter = (DenoiserPrefilter)get_enum( + cscene, "denoising_prefilter", DENOISER_PREFILTER_NUM, DENOISER_PREFILTER_NONE); + + input_passes = (DenoiserInput)get_enum( + cscene, "denoising_input_passes", DENOISER_INPUT_NUM, DENOISER_INPUT_RGB_ALBEDO_NORMAL); + + if (b_view_layer) { + PointerRNA clayer = RNA_pointer_get(&b_view_layer.ptr, "cycles"); + if (!get_boolean(clayer, "use_denoising")) { + denoising.use = false; + } + } + } + else { + /* Viewport Denoising */ + denoising.use = get_boolean(cscene, "use_preview_denoising"); + denoising.type = (DenoiserType)get_enum( + cscene, "preview_denoiser", DENOISER_NUM, DENOISER_NONE); + denoising.prefilter = (DenoiserPrefilter)get_enum( + cscene, "preview_denoising_prefilter", DENOISER_PREFILTER_NUM, DENOISER_PREFILTER_FAST); + denoising.start_sample = get_int(cscene, "preview_denoising_start_sample"); + + input_passes = (DenoiserInput)get_enum( + cscene, "preview_denoising_input_passes", DENOISER_INPUT_NUM, DENOISER_INPUT_RGB_ALBEDO); + + /* Auto select fastest denoiser. */ + if (denoising.type == DENOISER_NONE) { + if (!Device::available_devices(DEVICE_MASK_OPTIX).empty()) { + denoising.type = DENOISER_OPTIX; + } + else if (openimagedenoise_supported()) { + denoising.type = DENOISER_OPENIMAGEDENOISE; + } + else { + denoising.use = false; + } + } + } + + switch (input_passes) { + case DENOISER_INPUT_RGB: + denoising.use_pass_albedo = false; + denoising.use_pass_normal = false; + break; + + case DENOISER_INPUT_RGB_ALBEDO: + denoising.use_pass_albedo = true; + denoising.use_pass_normal = false; + break; + + case DENOISER_INPUT_RGB_ALBEDO_NORMAL: + denoising.use_pass_albedo = true; + denoising.use_pass_normal = true; + break; + + default: + LOG(ERROR) << "Unhandled input passes enum " << input_passes; + break; + } + + return denoising; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/sync.h b/intern/cycles/blender/sync.h new file mode 100644 index 00000000000..c2377406876 --- /dev/null +++ b/intern/cycles/blender/sync.h @@ -0,0 +1,276 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
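The input_passes value only decides which guiding passes feed the denoiser; a compact sketch of that mapping with local stand-in names (not the Cycles API):

#include <cstdio>

/* Local stand-ins mirroring the DenoiserInput enum used above. */
enum DemoDenoiserInput {
  DEMO_INPUT_RGB = 1,
  DEMO_INPUT_RGB_ALBEDO = 2,
  DEMO_INPUT_RGB_ALBEDO_NORMAL = 3,
};

struct DemoDenoiseFlags {
  bool use_pass_albedo;
  bool use_pass_normal;
};

static DemoDenoiseFlags guiding_passes(DemoDenoiserInput input)
{
  /* RGB alone needs no extra passes; albedo and normal are enabled
   * cumulatively as the enum value grows, same as the switch above. */
  DemoDenoiseFlags flags = {false, false};
  flags.use_pass_albedo = (input >= DEMO_INPUT_RGB_ALBEDO);
  flags.use_pass_normal = (input >= DEMO_INPUT_RGB_ALBEDO_NORMAL);
  return flags;
}

int main()
{
  const DemoDenoiseFlags f = guiding_passes(DEMO_INPUT_RGB_ALBEDO);
  std::printf("albedo=%d normal=%d\n", f.use_pass_albedo, f.use_pass_normal); /* 1 0 */
}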
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BLENDER_SYNC_H__ +#define __BLENDER_SYNC_H__ + +#include "MEM_guardedalloc.h" +#include "RNA_access.h" +#include "RNA_blender_cpp.h" +#include "RNA_types.h" + +#include "blender/id_map.h" +#include "blender/util.h" +#include "blender/viewport.h" + +#include "scene/scene.h" +#include "session/session.h" + +#include "util/map.h" +#include "util/set.h" +#include "util/transform.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +class Background; +class BlenderObjectCulling; +class BlenderViewportParameters; +class Camera; +class Film; +class Hair; +class Light; +class Mesh; +class Object; +class ParticleSystem; +class Scene; +class ViewLayer; +class Shader; +class ShaderGraph; +class ShaderNode; +class TaskPool; + +class BlenderSync { + public: + BlenderSync(BL::RenderEngine &b_engine, + BL::BlendData &b_data, + BL::Scene &b_scene, + Scene *scene, + bool preview, + bool use_developer_ui, + Progress &progress); + ~BlenderSync(); + + void reset(BL::BlendData &b_data, BL::Scene &b_scene); + + /* sync */ + void sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d); + void sync_data(BL::RenderSettings &b_render, + BL::Depsgraph &b_depsgraph, + BL::SpaceView3D &b_v3d, + BL::Object &b_override, + int width, + int height, + void **python_thread_state); + void sync_view_layer(BL::ViewLayer &b_view_layer); + void sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer); + void sync_integrator(BL::ViewLayer &b_view_layer, bool background); + void sync_camera(BL::RenderSettings &b_render, + BL::Object &b_override, + int width, + int height, + const char *viewname); + void sync_view(BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, int width, int height); + inline int get_layer_samples() + { + return view_layer.samples; + } + inline int get_layer_bound_samples() + { + return view_layer.bound_samples; + } + + /* get parameters */ + static SceneParams get_scene_params(BL::Scene &b_scene, bool background); + static SessionParams get_session_params(BL::RenderEngine &b_engine, + BL::Preferences &b_userpref, + BL::Scene &b_scene, + bool background); + static bool get_session_pause(BL::Scene &b_scene, bool background); + static BufferParams get_buffer_params( + BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height); + + private: + static DenoiseParams get_denoise_params(BL::Scene &b_scene, + BL::ViewLayer &b_view_layer, + bool background); + + /* sync */ + void sync_lights(BL::Depsgraph &b_depsgraph, bool update_all); + void sync_materials(BL::Depsgraph &b_depsgraph, bool update_all); + void sync_objects(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, float motion_time = 0.0f); + void sync_motion(BL::RenderSettings &b_render, + BL::Depsgraph &b_depsgraph, + BL::SpaceView3D &b_v3d, + BL::Object &b_override, + int width, + int height, + void **python_thread_state); + void sync_film(BL::ViewLayer &b_view_layer, BL::SpaceView3D &b_v3d); + void sync_view(); + + /* Shader */ + array find_used_shaders(BL::Object &b_ob); + void sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all); + void sync_shaders(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d); + void sync_nodes(Shader *shader, BL::ShaderNodeTree &b_ntree); + + /* Object */ + Object *sync_object(BL::Depsgraph &b_depsgraph, + BL::ViewLayer &b_view_layer, + BL::DepsgraphObjectInstance &b_instance, + float motion_time, + bool use_particle_hair, 
+                      bool show_lights,
+                      BlenderObjectCulling &culling,
+                      bool *use_portal,
+                      TaskPool *geom_task_pool);
+  void sync_object_motion_init(BL::Object &b_parent, BL::Object &b_ob, Object *object);
+
+  void sync_procedural(BL::Object &b_ob,
+                       BL::MeshSequenceCacheModifier &b_mesh_cache,
+                       bool has_subdivision);
+
+  bool sync_object_attributes(BL::DepsgraphObjectInstance &b_instance, Object *object);
+
+  /* Volume */
+  void sync_volume(BObjectInfo &b_ob_info, Volume *volume);
+
+  /* Mesh */
+  void sync_mesh(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Mesh *mesh);
+  void sync_mesh_motion(BL::Depsgraph b_depsgraph,
+                        BObjectInfo &b_ob_info,
+                        Mesh *mesh,
+                        int motion_step);
+
+  /* Hair */
+  void sync_hair(BL::Depsgraph b_depsgraph, BObjectInfo &b_ob_info, Hair *hair);
+  void sync_hair_motion(BL::Depsgraph b_depsgraph,
+                        BObjectInfo &b_ob_info,
+                        Hair *hair,
+                        int motion_step);
+  void sync_hair(Hair *hair, BObjectInfo &b_ob_info, bool motion, int motion_step = 0);
+  void sync_particle_hair(
+      Hair *hair, BL::Mesh &b_mesh, BObjectInfo &b_ob_info, bool motion, int motion_step = 0);
+  bool object_has_particle_hair(BL::Object b_ob);
+
+  /* Camera */
+  void sync_camera_motion(
+      BL::RenderSettings &b_render, BL::Object &b_ob, int width, int height, float motion_time);
+
+  /* Geometry */
+  Geometry *sync_geometry(BL::Depsgraph &b_depsgrpah,
+                          BObjectInfo &b_ob_info,
+                          bool object_updated,
+                          bool use_particle_hair,
+                          TaskPool *task_pool);
+
+  void sync_geometry_motion(BL::Depsgraph &b_depsgraph,
+                            BObjectInfo &b_ob_info,
+                            Object *object,
+                            float motion_time,
+                            bool use_particle_hair,
+                            TaskPool *task_pool);
+
+  /* Light */
+  void sync_light(BL::Object &b_parent,
+                  int persistent_id[OBJECT_PERSISTENT_ID_SIZE],
+                  BObjectInfo &b_ob_info,
+                  int random_id,
+                  Transform &tfm,
+                  bool *use_portal);
+  void sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal);
+
+  /* Particles */
+  bool sync_dupli_particle(BL::Object &b_ob,
+                           BL::DepsgraphObjectInstance &b_instance,
+                           Object *object);
+
+  /* Images. */
+  void sync_images();
+
+  /* Early data free. */
+  void free_data_after_sync(BL::Depsgraph &b_depsgraph);
+
+  /* util */
+  void find_shader(BL::ID &id, array<Node *> &used_shaders, Shader *default_shader);
+  bool BKE_object_is_modified(BL::Object &b_ob);
+  bool object_is_geometry(BL::Object &b_ob);
+  bool object_is_light(BL::Object &b_ob);
+
+  /* variables */
+  BL::RenderEngine b_engine;
+  BL::BlendData b_data;
+  BL::Scene b_scene;
+
+  id_map<void *, Shader> shader_map;
+  id_map<ObjectKey, Object> object_map;
+  id_map procedural_map;
+  id_map<GeometryKey, Geometry> geometry_map;
+  id_map<ObjectKey, Light> light_map;
+  id_map<ParticleSystemKey, ParticleSystem> particle_system_map;
+  set<Geometry *> geometry_synced;
+  set<Geometry *> geometry_motion_synced;
+  set<Geometry *> geometry_motion_attribute_synced;
+  set<float> motion_times;
+  void *world_map;
+  bool world_recalc;
+  BlenderViewportParameters viewport_parameters;
+
+  Scene *scene;
+  bool preview;
+  bool experimental;
+  bool use_developer_ui;
+
+  float dicing_rate;
+  int max_subdivisions;
+
+  struct RenderLayerInfo {
+    RenderLayerInfo()
+        : material_override(PointerRNA_NULL),
+          use_background_shader(true),
+          use_surfaces(true),
+          use_hair(true),
+          use_volumes(true),
+          use_motion_blur(true),
+          samples(0),
+          bound_samples(false)
+    {
+    }
+
+    string name;
+    BL::Material material_override;
+    bool use_background_shader;
+    bool use_surfaces;
+    bool use_hair;
+    bool use_volumes;
+    bool use_motion_blur;
+    int samples;
+    bool bound_samples;
+  } view_layer;
+
+  Progress &progress;
+
+ protected:
+  /* Indicates that `sync_recalc()` detected changes in the scene.
+ * If this flag is false then the data is considered to be up-to-date and will not be + * synchronized at all. */ + bool has_updates_ = true; +}; + +CCL_NAMESPACE_END + +#endif /* __BLENDER_SYNC_H__ */ diff --git a/intern/cycles/blender/texture.cpp b/intern/cycles/blender/texture.cpp new file mode 100644 index 00000000000..43745bb8376 --- /dev/null +++ b/intern/cycles/blender/texture.cpp @@ -0,0 +1,57 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "blender/texture.h" + +CCL_NAMESPACE_BEGIN + +namespace { + +/* Point density helpers. */ + +void density_texture_space_invert(float3 &loc, float3 &size) +{ + if (size.x != 0.0f) + size.x = 0.5f / size.x; + if (size.y != 0.0f) + size.y = 0.5f / size.y; + if (size.z != 0.0f) + size.z = 0.5f / size.z; + + loc = loc * size - make_float3(0.5f, 0.5f, 0.5f); +} + +} /* namespace */ + +void point_density_texture_space(BL::Depsgraph &b_depsgraph, + BL::ShaderNodeTexPointDensity &b_point_density_node, + float3 &loc, + float3 &size) +{ + BL::Object b_ob(b_point_density_node.object()); + if (!b_ob) { + loc = zero_float3(); + size = zero_float3(); + return; + } + float3 min, max; + b_point_density_node.calc_point_density_minmax(b_depsgraph, &min[0], &max[0]); + loc = (min + max) * 0.5f; + size = (max - min) * 0.5f; + density_texture_space_invert(loc, size); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/texture.h b/intern/cycles/blender/texture.h new file mode 100644 index 00000000000..ead0c4e631b --- /dev/null +++ b/intern/cycles/blender/texture.h @@ -0,0 +1,32 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BLENDER_TEXTURE_H__ +#define __BLENDER_TEXTURE_H__ + +#include "blender/sync.h" +#include + +CCL_NAMESPACE_BEGIN + +void point_density_texture_space(BL::Depsgraph &b_depsgraph, + BL::ShaderNodeTexPointDensity &b_point_density_node, + float3 &loc, + float3 &size); + +CCL_NAMESPACE_END + +#endif /* __BLENDER_TEXTURE_H__ */ diff --git a/intern/cycles/blender/util.h b/intern/cycles/blender/util.h new file mode 100644 index 00000000000..33fd2c416c8 --- /dev/null +++ b/intern/cycles/blender/util.h @@ -0,0 +1,720 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
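For intuition about the inversion in density_texture_space_invert(): after rewriting loc and size, a mapping of the form p * size - loc sends the original texture-space box to [0, 1]. A small numerical check of that identity (the exact downstream mapping used by the kernel is an assumption here):

#include <cstdio>

int main()
{
  /* Original texture space: box centered at loc with half-extent size
   * (one axis shown; the real code does this per component). */
  float loc = 2.0f, size = 3.0f; /* box covers [-1, 5] */

  /* Same inversion as density_texture_space_invert(). */
  if (size != 0.0f)
    size = 0.5f / size;
  loc = loc * size - 0.5f;

  /* Assumed downstream mapping p * size - loc: box ends map to 0 and 1. */
  for (float p : {-1.0f, 2.0f, 5.0f})
    std::printf("p=%4.1f -> %.2f\n", p, p * size - loc); /* 0.00, 0.50, 1.00 */
}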
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BLENDER_UTIL_H__ +#define __BLENDER_UTIL_H__ + +#include "scene/mesh.h" + +#include "util/algorithm.h" +#include "util/array.h" +#include "util/map.h" +#include "util/path.h" +#include "util/set.h" +#include "util/transform.h" +#include "util/types.h" +#include "util/vector.h" + +/* Hacks to hook into Blender API + * todo: clean this up ... */ + +extern "C" { +void BKE_image_user_frame_calc(void *ima, void *iuser, int cfra); +void BKE_image_user_file_path(void *iuser, void *ima, char *path); +unsigned char *BKE_image_get_pixels_for_frame(void *image, int frame, int tile); +float *BKE_image_get_float_pixels_for_frame(void *image, int frame, int tile); +} + +CCL_NAMESPACE_BEGIN + +struct BObjectInfo { + /* Object directly provided by the depsgraph iterator. This object is only valid during one + * iteration and must not be accessed afterwards. Transforms and visibility should be checked on + * this object. */ + BL::Object iter_object; + + /* This object remains alive even after the object iterator is done. It corresponds to one + * original object. It is the object that owns the object data below. */ + BL::Object real_object; + + /* The object-data referenced by the iter object. This is still valid after the depsgraph + * iterator is done. It might have a different type compared to real_object.data(). */ + BL::ID object_data; + + /* True when the current geometry is the data of the referenced object. False when it is a + * geometry instance that does not have a 1-to-1 relationship with an object. */ + bool is_real_object_data() const + { + return const_cast(real_object).data() == object_data; + } +}; + +typedef BL::ShaderNodeAttribute::attribute_type_enum BlenderAttributeType; +BlenderAttributeType blender_attribute_name_split_type(ustring name, string *r_real_name); + +void python_thread_state_save(void **python_thread_state); +void python_thread_state_restore(void **python_thread_state); + +static inline BL::Mesh object_to_mesh(BL::BlendData & /*data*/, + BObjectInfo &b_ob_info, + BL::Depsgraph & /*depsgraph*/, + bool /*calc_undeformed*/, + Mesh::SubdivisionType subdivision_type) +{ + /* TODO: make this work with copy-on-write, modifiers are already evaluated. */ +#if 0 + bool subsurf_mod_show_render = false; + bool subsurf_mod_show_viewport = false; + + if (subdivision_type != Mesh::SUBDIVISION_NONE) { + BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length() - 1]; + + subsurf_mod_show_render = subsurf_mod.show_render(); + subsurf_mod_show_viewport = subsurf_mod.show_viewport(); + + subsurf_mod.show_render(false); + subsurf_mod.show_viewport(false); + } +#endif + + BL::Mesh mesh = (b_ob_info.object_data.is_a(&RNA_Mesh)) ? BL::Mesh(b_ob_info.object_data) : + BL::Mesh(PointerRNA_NULL); + + if (b_ob_info.is_real_object_data()) { + if (mesh) { + /* Make a copy to split faces if we use autosmooth, otherwise not needed. + * Also in edit mode do we need to make a copy, to ensure data layers like + * UV are not empty. 
*/ + if (mesh.is_editmode() || + (mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) { + BL::Depsgraph depsgraph(PointerRNA_NULL); + mesh = b_ob_info.real_object.to_mesh(false, depsgraph); + } + } + else { + BL::Depsgraph depsgraph(PointerRNA_NULL); + mesh = b_ob_info.real_object.to_mesh(false, depsgraph); + } + } + else { + /* TODO: what to do about non-mesh geometry instances? */ + } + +#if 0 + if (subdivision_type != Mesh::SUBDIVISION_NONE) { + BL::Modifier subsurf_mod = object.modifiers[object.modifiers.length() - 1]; + + subsurf_mod.show_render(subsurf_mod_show_render); + subsurf_mod.show_viewport(subsurf_mod_show_viewport); + } +#endif + + if ((bool)mesh && subdivision_type == Mesh::SUBDIVISION_NONE) { + if (mesh.use_auto_smooth()) { + mesh.split_faces(false); + } + + mesh.calc_loop_triangles(); + } + + return mesh; +} + +static inline void free_object_to_mesh(BL::BlendData & /*data*/, + BObjectInfo &b_ob_info, + BL::Mesh &mesh) +{ + if (!b_ob_info.is_real_object_data()) { + return; + } + /* Free mesh if we didn't just use the existing one. */ + BL::Object object = b_ob_info.real_object; + if (object.data().ptr.data != mesh.ptr.data) { + object.to_mesh_clear(); + } +} + +static inline void colorramp_to_array(BL::ColorRamp &ramp, + array &ramp_color, + array &ramp_alpha, + int size) +{ + ramp_color.resize(size); + ramp_alpha.resize(size); + + for (int i = 0; i < size; i++) { + float color[4]; + + ramp.evaluate((float)i / (float)(size - 1), color); + ramp_color[i] = make_float3(color[0], color[1], color[2]); + ramp_alpha[i] = color[3]; + } +} + +static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap &curve, float *min_x, float *max_x) +{ + *min_x = min(*min_x, curve.points[0].location()[0]); + *max_x = max(*max_x, curve.points[curve.points.length() - 1].location()[0]); +} + +static inline void curvemapping_minmax(/*const*/ BL::CurveMapping &cumap, + int num_curves, + float *min_x, + float *max_x) +{ + // const int num_curves = cumap.curves.length(); /* Gives linking error so far. */ + *min_x = FLT_MAX; + *max_x = -FLT_MAX; + for (int i = 0; i < num_curves; ++i) { + BL::CurveMap map(cumap.curves[i]); + curvemap_minmax_curve(map, min_x, max_x); + } +} + +static inline void curvemapping_to_array(BL::CurveMapping &cumap, array &data, int size) +{ + cumap.update(); + BL::CurveMap curve = cumap.curves[0]; + data.resize(size); + for (int i = 0; i < size; i++) { + float t = (float)i / (float)(size - 1); + data[i] = cumap.evaluate(curve, t); + } +} + +static inline void curvemapping_float_to_array(BL::CurveMapping &cumap, + array &data, + int size) +{ + float min = 0.0f, max = 1.0f; + + curvemapping_minmax(cumap, 1, &min, &max); + + const float range = max - min; + + cumap.update(); + + BL::CurveMap map = cumap.curves[0]; + + data.resize(size); + + for (int i = 0; i < size; i++) { + float t = min + (float)i / (float)(size - 1) * range; + data[i] = cumap.evaluate(map, t); + } +} + +static inline void curvemapping_color_to_array(BL::CurveMapping &cumap, + array &data, + int size, + bool rgb_curve) +{ + float min_x = 0.0f, max_x = 1.0f; + + /* TODO(sergey): There is no easy way to automatically guess what is + * the range to be used here for the case when mapping is applied on + * top of another mapping (i.e. R curve applied on top of common + * one). + * + * Using largest possible range form all curves works correct for the + * cases like vector curves and should be good enough heuristic for + * the color curves as well. 
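All of these helpers follow one pattern: evaluate a Blender ramp or curve at uniformly spaced parameters and bake the result into a fixed-size lookup table. A generic, self-contained version of that pattern, with std::function standing in for the RNA evaluate calls:

#include <cstdio>
#include <functional>
#include <vector>

/* Bake any 1D function into a lookup table over [min_x, max_x], using the same
 * uniform sampling t = min_x + i / (size - 1) * range as the helpers above. */
static std::vector<float> bake_curve(const std::function<float(float)> &eval,
                                     float min_x,
                                     float max_x,
                                     int size)
{
  std::vector<float> data(size);
  const float range = max_x - min_x;
  for (int i = 0; i < size; i++) {
    const float t = min_x + (float)i / (float)(size - 1) * range;
    data[i] = eval(t);
  }
  return data;
}

int main()
{
  const std::vector<float> lut = bake_curve([](float t) { return t * t; }, 0.0f, 1.0f, 5);
  for (float v : lut)
    std::printf("%.4f ", v); /* 0.0000 0.0625 0.2500 0.5625 1.0000 */
  std::printf("\n");
}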
+ * + * There might be some better estimations here tho. + */ + const int num_curves = rgb_curve ? 4 : 3; + curvemapping_minmax(cumap, num_curves, &min_x, &max_x); + + const float range_x = max_x - min_x; + + cumap.update(); + + BL::CurveMap mapR = cumap.curves[0]; + BL::CurveMap mapG = cumap.curves[1]; + BL::CurveMap mapB = cumap.curves[2]; + + data.resize(size); + + if (rgb_curve) { + BL::CurveMap mapI = cumap.curves[3]; + for (int i = 0; i < size; i++) { + const float t = min_x + (float)i / (float)(size - 1) * range_x; + data[i] = make_float3(cumap.evaluate(mapR, cumap.evaluate(mapI, t)), + cumap.evaluate(mapG, cumap.evaluate(mapI, t)), + cumap.evaluate(mapB, cumap.evaluate(mapI, t))); + } + } + else { + for (int i = 0; i < size; i++) { + float t = min_x + (float)i / (float)(size - 1) * range_x; + data[i] = make_float3( + cumap.evaluate(mapR, t), cumap.evaluate(mapG, t), cumap.evaluate(mapB, t)); + } + } +} + +static inline bool BKE_object_is_modified(BL::Object &self, BL::Scene &scene, bool preview) +{ + return self.is_modified(scene, (preview) ? (1 << 0) : (1 << 1)) ? true : false; +} + +static inline bool BKE_object_is_deform_modified(BObjectInfo &self, BL::Scene &scene, bool preview) +{ + if (!self.is_real_object_data()) { + return false; + } + return self.real_object.is_deform_modified(scene, (preview) ? (1 << 0) : (1 << 1)) ? true : + false; +} + +static inline int render_resolution_x(BL::RenderSettings &b_render) +{ + return b_render.resolution_x() * b_render.resolution_percentage() / 100; +} + +static inline int render_resolution_y(BL::RenderSettings &b_render) +{ + return b_render.resolution_y() * b_render.resolution_percentage() / 100; +} + +static inline string image_user_file_path(BL::ImageUser &iuser, + BL::Image &ima, + int cfra, + bool load_tiled) +{ + char filepath[1024]; + iuser.tile(0); + BKE_image_user_frame_calc(ima.ptr.data, iuser.ptr.data, cfra); + BKE_image_user_file_path(iuser.ptr.data, ima.ptr.data, filepath); + + string filepath_str = string(filepath); + if (load_tiled && ima.source() == BL::Image::source_TILED) { + string udim; + if (ima.tiles.length() > 0) { + udim = to_string(ima.tiles[0].number()); + } + string_replace(filepath_str, udim, ""); + } + return filepath_str; +} + +static inline int image_user_frame_number(BL::ImageUser &iuser, BL::Image &ima, int cfra) +{ + BKE_image_user_frame_calc(ima.ptr.data, iuser.ptr.data, cfra); + return iuser.frame_current(); +} + +static inline unsigned char *image_get_pixels_for_frame(BL::Image &image, int frame, int tile) +{ + return BKE_image_get_pixels_for_frame(image.ptr.data, frame, tile); +} + +static inline float *image_get_float_pixels_for_frame(BL::Image &image, int frame, int tile) +{ + return BKE_image_get_float_pixels_for_frame(image.ptr.data, frame, tile); +} + +static inline void render_add_metadata(BL::RenderResult &b_rr, string name, string value) +{ + b_rr.stamp_data_add_field(name.c_str(), value.c_str()); +} + +/* Utilities */ + +static inline Transform get_transform(const BL::Array &array) +{ + ProjectionTransform projection; + + /* We assume both types to be just 16 floats, and transpose because blender + * use column major matrix order while we use row major. */ + memcpy((void *)&projection, &array, sizeof(float) * 16); + projection = projection_transpose(projection); + + /* Drop last row, matrix is assumed to be affine transform. 
*/ + return projection_to_transform(projection); +} + +static inline float2 get_float2(const BL::Array &array) +{ + return make_float2(array[0], array[1]); +} + +static inline float3 get_float3(const BL::Array &array) +{ + return make_float3(array[0], array[1], 0.0f); +} + +static inline float3 get_float3(const BL::Array &array) +{ + return make_float3(array[0], array[1], array[2]); +} + +static inline float3 get_float3(const BL::Array &array) +{ + return make_float3(array[0], array[1], array[2]); +} + +static inline float4 get_float4(const BL::Array &array) +{ + return make_float4(array[0], array[1], array[2], array[3]); +} + +static inline int3 get_int3(const BL::Array &array) +{ + return make_int3(array[0], array[1], array[2]); +} + +static inline int4 get_int4(const BL::Array &array) +{ + return make_int4(array[0], array[1], array[2], array[3]); +} + +static inline float3 get_float3(PointerRNA &ptr, const char *name) +{ + float3 f; + RNA_float_get_array(&ptr, name, &f.x); + return f; +} + +static inline void set_float3(PointerRNA &ptr, const char *name, float3 value) +{ + RNA_float_set_array(&ptr, name, &value.x); +} + +static inline float4 get_float4(PointerRNA &ptr, const char *name) +{ + float4 f; + RNA_float_get_array(&ptr, name, &f.x); + return f; +} + +static inline void set_float4(PointerRNA &ptr, const char *name, float4 value) +{ + RNA_float_set_array(&ptr, name, &value.x); +} + +static inline bool get_boolean(PointerRNA &ptr, const char *name) +{ + return RNA_boolean_get(&ptr, name) ? true : false; +} + +static inline void set_boolean(PointerRNA &ptr, const char *name, bool value) +{ + RNA_boolean_set(&ptr, name, (int)value); +} + +static inline float get_float(PointerRNA &ptr, const char *name) +{ + return RNA_float_get(&ptr, name); +} + +static inline void set_float(PointerRNA &ptr, const char *name, float value) +{ + RNA_float_set(&ptr, name, value); +} + +static inline int get_int(PointerRNA &ptr, const char *name) +{ + return RNA_int_get(&ptr, name); +} + +static inline void set_int(PointerRNA &ptr, const char *name, int value) +{ + RNA_int_set(&ptr, name, value); +} + +/* Get a RNA enum value with sanity check: if the RNA value is above num_values + * the function will return a fallback default value. + * + * NOTE: This function assumes that RNA enum values are a continuous sequence + * from 0 to num_values-1. Be careful to use it with enums where some values are + * deprecated! 
+ */ +static inline int get_enum(PointerRNA &ptr, + const char *name, + int num_values = -1, + int default_value = -1) +{ + int value = RNA_enum_get(&ptr, name); + if (num_values != -1 && value >= num_values) { + assert(default_value != -1); + value = default_value; + } + return value; +} + +static inline string get_enum_identifier(PointerRNA &ptr, const char *name) +{ + PropertyRNA *prop = RNA_struct_find_property(&ptr, name); + const char *identifier = ""; + int value = RNA_property_enum_get(&ptr, prop); + + RNA_property_enum_identifier(NULL, &ptr, prop, value, &identifier); + + return string(identifier); +} + +static inline void set_enum(PointerRNA &ptr, const char *name, int value) +{ + RNA_enum_set(&ptr, name, value); +} + +static inline void set_enum(PointerRNA &ptr, const char *name, const string &identifier) +{ + RNA_enum_set_identifier(NULL, &ptr, name, identifier.c_str()); +} + +static inline string get_string(PointerRNA &ptr, const char *name) +{ + char cstrbuf[1024]; + char *cstr = RNA_string_get_alloc(&ptr, name, cstrbuf, sizeof(cstrbuf), NULL); + string str(cstr); + if (cstr != cstrbuf) + MEM_freeN(cstr); + + return str; +} + +static inline void set_string(PointerRNA &ptr, const char *name, const string &value) +{ + RNA_string_set(&ptr, name, value.c_str()); +} + +/* Relative Paths */ + +static inline string blender_absolute_path(BL::BlendData &b_data, BL::ID &b_id, const string &path) +{ + if (path.size() >= 2 && path[0] == '/' && path[1] == '/') { + string dirname; + + if (b_id.library()) { + BL::ID b_library_id(b_id.library()); + dirname = blender_absolute_path(b_data, b_library_id, b_id.library().filepath()); + } + else + dirname = b_data.filepath(); + + return path_join(path_dirname(dirname), path.substr(2)); + } + + return path; +} + +static inline string get_text_datablock_content(const PointerRNA &ptr) +{ + if (ptr.data == NULL) { + return ""; + } + + string content; + BL::Text::lines_iterator iter; + for (iter.begin(ptr); iter; ++iter) { + content += iter->body() + "\n"; + } + + return content; +} + +/* Texture Space */ + +static inline void mesh_texture_space(BL::Mesh &b_mesh, float3 &loc, float3 &size) +{ + loc = get_float3(b_mesh.texspace_location()); + size = get_float3(b_mesh.texspace_size()); + + if (size.x != 0.0f) + size.x = 0.5f / size.x; + if (size.y != 0.0f) + size.y = 0.5f / size.y; + if (size.z != 0.0f) + size.z = 0.5f / size.z; + + loc = loc * size - make_float3(0.5f, 0.5f, 0.5f); +} + +/* Object motion steps, returns 0 if no motion blur needed. */ +static inline uint object_motion_steps(BL::Object &b_parent, + BL::Object &b_ob, + const int max_steps = INT_MAX) +{ + /* Get motion enabled and steps from object itself. */ + PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); + bool use_motion = get_boolean(cobject, "use_motion_blur"); + if (!use_motion) { + return 0; + } + + int steps = max(1, get_int(cobject, "motion_steps")); + + /* Also check parent object, so motion blur and steps can be + * controlled by dupligroup duplicator for linked groups. 
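A plain-integer sketch of the same sanity check that get_enum() performs, useful when reasoning about files saved with a different enum table:

#include <cassert>
#include <cstdio>

/* Same idea as get_enum() above, written against a plain integer source
 * instead of the RNA API. */
static int checked_enum(int raw_value, int num_values, int default_value)
{
  if (num_values != -1 && raw_value >= num_values) {
    /* Value comes from a newer or older file than this enum table knows about. */
    assert(default_value != -1);
    return default_value;
  }
  return raw_value;
}

int main()
{
  /* e.g. an enum with 3 known values. */
  std::printf("%d\n", checked_enum(1, 3, 0)); /* 1: in range, kept */
  std::printf("%d\n", checked_enum(7, 3, 0)); /* 0: out of range, falls back */
}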
*/ + if (b_parent.ptr.data != b_ob.ptr.data) { + PointerRNA parent_cobject = RNA_pointer_get(&b_parent.ptr, "cycles"); + use_motion &= get_boolean(parent_cobject, "use_motion_blur"); + + if (!use_motion) { + return 0; + } + + steps = max(steps, get_int(parent_cobject, "motion_steps")); + } + + /* Use uneven number of steps so we get one keyframe at the current frame, + * and use 2^(steps - 1) so objects with more/fewer steps still have samples + * at the same times, to avoid sampling at many different times. */ + return min((2 << (steps - 1)) + 1, max_steps); +} + +/* object uses deformation motion blur */ +static inline bool object_use_deform_motion(BL::Object &b_parent, BL::Object &b_ob) +{ + PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); + bool use_deform_motion = get_boolean(cobject, "use_deform_motion"); + /* If motion blur is enabled for the object we also check + * whether it's enabled for the parent object as well. + * + * This way we can control motion blur from the dupligroup + * duplicator much easier. + */ + if (use_deform_motion && b_parent.ptr.data != b_ob.ptr.data) { + PointerRNA parent_cobject = RNA_pointer_get(&b_parent.ptr, "cycles"); + use_deform_motion &= get_boolean(parent_cobject, "use_deform_motion"); + } + return use_deform_motion; +} + +static inline BL::FluidDomainSettings object_fluid_gas_domain_find(BL::Object &b_ob) +{ + for (BL::Modifier &b_mod : b_ob.modifiers) { + if (b_mod.is_a(&RNA_FluidModifier)) { + BL::FluidModifier b_mmd(b_mod); + + if (b_mmd.fluid_type() == BL::FluidModifier::fluid_type_DOMAIN && + b_mmd.domain_settings().domain_type() == BL::FluidDomainSettings::domain_type_GAS) { + return b_mmd.domain_settings(); + } + } + } + + return BL::FluidDomainSettings(PointerRNA_NULL); +} + +static inline BL::MeshSequenceCacheModifier object_mesh_cache_find(BL::Object &b_ob, + bool *has_subdivision_modifier) +{ + for (int i = b_ob.modifiers.length() - 1; i >= 0; --i) { + BL::Modifier b_mod = b_ob.modifiers[i]; + + if (b_mod.type() == BL::Modifier::type_MESH_SEQUENCE_CACHE) { + BL::MeshSequenceCacheModifier mesh_cache = BL::MeshSequenceCacheModifier(b_mod); + return mesh_cache; + } + + /* Skip possible particles system modifiers as they do not modify the geometry. */ + if (b_mod.type() == BL::Modifier::type_PARTICLE_SYSTEM) { + continue; + } + + if (b_mod.type() == BL::Modifier::type_SUBSURF) { + if (has_subdivision_modifier) { + *has_subdivision_modifier = true; + } + continue; + } + + break; + } + + return BL::MeshSequenceCacheModifier(PointerRNA_NULL); +} + +static inline Mesh::SubdivisionType object_subdivision_type(BL::Object &b_ob, + bool preview, + bool experimental) +{ + PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles"); + + if (cobj.data && b_ob.modifiers.length() > 0 && experimental) { + BL::Modifier mod = b_ob.modifiers[b_ob.modifiers.length() - 1]; + bool enabled = preview ? mod.show_viewport() : mod.show_render(); + + if (enabled && mod.type() == BL::Modifier::type_SUBSURF && + RNA_boolean_get(&cobj, "use_adaptive_subdivision")) { + BL::SubsurfModifier subsurf(mod); + + if (subsurf.subdivision_type() == BL::SubsurfModifier::subdivision_type_CATMULL_CLARK) { + return Mesh::SUBDIVISION_CATMULL_CLARK; + } + else { + return Mesh::SUBDIVISION_LINEAR; + } + } + } + + return Mesh::SUBDIVISION_NONE; +} + +static inline uint object_ray_visibility(BL::Object &b_ob) +{ + uint flag = 0; + + flag |= b_ob.visible_camera() ? PATH_RAY_CAMERA : 0; + flag |= b_ob.visible_diffuse() ? PATH_RAY_DIFFUSE : 0; + flag |= b_ob.visible_glossy() ? 
PATH_RAY_GLOSSY : 0; + flag |= b_ob.visible_transmission() ? PATH_RAY_TRANSMIT : 0; + flag |= b_ob.visible_shadow() ? PATH_RAY_SHADOW : 0; + flag |= b_ob.visible_volume_scatter() ? PATH_RAY_VOLUME_SCATTER : 0; + + return flag; +} + +class EdgeMap { + public: + EdgeMap() + { + } + + void clear() + { + edges_.clear(); + } + + void insert(int v0, int v1) + { + get_sorted_verts(v0, v1); + edges_.insert(std::pair(v0, v1)); + } + + bool exists(int v0, int v1) + { + get_sorted_verts(v0, v1); + return edges_.find(std::pair(v0, v1)) != edges_.end(); + } + + protected: + void get_sorted_verts(int &v0, int &v1) + { + if (v0 > v1) { + swap(v0, v1); + } + } + + set> edges_; +}; + +CCL_NAMESPACE_END + +#endif /* __BLENDER_UTIL_H__ */ diff --git a/intern/cycles/blender/viewport.cpp b/intern/cycles/blender/viewport.cpp new file mode 100644 index 00000000000..2a6f7e3ecee --- /dev/null +++ b/intern/cycles/blender/viewport.cpp @@ -0,0 +1,107 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "blender/viewport.h" +#include "blender/util.h" + +#include "scene/pass.h" + +#include "util/log.h" + +CCL_NAMESPACE_BEGIN + +BlenderViewportParameters::BlenderViewportParameters() + : use_scene_world(true), + use_scene_lights(true), + studiolight_rotate_z(0.0f), + studiolight_intensity(1.0f), + studiolight_background_alpha(1.0f), + display_pass(PASS_COMBINED), + show_active_pixels(false) +{ +} + +BlenderViewportParameters::BlenderViewportParameters(BL::SpaceView3D &b_v3d, bool use_developer_ui) + : BlenderViewportParameters() +{ + if (!b_v3d) { + return; + } + + BL::View3DShading shading = b_v3d.shading(); + PointerRNA cshading = RNA_pointer_get(&shading.ptr, "cycles"); + + /* We only copy the shading parameters if we are in look-dev mode. + * Otherwise defaults are being used. These defaults mimic normal render settings. */ + if (shading.type() == BL::View3DShading::type_RENDERED) { + use_scene_world = shading.use_scene_world_render(); + use_scene_lights = shading.use_scene_lights_render(); + + if (!use_scene_world) { + studiolight_rotate_z = shading.studiolight_rotate_z(); + studiolight_intensity = shading.studiolight_intensity(); + studiolight_background_alpha = shading.studiolight_background_alpha(); + studiolight_path = shading.selected_studio_light().path(); + } + } + + /* Film. */ + + /* Lookup display pass based on the enum identifier. + * This is because integer values of python enum are not aligned with the passes definition in + * the kernel. 
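A short usage sketch of the EdgeMap idea above, written against std::set directly so it is self-contained; sorting the endpoints is what makes the edge undirected:

#include <algorithm>
#include <cstdio>
#include <set>
#include <utility>

/* Same idea as EdgeMap: store each undirected edge with its endpoints sorted,
 * so (v0, v1) and (v1, v0) refer to the same edge. */
class DemoEdgeMap {
 public:
  void insert(int v0, int v1)
  {
    if (v0 > v1)
      std::swap(v0, v1);
    edges_.insert({v0, v1});
  }
  bool exists(int v0, int v1) const
  {
    if (v0 > v1)
      std::swap(v0, v1);
    return edges_.count({v0, v1}) != 0;
  }

 private:
  std::set<std::pair<int, int>> edges_;
};

int main()
{
  DemoEdgeMap edges;
  edges.insert(5, 2);
  std::printf("%d %d\n", edges.exists(2, 5), edges.exists(2, 3)); /* 1 0 */
}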
*/ + + display_pass = PASS_COMBINED; + + const string display_pass_identifier = get_enum_identifier(cshading, "render_pass"); + if (!display_pass_identifier.empty()) { + const ustring pass_type_identifier(string_to_lower(display_pass_identifier)); + const NodeEnum *pass_type_enum = Pass::get_type_enum(); + if (pass_type_enum->exists(pass_type_identifier)) { + display_pass = static_cast((*pass_type_enum)[pass_type_identifier]); + } + } + + if (use_developer_ui) { + show_active_pixels = get_boolean(cshading, "show_active_pixels"); + } +} + +bool BlenderViewportParameters::shader_modified(const BlenderViewportParameters &other) const +{ + return use_scene_world != other.use_scene_world || use_scene_lights != other.use_scene_lights || + studiolight_rotate_z != other.studiolight_rotate_z || + studiolight_intensity != other.studiolight_intensity || + studiolight_background_alpha != other.studiolight_background_alpha || + studiolight_path != other.studiolight_path; +} + +bool BlenderViewportParameters::film_modified(const BlenderViewportParameters &other) const +{ + return display_pass != other.display_pass || show_active_pixels != other.show_active_pixels; +} + +bool BlenderViewportParameters::modified(const BlenderViewportParameters &other) const +{ + return shader_modified(other) || film_modified(other); +} + +bool BlenderViewportParameters::use_custom_shader() const +{ + return !(use_scene_world && use_scene_lights); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/blender/viewport.h b/intern/cycles/blender/viewport.h new file mode 100644 index 00000000000..a445973f4d2 --- /dev/null +++ b/intern/cycles/blender/viewport.h @@ -0,0 +1,63 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BLENDER_VIEWPORT_H__ +#define __BLENDER_VIEWPORT_H__ + +#include "MEM_guardedalloc.h" + +#include "RNA_access.h" +#include "RNA_blender_cpp.h" +#include "RNA_types.h" + +#include "scene/film.h" + +CCL_NAMESPACE_BEGIN + +class BlenderViewportParameters { + public: + /* Shader. */ + bool use_scene_world; + bool use_scene_lights; + float studiolight_rotate_z; + float studiolight_intensity; + float studiolight_background_alpha; + ustring studiolight_path; + + /* Film. */ + PassType display_pass; + bool show_active_pixels; + + BlenderViewportParameters(); + BlenderViewportParameters(BL::SpaceView3D &b_v3d, bool use_developer_ui); + + /* Check whether any of shading related settings are different from the given parameters. */ + bool shader_modified(const BlenderViewportParameters &other) const; + + /* Check whether any of film related settings are different from the given parameters. */ + bool film_modified(const BlenderViewportParameters &other) const; + + /* Check whether any of settings are different from the given parameters. */ + bool modified(const BlenderViewportParameters &other) const; + + /* Returns truth when a custom shader defined by the viewport is to be used instead of the + * regular background shader or scene light. 
*/ + bool use_custom_shader() const; +}; + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/blender/volume.cpp b/intern/cycles/blender/volume.cpp new file mode 100644 index 00000000000..a41e15621a7 --- /dev/null +++ b/intern/cycles/blender/volume.cpp @@ -0,0 +1,322 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "scene/volume.h" +#include "scene/colorspace.h" +#include "scene/image.h" +#include "scene/image_vdb.h" +#include "scene/object.h" + +#include "blender/sync.h" +#include "blender/util.h" + +#ifdef WITH_OPENVDB +# include +openvdb::GridBase::ConstPtr BKE_volume_grid_openvdb_for_read(const struct Volume *volume, + const struct VolumeGrid *grid); +#endif + +CCL_NAMESPACE_BEGIN + +/* TODO: verify this is not loading unnecessary attributes. */ +class BlenderSmokeLoader : public ImageLoader { + public: + BlenderSmokeLoader(BL::Object &b_ob, AttributeStandard attribute) + : b_domain(object_fluid_gas_domain_find(b_ob)), attribute(attribute) + { + BL::Mesh b_mesh(b_ob.data()); + mesh_texture_space(b_mesh, texspace_loc, texspace_size); + } + + bool load_metadata(const ImageDeviceFeatures &, ImageMetaData &metadata) override + { + if (!b_domain) { + return false; + } + + if (attribute == ATTR_STD_VOLUME_DENSITY || attribute == ATTR_STD_VOLUME_FLAME || + attribute == ATTR_STD_VOLUME_HEAT || attribute == ATTR_STD_VOLUME_TEMPERATURE) { + metadata.type = IMAGE_DATA_TYPE_FLOAT; + metadata.channels = 1; + } + else if (attribute == ATTR_STD_VOLUME_COLOR) { + metadata.type = IMAGE_DATA_TYPE_FLOAT4; + metadata.channels = 4; + } + else if (attribute == ATTR_STD_VOLUME_VELOCITY) { + metadata.type = IMAGE_DATA_TYPE_FLOAT4; + metadata.channels = 3; + } + else { + return false; + } + + int3 resolution = get_int3(b_domain.domain_resolution()); + int amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1; + + /* Velocity and heat data is always low-resolution. */ + if (attribute == ATTR_STD_VOLUME_VELOCITY || attribute == ATTR_STD_VOLUME_HEAT) { + amplify = 1; + } + + metadata.width = resolution.x * amplify; + metadata.height = resolution.y * amplify; + metadata.depth = resolution.z * amplify; + + /* Create a matrix to transform from object space to mesh texture space. + * This does not work with deformations but that can probably only be done + * well with a volume grid mapping of coordinates. */ + metadata.transform_3d = transform_translate(-texspace_loc) * transform_scale(texspace_size); + metadata.use_transform_3d = true; + + return true; + } + + bool load_pixels(const ImageMetaData &, void *pixels, const size_t, const bool) override + { + if (!b_domain) { + return false; + } +#ifdef WITH_FLUID + int3 resolution = get_int3(b_domain.domain_resolution()); + int length, amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1; + + /* Velocity and heat data is always low-resolution. 
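A small sketch of the resolution logic in load_metadata(): the noise amplification scales the simulation grid, except for velocity and heat which Blender keeps at the base resolution:

#include <cstdio>

struct DemoRes {
  int width, height, depth;
};

/* Mirrors the resolution logic above. */
static DemoRes smoke_grid_resolution(int base_x, int base_y, int base_z,
                                     bool use_noise, int noise_scale,
                                     bool is_velocity_or_heat)
{
  int amplify = use_noise ? noise_scale : 1;
  if (is_velocity_or_heat) {
    amplify = 1;
  }
  return {base_x * amplify, base_y * amplify, base_z * amplify};
}

int main()
{
  const DemoRes density = smoke_grid_resolution(64, 64, 96, true, 2, false);
  const DemoRes velocity = smoke_grid_resolution(64, 64, 96, true, 2, true);
  std::printf("density  %dx%dx%d\n", density.width, density.height, density.depth);    /* 128x128x192 */
  std::printf("velocity %dx%dx%d\n", velocity.width, velocity.height, velocity.depth); /* 64x64x96 */
}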
*/ + if (attribute == ATTR_STD_VOLUME_VELOCITY || attribute == ATTR_STD_VOLUME_HEAT) { + amplify = 1; + } + + const int width = resolution.x * amplify; + const int height = resolution.y * amplify; + const int depth = resolution.z * amplify; + const size_t num_pixels = ((size_t)width) * height * depth; + + float *fpixels = (float *)pixels; + + if (attribute == ATTR_STD_VOLUME_DENSITY) { + FluidDomainSettings_density_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels) { + FluidDomainSettings_density_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else if (attribute == ATTR_STD_VOLUME_FLAME) { + /* this is in range 0..1, and interpreted by the OpenGL smoke viewer + * as 1500..3000 K with the first part faded to zero density */ + FluidDomainSettings_flame_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels) { + FluidDomainSettings_flame_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else if (attribute == ATTR_STD_VOLUME_COLOR) { + /* the RGB is "premultiplied" by density for better interpolation results */ + FluidDomainSettings_color_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels * 4) { + FluidDomainSettings_color_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else if (attribute == ATTR_STD_VOLUME_VELOCITY) { + FluidDomainSettings_velocity_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels * 3) { + FluidDomainSettings_velocity_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else if (attribute == ATTR_STD_VOLUME_HEAT) { + FluidDomainSettings_heat_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels) { + FluidDomainSettings_heat_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else if (attribute == ATTR_STD_VOLUME_TEMPERATURE) { + FluidDomainSettings_temperature_grid_get_length(&b_domain.ptr, &length); + if (length == num_pixels) { + FluidDomainSettings_temperature_grid_get(&b_domain.ptr, fpixels); + return true; + } + } + else { + fprintf(stderr, + "Cycles error: unknown volume attribute %s, skipping\n", + Attribute::standard_name(attribute)); + fpixels[0] = 0.0f; + return false; + } +#else + (void)pixels; +#endif + fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n"); + return false; + } + + string name() const override + { + return Attribute::standard_name(attribute); + } + + bool equals(const ImageLoader &other) const override + { + const BlenderSmokeLoader &other_loader = (const BlenderSmokeLoader &)other; + return b_domain == other_loader.b_domain && attribute == other_loader.attribute; + } + + BL::FluidDomainSettings b_domain; + float3 texspace_loc, texspace_size; + AttributeStandard attribute; +}; + +static void sync_smoke_volume(Scene *scene, BObjectInfo &b_ob_info, Volume *volume, float frame) +{ + if (!b_ob_info.is_real_object_data()) { + return; + } + BL::FluidDomainSettings b_domain = object_fluid_gas_domain_find(b_ob_info.real_object); + if (!b_domain) { + return; + } + + AttributeStandard attributes[] = {ATTR_STD_VOLUME_DENSITY, + ATTR_STD_VOLUME_COLOR, + ATTR_STD_VOLUME_FLAME, + ATTR_STD_VOLUME_HEAT, + ATTR_STD_VOLUME_TEMPERATURE, + ATTR_STD_VOLUME_VELOCITY, + ATTR_STD_NONE}; + + for (int i = 0; attributes[i] != ATTR_STD_NONE; i++) { + AttributeStandard std = attributes[i]; + if (!volume->need_attribute(scene, std)) { + continue; + } + + volume->set_clipping(b_domain.clipping()); + + Attribute *attr = volume->attributes.add(std); + + ImageLoader *loader = new BlenderSmokeLoader(b_ob_info.real_object, std); + 
ImageParams params; + params.frame = frame; + + attr->data_voxel() = scene->image_manager->add_image(loader, params); + } +} + +class BlenderVolumeLoader : public VDBImageLoader { + public: + BlenderVolumeLoader(BL::BlendData &b_data, BL::Volume &b_volume, const string &grid_name) + : VDBImageLoader(grid_name), b_volume(b_volume) + { + b_volume.grids.load(b_data.ptr.data); + +#ifdef WITH_OPENVDB + for (BL::VolumeGrid &b_volume_grid : b_volume.grids) { + if (b_volume_grid.name() == grid_name) { + const bool unload = !b_volume_grid.is_loaded(); + + ::Volume *volume = (::Volume *)b_volume.ptr.data; + const VolumeGrid *volume_grid = (VolumeGrid *)b_volume_grid.ptr.data; + grid = BKE_volume_grid_openvdb_for_read(volume, volume_grid); + + if (unload) { + b_volume_grid.unload(); + } + + break; + } + } +#endif + } + + BL::Volume b_volume; +}; + +static void sync_volume_object(BL::BlendData &b_data, + BObjectInfo &b_ob_info, + Scene *scene, + Volume *volume) +{ + BL::Volume b_volume(b_ob_info.object_data); + b_volume.grids.load(b_data.ptr.data); + + BL::VolumeRender b_render(b_volume.render()); + + volume->set_clipping(b_render.clipping()); + volume->set_step_size(b_render.step_size()); + volume->set_object_space((b_render.space() == BL::VolumeRender::space_OBJECT)); + + /* Find grid with matching name. */ + for (BL::VolumeGrid &b_grid : b_volume.grids) { + ustring name = ustring(b_grid.name()); + AttributeStandard std = ATTR_STD_NONE; + + if (name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) { + std = ATTR_STD_VOLUME_DENSITY; + } + else if (name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) { + std = ATTR_STD_VOLUME_COLOR; + } + else if (name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) { + std = ATTR_STD_VOLUME_FLAME; + } + else if (name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) { + std = ATTR_STD_VOLUME_HEAT; + } + else if (name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) { + std = ATTR_STD_VOLUME_TEMPERATURE; + } + else if (name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) { + std = ATTR_STD_VOLUME_VELOCITY; + } + + if ((std != ATTR_STD_NONE && volume->need_attribute(scene, std)) || + volume->need_attribute(scene, name)) { + Attribute *attr = (std != ATTR_STD_NONE) ? + volume->attributes.add(std) : + volume->attributes.add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VOXEL); + + ImageLoader *loader = new BlenderVolumeLoader(b_data, b_volume, name.string()); + ImageParams params; + params.frame = b_volume.grids.frame(); + + attr->data_voxel() = scene->image_manager->add_image(loader, params, false); + } + } +} + +void BlenderSync::sync_volume(BObjectInfo &b_ob_info, Volume *volume) +{ + volume->clear(true); + + if (view_layer.use_volumes) { + if (b_ob_info.object_data.is_a(&RNA_Volume)) { + /* Volume object. Create only attributes, bounding mesh will then + * be automatically generated later. */ + sync_volume_object(b_data, b_ob_info, scene, volume); + } + else { + /* Smoke domain. */ + sync_smoke_volume(scene, b_ob_info, volume, b_scene.frame_current()); + } + } + + /* Tag update. 
*/ + volume->tag_update(scene, true); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt index f7e47a8764d..9edc30cf9c4 100644 --- a/intern/cycles/bvh/CMakeLists.txt +++ b/intern/cycles/bvh/CMakeLists.txt @@ -22,30 +22,30 @@ set(INC_SYS set(SRC bvh.cpp bvh2.cpp - bvh_binning.cpp - bvh_build.cpp - bvh_embree.cpp - bvh_multi.cpp - bvh_node.cpp - bvh_optix.cpp - bvh_sort.cpp - bvh_split.cpp - bvh_unaligned.cpp + binning.cpp + build.cpp + embree.cpp + multi.cpp + node.cpp + optix.cpp + sort.cpp + split.cpp + unaligned.cpp ) set(SRC_HEADERS bvh.h bvh2.h - bvh_binning.h - bvh_build.h - bvh_embree.h - bvh_multi.h - bvh_node.h - bvh_optix.h - bvh_params.h - bvh_sort.h - bvh_split.h - bvh_unaligned.h + binning.h + build.h + embree.h + multi.h + node.h + optix.h + params.h + sort.h + split.h + unaligned.h ) set(LIB diff --git a/intern/cycles/bvh/binning.cpp b/intern/cycles/bvh/binning.cpp new file mode 100644 index 00000000000..da591ef5cea --- /dev/null +++ b/intern/cycles/bvh/binning.cpp @@ -0,0 +1,293 @@ +/* + * Adapted from code copyright 2009-2011 Intel Corporation + * Modifications Copyright 2012, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//#define __KERNEL_SSE__ + +#include "bvh/binning.h" + +#include + +#include "util/algorithm.h" +#include "util/boundbox.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* SSE replacements */ + +__forceinline void prefetch_L1(const void * /*ptr*/) +{ +} +__forceinline void prefetch_L2(const void * /*ptr*/) +{ +} +__forceinline void prefetch_L3(const void * /*ptr*/) +{ +} +__forceinline void prefetch_NTA(const void * /*ptr*/) +{ +} + +template __forceinline float extract(const int4 &b) +{ + return b[src]; +} +template __forceinline const float4 insert(const float4 &a, const float b) +{ + float4 r = a; + r[dst] = b; + return r; +} + +__forceinline int get_best_dimension(const float4 &bestSAH) +{ + // return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH)); + + float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z)); + + if (bestSAH.x == minSAH) + return 0; + else if (bestSAH.y == minSAH) + return 1; + else + return 2; +} + +/* BVH Object Binning */ + +BVHObjectBinning::BVHObjectBinning(const BVHRange &job, + BVHReference *prims, + const BVHUnaligned *unaligned_heuristic, + const Transform *aligned_space) + : BVHRange(job), + splitSAH(FLT_MAX), + dim(0), + pos(0), + unaligned_heuristic_(unaligned_heuristic), + aligned_space_(aligned_space) +{ + if (aligned_space_ == NULL) { + bounds_ = bounds(); + cent_bounds_ = cent_bounds(); + } + else { + /* TODO(sergey): With some additional storage we can avoid + * need in re-calculating this. 
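The constructor precomputes scale = num_bins / centroid_extent; the per-primitive bin index then follows the textbook binned-SAH form sketched below (get_bin() itself lives in the BVH headers and is not shown in this patch, so this is the standard form rather than a copy of it):

#include <algorithm>
#include <cstdio>

/* Textbook binned-SAH bin index for one axis: map a primitive centroid into
 * [0, num_bins) using the precomputed scale, clamping so the maximum centroid
 * still lands in the last bin. */
static int bin_index(float centroid, float cent_min, float scale, int num_bins)
{
  const int bin = (int)((centroid - cent_min) * scale);
  return std::min(std::max(bin, 0), num_bins - 1);
}

int main()
{
  const int num_bins = 16;
  const float cent_min = 0.0f, cent_max = 10.0f;
  const float scale = num_bins / (cent_max - cent_min);

  for (float c : {0.0f, 3.1f, 9.99f, 10.0f})
    std::printf("centroid %.2f -> bin %d\n", c, bin_index(c, cent_min, scale, num_bins));
  /* bins 0, 4, 15, 15 */
}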
+ */ + bounds_ = unaligned_heuristic->compute_aligned_boundbox( + *this, prims, *aligned_space, ¢_bounds_); + } + + /* compute number of bins to use and precompute scaling factor for binning */ + num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f * size())); + scale = rcp(cent_bounds_.size()) * make_float3((float)num_bins); + + /* initialize binning counter and bounds */ + BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */ + int4 bin_count[MAX_BINS]; /* number of primitives mapped to bin */ + + for (size_t i = 0; i < num_bins; i++) { + bin_count[i] = make_int4(0); + bin_bounds[i][0] = bin_bounds[i][1] = bin_bounds[i][2] = BoundBox::empty; + } + + /* map geometry to bins, unrolled once */ + { + int64_t i; + + for (i = 0; i < int64_t(size()) - 1; i += 2) { + prefetch_L2(&prims[start() + i + 8]); + + /* map even and odd primitive to bin */ + const BVHReference &prim0 = prims[start() + i + 0]; + const BVHReference &prim1 = prims[start() + i + 1]; + + BoundBox bounds0 = get_prim_bounds(prim0); + BoundBox bounds1 = get_prim_bounds(prim1); + + int4 bin0 = get_bin(bounds0); + int4 bin1 = get_bin(bounds1); + + /* increase bounds for bins for even primitive */ + int b00 = (int)extract<0>(bin0); + bin_count[b00][0]++; + bin_bounds[b00][0].grow(bounds0); + int b01 = (int)extract<1>(bin0); + bin_count[b01][1]++; + bin_bounds[b01][1].grow(bounds0); + int b02 = (int)extract<2>(bin0); + bin_count[b02][2]++; + bin_bounds[b02][2].grow(bounds0); + + /* increase bounds of bins for odd primitive */ + int b10 = (int)extract<0>(bin1); + bin_count[b10][0]++; + bin_bounds[b10][0].grow(bounds1); + int b11 = (int)extract<1>(bin1); + bin_count[b11][1]++; + bin_bounds[b11][1].grow(bounds1); + int b12 = (int)extract<2>(bin1); + bin_count[b12][2]++; + bin_bounds[b12][2].grow(bounds1); + } + + /* for uneven number of primitives */ + if (i < int64_t(size())) { + /* map primitive to bin */ + const BVHReference &prim0 = prims[start() + i]; + BoundBox bounds0 = get_prim_bounds(prim0); + int4 bin0 = get_bin(bounds0); + + /* increase bounds of bins */ + int b00 = (int)extract<0>(bin0); + bin_count[b00][0]++; + bin_bounds[b00][0].grow(bounds0); + int b01 = (int)extract<1>(bin0); + bin_count[b01][1]++; + bin_bounds[b01][1].grow(bounds0); + int b02 = (int)extract<2>(bin0); + bin_count[b02][2]++; + bin_bounds[b02][2].grow(bounds0); + } + } + + /* sweep from right to left and compute parallel prefix of merged bounds */ + float4 r_area[MAX_BINS]; /* area of bounds of primitives on the right */ + float4 r_count[MAX_BINS]; /* number of primitives on the right */ + int4 count = make_int4(0); + + BoundBox bx = BoundBox::empty; + BoundBox by = BoundBox::empty; + BoundBox bz = BoundBox::empty; + + for (size_t i = num_bins - 1; i > 0; i--) { + count = count + bin_count[i]; + r_count[i] = blocks(count); + + bx = merge(bx, bin_bounds[i][0]); + r_area[i][0] = bx.half_area(); + by = merge(by, bin_bounds[i][1]); + r_area[i][1] = by.half_area(); + bz = merge(bz, bin_bounds[i][2]); + r_area[i][2] = bz.half_area(); + r_area[i][3] = r_area[i][2]; + } + + /* sweep from left to right and compute SAH */ + int4 ii = make_int4(1); + float4 bestSAH = make_float4(FLT_MAX); + int4 bestSplit = make_int4(-1); + + count = make_int4(0); + + bx = BoundBox::empty; + by = BoundBox::empty; + bz = BoundBox::empty; + + for (size_t i = 1; i < num_bins; i++, ii += make_int4(1)) { + count = count + bin_count[i - 1]; + + bx = merge(bx, bin_bounds[i - 1][0]); + float Ax = bx.half_area(); + by = merge(by, bin_bounds[i - 1][1]); + float Ay = 
by.half_area(); + bz = merge(bz, bin_bounds[i - 1][2]); + float Az = bz.half_area(); + + float4 lCount = blocks(count); + float4 lArea = make_float4(Ax, Ay, Az, Az); + float4 sah = lArea * lCount + r_area[i] * r_count[i]; + + bestSplit = select(sah < bestSAH, ii, bestSplit); + bestSAH = min(sah, bestSAH); + } + + int4 mask = float3_to_float4(cent_bounds_.size()) <= make_float4(0.0f); + bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX); + + /* find best dimension */ + dim = get_best_dimension(bestSAH); + splitSAH = bestSAH[dim]; + pos = bestSplit[dim]; + leafSAH = bounds_.half_area() * blocks(size()); +} + +void BVHObjectBinning::split(BVHReference *prims, + BVHObjectBinning &left_o, + BVHObjectBinning &right_o) const +{ + size_t N = size(); + + BoundBox lgeom_bounds = BoundBox::empty; + BoundBox rgeom_bounds = BoundBox::empty; + BoundBox lcent_bounds = BoundBox::empty; + BoundBox rcent_bounds = BoundBox::empty; + + int64_t l = 0, r = N - 1; + + while (l <= r) { + prefetch_L2(&prims[start() + l + 8]); + prefetch_L2(&prims[start() + r - 8]); + + BVHReference prim = prims[start() + l]; + BoundBox unaligned_bounds = get_prim_bounds(prim); + float3 unaligned_center = unaligned_bounds.center2(); + float3 center = prim.bounds().center2(); + + if (get_bin(unaligned_center)[dim] < pos) { + lgeom_bounds.grow(prim.bounds()); + lcent_bounds.grow(center); + l++; + } + else { + rgeom_bounds.grow(prim.bounds()); + rcent_bounds.grow(center); + swap(prims[start() + l], prims[start() + r]); + r--; + } + } + /* finish */ + if (l != 0 && N - 1 - r != 0) { + right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N - 1 - r), + prims); + left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims); + return; + } + + /* object medium split if we did not make progress, can happen when all + * primitives have same centroid */ + lgeom_bounds = BoundBox::empty; + rgeom_bounds = BoundBox::empty; + lcent_bounds = BoundBox::empty; + rcent_bounds = BoundBox::empty; + + for (size_t i = 0; i < N / 2; i++) { + lgeom_bounds.grow(prims[start() + i].bounds()); + lcent_bounds.grow(prims[start() + i].bounds().center2()); + } + + for (size_t i = N / 2; i < N; i++) { + rgeom_bounds.grow(prims[start() + i].bounds()); + rcent_bounds.grow(prims[start() + i].bounds().center2()); + } + + right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N / 2, N / 2 + N % 2), + prims); + left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N / 2), prims); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/binning.h b/intern/cycles/bvh/binning.h new file mode 100644 index 00000000000..876500ec540 --- /dev/null +++ b/intern/cycles/bvh/binning.h @@ -0,0 +1,115 @@ +/* + * Adapted from code copyright 2009-2011 Intel Corporation + * Modifications Copyright 2012, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __BVH_BINNING_H__ +#define __BVH_BINNING_H__ + +#include "bvh/params.h" +#include "bvh/unaligned.h" + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +class BVHBuild; + +/* Single threaded object binner. Finds the split with the best SAH heuristic + * by testing for each dimension multiple partitionings for regular spaced + * partition locations. A partitioning for a partition location is computed, + * by putting primitives whose centroid is on the left and right of the split + * location to different sets. The SAH is evaluated by computing the number of + * blocks occupied by the primitives in the partitions. */ + +class BVHObjectBinning : public BVHRange { + public: + __forceinline BVHObjectBinning() : leafSAH(FLT_MAX) + { + } + + BVHObjectBinning(const BVHRange &job, + BVHReference *prims, + const BVHUnaligned *unaligned_heuristic = NULL, + const Transform *aligned_space = NULL); + + void split(BVHReference *prims, BVHObjectBinning &left_o, BVHObjectBinning &right_o) const; + + __forceinline const BoundBox &unaligned_bounds() + { + return bounds_; + } + + float splitSAH; /* SAH cost of the best split */ + float leafSAH; /* SAH cost of creating a leaf */ + + protected: + int dim; /* best split dimension */ + int pos; /* best split position */ + size_t num_bins; /* actual number of bins to use */ + float3 scale; /* scaling factor to compute bin */ + + /* Effective bounds and centroid bounds. */ + BoundBox bounds_; + BoundBox cent_bounds_; + + const BVHUnaligned *unaligned_heuristic_; + const Transform *aligned_space_; + + enum { MAX_BINS = 32 }; + enum { LOG_BLOCK_SIZE = 2 }; + + /* computes the bin numbers for each dimension for a box. */ + __forceinline int4 get_bin(const BoundBox &box) const + { + int4 a = make_int4((box.center2() - cent_bounds_.min) * scale - make_float3(0.5f)); + int4 mn = make_int4(0); + int4 mx = make_int4((int)num_bins - 1); + + return clamp(a, mn, mx); + } + + /* computes the bin numbers for each dimension for a point. */ + __forceinline int4 get_bin(const float3 &c) const + { + return make_int4((c - cent_bounds_.min) * scale - make_float3(0.5f)); + } + + /* compute the number of blocks occupied for each dimension. */ + __forceinline float4 blocks(const int4 &a) const + { + return make_float4((a + make_int4((1 << LOG_BLOCK_SIZE) - 1)) >> LOG_BLOCK_SIZE); + } + + /* compute the number of blocks occupied in one dimension. */ + __forceinline int blocks(size_t a) const + { + return (int)((a + ((1LL << LOG_BLOCK_SIZE) - 1)) >> LOG_BLOCK_SIZE); + } + + __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const + { + if (aligned_space_ == NULL) { + return prim.bounds(); + } + else { + return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_); + } + } +}; + +CCL_NAMESPACE_END + +#endif /* __BVH_BINNING_H__ */ diff --git a/intern/cycles/bvh/build.cpp b/intern/cycles/bvh/build.cpp new file mode 100644 index 00000000000..3ce268dfb25 --- /dev/null +++ b/intern/cycles/bvh/build.cpp @@ -0,0 +1,1144 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bvh/build.h" + +#include "bvh/binning.h" +#include "bvh/node.h" +#include "bvh/params.h" +#include "bvh/split.h" + +#include "scene/curves.h" +#include "scene/hair.h" +#include "scene/mesh.h" +#include "scene/object.h" +#include "scene/scene.h" + +#include "util/algorithm.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/progress.h" +#include "util/queue.h" +#include "util/simd.h" +#include "util/stack_allocator.h" +#include "util/time.h" + +CCL_NAMESPACE_BEGIN + +/* Constructor / Destructor */ + +BVHBuild::BVHBuild(const vector &objects_, + array &prim_type_, + array &prim_index_, + array &prim_object_, + array &prim_time_, + const BVHParams ¶ms_, + Progress &progress_) + : objects(objects_), + prim_type(prim_type_), + prim_index(prim_index_), + prim_object(prim_object_), + prim_time(prim_time_), + params(params_), + progress(progress_), + progress_start_time(0.0), + unaligned_heuristic(objects_) +{ + spatial_min_overlap = 0.0f; +} + +BVHBuild::~BVHBuild() +{ +} + +/* Adding References */ + +void BVHBuild::add_reference_triangles(BoundBox &root, + BoundBox ¢er, + Mesh *mesh, + int object_index) +{ + const PrimitiveType primitive_type = mesh->primitive_type(); + const Attribute *attr_mP = NULL; + if (mesh->has_motion_blur()) { + attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + } + const size_t num_triangles = mesh->num_triangles(); + for (uint j = 0; j < num_triangles; j++) { + Mesh::Triangle t = mesh->get_triangle(j); + const float3 *verts = &mesh->verts[0]; + if (attr_mP == NULL) { + BoundBox bounds = BoundBox::empty; + t.bounds_grow(verts, bounds); + if (bounds.valid() && t.valid(verts)) { + references.push_back(BVHReference(bounds, j, object_index, primitive_type)); + root.grow(bounds); + center.grow(bounds.center2()); + } + } + else if (params.num_motion_triangle_steps == 0 || params.use_spatial_split) { + /* Motion triangles, simple case: single node for the whole + * primitive. Lowest memory footprint and faster BVH build but + * least optimal ray-tracing. + */ + /* TODO(sergey): Support motion steps for spatially split BVH. */ + const size_t num_verts = mesh->verts.size(); + const size_t num_steps = mesh->motion_steps; + const float3 *vert_steps = attr_mP->data_float3(); + BoundBox bounds = BoundBox::empty; + t.bounds_grow(verts, bounds); + for (size_t step = 0; step < num_steps - 1; step++) { + t.bounds_grow(vert_steps + step * num_verts, bounds); + } + if (bounds.valid()) { + references.push_back(BVHReference(bounds, j, object_index, primitive_type)); + root.grow(bounds); + center.grow(bounds.center2()); + } + } + else { + /* Motion triangles, trace optimized case: we split triangle + * primitives into separate nodes for each of the time steps. + * This way we minimize overlap of neighbor curve primitives. 
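+      * For example, with two motion steps requested there are 2*2+1 = 5 time samples,
+      * so the loop below emits four references per triangle, each bounding the
+      * primitive over one pair of adjacent samples.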
+ */ + const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1; + const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1); + const size_t num_verts = mesh->verts.size(); + const size_t num_steps = mesh->motion_steps; + const float3 *vert_steps = attr_mP->data_float3(); + /* Calculate bounding box of the previous time step. + * Will be reused later to avoid duplicated work on + * calculating BVH time step boundbox. + */ + float3 prev_verts[3]; + t.motion_verts(verts, vert_steps, num_verts, num_steps, 0.0f, prev_verts); + BoundBox prev_bounds = BoundBox::empty; + prev_bounds.grow(prev_verts[0]); + prev_bounds.grow(prev_verts[1]); + prev_bounds.grow(prev_verts[2]); + /* Create all primitive time steps, */ + for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) { + const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1; + float3 curr_verts[3]; + t.motion_verts(verts, vert_steps, num_verts, num_steps, curr_time, curr_verts); + BoundBox curr_bounds = BoundBox::empty; + curr_bounds.grow(curr_verts[0]); + curr_bounds.grow(curr_verts[1]); + curr_bounds.grow(curr_verts[2]); + BoundBox bounds = prev_bounds; + bounds.grow(curr_bounds); + if (bounds.valid()) { + const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1; + references.push_back( + BVHReference(bounds, j, object_index, primitive_type, prev_time, curr_time)); + root.grow(bounds); + center.grow(bounds.center2()); + } + /* Current time boundbox becomes previous one for the + * next time step. + */ + prev_bounds = curr_bounds; + } + } + } +} + +void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int object_index) +{ + const Attribute *curve_attr_mP = NULL; + if (hair->has_motion_blur()) { + curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + } + + const PrimitiveType primitive_type = hair->primitive_type(); + + const size_t num_curves = hair->num_curves(); + for (uint j = 0; j < num_curves; j++) { + const Hair::Curve curve = hair->get_curve(j); + const float *curve_radius = &hair->get_curve_radius()[0]; + for (int k = 0; k < curve.num_keys - 1; k++) { + if (curve_attr_mP == NULL) { + /* Really simple logic for static hair. */ + BoundBox bounds = BoundBox::empty; + curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds); + if (bounds.valid()) { + int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k); + references.push_back(BVHReference(bounds, j, object_index, packed_type)); + root.grow(bounds); + center.grow(bounds.center2()); + } + } + else if (params.num_motion_curve_steps == 0 || params.use_spatial_split) { + /* Simple case of motion curves: single node for the while + * shutter time. Lowest memory usage but less optimal + * rendering. + */ + /* TODO(sergey): Support motion steps for spatially split BVH. 
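+        * All motion steps are accumulated into a single bound per curve segment below.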
*/ + BoundBox bounds = BoundBox::empty; + curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds); + const size_t num_keys = hair->get_curve_keys().size(); + const size_t num_steps = hair->get_motion_steps(); + const float3 *key_steps = curve_attr_mP->data_float3(); + for (size_t step = 0; step < num_steps - 1; step++) { + curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds); + } + if (bounds.valid()) { + int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k); + references.push_back(BVHReference(bounds, j, object_index, packed_type)); + root.grow(bounds); + center.grow(bounds.center2()); + } + } + else { + /* Motion curves, trace optimized case: we split curve keys + * primitives into separate nodes for each of the time steps. + * This way we minimize overlap of neighbor curve primitives. + */ + const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1; + const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1); + const size_t num_steps = hair->get_motion_steps(); + const float3 *curve_keys = &hair->get_curve_keys()[0]; + const float3 *key_steps = curve_attr_mP->data_float3(); + const size_t num_keys = hair->get_curve_keys().size(); + /* Calculate bounding box of the previous time step. + * Will be reused later to avoid duplicated work on + * calculating BVH time step boundbox. + */ + float4 prev_keys[4]; + curve.cardinal_motion_keys(curve_keys, + curve_radius, + key_steps, + num_keys, + num_steps, + 0.0f, + k - 1, + k, + k + 1, + k + 2, + prev_keys); + BoundBox prev_bounds = BoundBox::empty; + curve.bounds_grow(prev_keys, prev_bounds); + /* Create all primitive time steps, */ + for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) { + const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1; + float4 curr_keys[4]; + curve.cardinal_motion_keys(curve_keys, + curve_radius, + key_steps, + num_keys, + num_steps, + curr_time, + k - 1, + k, + k + 1, + k + 2, + curr_keys); + BoundBox curr_bounds = BoundBox::empty; + curve.bounds_grow(curr_keys, curr_bounds); + BoundBox bounds = prev_bounds; + bounds.grow(curr_bounds); + if (bounds.valid()) { + const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1; + int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k); + references.push_back( + BVHReference(bounds, j, object_index, packed_type, prev_time, curr_time)); + root.grow(bounds); + center.grow(bounds.center2()); + } + /* Current time boundbox becomes previous one for the + * next time step. 
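+          * (this way the cardinal motion keys of each time sample are evaluated only once)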
+ */ + prev_bounds = curr_bounds; + } + } + } + } +} + +void BVHBuild::add_reference_geometry(BoundBox &root, + BoundBox ¢er, + Geometry *geom, + int object_index) +{ + if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { + Mesh *mesh = static_cast(geom); + add_reference_triangles(root, center, mesh, object_index); + } + else if (geom->geometry_type == Geometry::HAIR) { + Hair *hair = static_cast(geom); + add_reference_curves(root, center, hair, object_index); + } +} + +void BVHBuild::add_reference_object(BoundBox &root, BoundBox ¢er, Object *ob, int i) +{ + references.push_back(BVHReference(ob->bounds, -1, i, 0)); + root.grow(ob->bounds); + center.grow(ob->bounds.center2()); +} + +static size_t count_curve_segments(Hair *hair) +{ + size_t num = 0, num_curves = hair->num_curves(); + + for (size_t i = 0; i < num_curves; i++) + num += hair->get_curve(i).num_keys - 1; + + return num; +} + +static size_t count_primitives(Geometry *geom) +{ + if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { + Mesh *mesh = static_cast(geom); + return mesh->num_triangles(); + } + else if (geom->geometry_type == Geometry::HAIR) { + Hair *hair = static_cast(geom); + return count_curve_segments(hair); + } + + return 0; +} + +void BVHBuild::add_references(BVHRange &root) +{ + /* reserve space for references */ + size_t num_alloc_references = 0; + + foreach (Object *ob, objects) { + if (params.top_level) { + if (!ob->is_traceable()) { + continue; + } + if (!ob->get_geometry()->is_instanced()) { + num_alloc_references += count_primitives(ob->get_geometry()); + } + else + num_alloc_references++; + } + else { + num_alloc_references += count_primitives(ob->get_geometry()); + } + } + + references.reserve(num_alloc_references); + + /* add references from objects */ + BoundBox bounds = BoundBox::empty, center = BoundBox::empty; + int i = 0; + + foreach (Object *ob, objects) { + if (params.top_level) { + if (!ob->is_traceable()) { + ++i; + continue; + } + if (!ob->get_geometry()->is_instanced()) + add_reference_geometry(bounds, center, ob->get_geometry(), i); + else + add_reference_object(bounds, center, ob, i); + } + else + add_reference_geometry(bounds, center, ob->get_geometry(), i); + + i++; + + if (progress.get_cancel()) + return; + } + + /* happens mostly on empty meshes */ + if (!bounds.valid()) + bounds.grow(zero_float3()); + + root = BVHRange(bounds, center, 0, references.size()); +} + +/* Build */ + +BVHNode *BVHBuild::run() +{ + BVHRange root; + + /* add references */ + add_references(root); + + if (progress.get_cancel()) + return NULL; + + /* init spatial splits */ + if (params.top_level) { + /* NOTE: Technically it is supported by the builder but it's not really + * optimized for speed yet and not really clear yet if it has measurable + * improvement on render time. Needs some extra investigation before + * enabling spatial split for top level BVH. 
+ */ + params.use_spatial_split = false; + } + + spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha; + spatial_free_index = 0; + + need_prim_time = params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0; + + /* init progress updates */ + double build_start_time; + build_start_time = progress_start_time = time_dt(); + progress_count = 0; + progress_total = references.size(); + progress_original_total = progress_total; + + prim_type.resize(references.size()); + prim_index.resize(references.size()); + prim_object.resize(references.size()); + if (need_prim_time) { + prim_time.resize(references.size()); + } + else { + prim_time.resize(0); + } + + /* build recursively */ + BVHNode *rootnode; + + if (params.use_spatial_split) { + /* Perform multithreaded spatial split build. */ + BVHSpatialStorage *local_storage = &spatial_storage.local(); + rootnode = build_node(root, references, 0, local_storage); + task_pool.wait_work(); + } + else { + /* Perform multithreaded binning build. */ + BVHObjectBinning rootbin(root, (references.size()) ? &references[0] : NULL); + rootnode = build_node(rootbin, 0); + task_pool.wait_work(); + } + + /* clean up temporary memory usage by threads */ + spatial_storage.clear(); + + /* delete if we canceled */ + if (rootnode) { + if (progress.get_cancel()) { + rootnode->deleteSubtree(); + rootnode = NULL; + VLOG(1) << "BVH build cancelled."; + } + else { + /*rotate(rootnode, 4, 5);*/ + rootnode->update_visibility(); + rootnode->update_time(); + } + if (rootnode != NULL) { + VLOG(1) << "BVH build statistics:\n" + << " Build time: " << time_dt() - build_start_time << "\n" + << " Total number of nodes: " + << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT)) + << "\n" + << " Number of inner nodes: " + << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT)) + << "\n" + << " Number of leaf nodes: " + << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT)) + << "\n" + << " Number of unaligned nodes: " + << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT)) + << "\n" + << " Allocation slop factor: " + << ((prim_type.capacity() != 0) ? 
(float)prim_type.size() / prim_type.capacity() : + 1.0f) + << "\n" + << " Maximum depth: " + << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_DEPTH)) << "\n"; + } + } + + return rootnode; +} + +void BVHBuild::progress_update() +{ + if (time_dt() - progress_start_time < 0.25) + return; + + double progress_start = (double)progress_count / (double)progress_total; + double duplicates = (double)(progress_total - progress_original_total) / (double)progress_total; + + string msg = string_printf( + "Building BVH %.0f%%, duplicates %.0f%%", progress_start * 100.0, duplicates * 100.0); + + progress.set_substatus(msg); + progress_start_time = time_dt(); +} + +void BVHBuild::thread_build_node(InnerNode *inner, + int child, + const BVHObjectBinning &range, + int level) +{ + if (progress.get_cancel()) + return; + + /* build nodes */ + BVHNode *node = build_node(range, level); + + /* set child in inner node */ + inner->children[child] = node; + + /* update progress */ + if (range.size() < THREAD_TASK_SIZE) { + /*rotate(node, INT_MAX, 5);*/ + + thread_scoped_lock lock(build_mutex); + + progress_count += range.size(); + progress_update(); + } +} + +void BVHBuild::thread_build_spatial_split_node(InnerNode *inner, + int child, + const BVHRange &range, + vector &references, + int level) +{ + if (progress.get_cancel()) { + return; + } + + /* Get per-thread memory for spatial split. */ + BVHSpatialStorage *local_storage = &spatial_storage.local(); + + /* build nodes */ + BVHNode *node = build_node(range, references, level, local_storage); + + /* set child in inner node */ + inner->children[child] = node; +} + +bool BVHBuild::range_within_max_leaf_size(const BVHRange &range, + const vector &references) const +{ + size_t size = range.size(); + size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size); + + if (size > max_leaf_size) + return false; + + size_t num_triangles = 0; + size_t num_motion_triangles = 0; + size_t num_curves = 0; + size_t num_motion_curves = 0; + + for (int i = 0; i < size; i++) { + const BVHReference &ref = references[range.start() + i]; + + if (ref.prim_type() & PRIMITIVE_ALL_CURVE) { + if (ref.prim_type() & PRIMITIVE_ALL_MOTION) { + num_motion_curves++; + } + else { + num_curves++; + } + } + else if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) { + if (ref.prim_type() & PRIMITIVE_ALL_MOTION) { + num_motion_triangles++; + } + else { + num_triangles++; + } + } + } + + return (num_triangles <= params.max_triangle_leaf_size) && + (num_motion_triangles <= params.max_motion_triangle_leaf_size) && + (num_curves <= params.max_curve_leaf_size) && + (num_motion_curves <= params.max_motion_curve_leaf_size); +} + +/* multithreaded binning builder */ +BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level) +{ + size_t size = range.size(); + float leafSAH = params.sah_primitive_cost * range.leafSAH; + float splitSAH = params.sah_node_cost * range.bounds().half_area() + + params.sah_primitive_cost * range.splitSAH; + + /* Have at least one inner node on top level, for performance and correct + * visibility tests, since object instances do not check visibility flag. + */ + if (!(range.size() > 0 && params.top_level && level == 0)) { + /* Make leaf node when threshold reached or SAH tells us. 
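+     * That is, a leaf is created when the range is small enough for the current level,
+     * or when it fits the per-type leaf size limits and leafSAH is below splitSAH.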
*/ + if ((params.small_enough_for_leaf(size, level)) || + (range_within_max_leaf_size(range, references) && leafSAH < splitSAH)) { + return create_leaf_node(range, references); + } + } + + BVHObjectBinning unaligned_range; + float unalignedSplitSAH = FLT_MAX; + float unalignedLeafSAH = FLT_MAX; + Transform aligned_space; + bool do_unalinged_split = false; + if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) { + aligned_space = unaligned_heuristic.compute_aligned_space(range, &references[0]); + unaligned_range = BVHObjectBinning( + range, &references[0], &unaligned_heuristic, &aligned_space); + unalignedSplitSAH = params.sah_node_cost * unaligned_range.unaligned_bounds().half_area() + + params.sah_primitive_cost * unaligned_range.splitSAH; + unalignedLeafSAH = params.sah_primitive_cost * unaligned_range.leafSAH; + if (!(range.size() > 0 && params.top_level && level == 0)) { + if (unalignedLeafSAH < unalignedSplitSAH && unalignedSplitSAH < splitSAH && + range_within_max_leaf_size(range, references)) { + return create_leaf_node(range, references); + } + } + /* Check whether unaligned split is better than the regular one. */ + if (unalignedSplitSAH < splitSAH) { + do_unalinged_split = true; + } + } + + /* Perform split. */ + BVHObjectBinning left, right; + if (do_unalinged_split) { + unaligned_range.split(&references[0], left, right); + } + else { + range.split(&references[0], left, right); + } + + BoundBox bounds; + if (do_unalinged_split) { + bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references[0], aligned_space); + } + else { + bounds = range.bounds(); + } + + /* Create inner node. */ + InnerNode *inner; + if (range.size() < THREAD_TASK_SIZE) { + /* local build */ + BVHNode *leftnode = build_node(left, level + 1); + BVHNode *rightnode = build_node(right, level + 1); + + inner = new InnerNode(bounds, leftnode, rightnode); + } + else { + /* Threaded build */ + inner = new InnerNode(bounds); + + task_pool.push([=] { thread_build_node(inner, 0, left, level + 1); }); + task_pool.push([=] { thread_build_node(inner, 1, right, level + 1); }); + } + + if (do_unalinged_split) { + inner->set_aligned_space(aligned_space); + } + + return inner; +} + +/* multithreaded spatial split builder */ +BVHNode *BVHBuild::build_node(const BVHRange &range, + vector &references, + int level, + BVHSpatialStorage *storage) +{ + /* Update progress. + * + * TODO(sergey): Currently it matches old behavior, but we can move it to the + * task thread (which will mimic non=split builder) and save some CPU ticks + * on checking cancel status. + */ + progress_update(); + if (progress.get_cancel()) { + return NULL; + } + + /* Small enough or too deep => create leaf. */ + if (!(range.size() > 0 && params.top_level && level == 0)) { + if (params.small_enough_for_leaf(range.size(), level)) { + progress_count += range.size(); + return create_leaf_node(range, references); + } + } + + /* Perform splitting test. 
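+   * The split object provides the leaf/node SAH estimates and the no_split flag
+   * evaluated below.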
*/ + BVHMixedSplit split(this, storage, range, references, level); + + if (!(range.size() > 0 && params.top_level && level == 0)) { + if (split.no_split) { + progress_count += range.size(); + return create_leaf_node(range, references); + } + } + float leafSAH = params.sah_primitive_cost * split.leafSAH; + float splitSAH = params.sah_node_cost * range.bounds().half_area() + + params.sah_primitive_cost * split.nodeSAH; + + BVHMixedSplit unaligned_split; + float unalignedSplitSAH = FLT_MAX; + /* float unalignedLeafSAH = FLT_MAX; */ + Transform aligned_space; + bool do_unalinged_split = false; + if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) { + aligned_space = unaligned_heuristic.compute_aligned_space(range, &references.at(0)); + unaligned_split = BVHMixedSplit( + this, storage, range, references, level, &unaligned_heuristic, &aligned_space); + /* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */ + unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() + + params.sah_primitive_cost * unaligned_split.nodeSAH; + /* TOOD(sergey): Check we can create leaf already. */ + /* Check whether unaligned split is better than the regular one. */ + if (unalignedSplitSAH < splitSAH) { + do_unalinged_split = true; + } + } + + /* Do split. */ + BVHRange left, right; + if (do_unalinged_split) { + unaligned_split.split(this, left, right, range); + } + else { + split.split(this, left, right, range); + } + + progress_total += left.size() + right.size() - range.size(); + + BoundBox bounds; + if (do_unalinged_split) { + bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references.at(0), aligned_space); + } + else { + bounds = range.bounds(); + } + + /* Create inner node. */ + InnerNode *inner; + if (range.size() < THREAD_TASK_SIZE) { + /* Local build. */ + + /* Build left node. */ + vector right_references(references.begin() + right.start(), + references.begin() + right.end()); + right.set_start(0); + + BVHNode *leftnode = build_node(left, references, level + 1, storage); + + /* Build right node. */ + BVHNode *rightnode = build_node(right, right_references, level + 1, storage); + + inner = new InnerNode(bounds, leftnode, rightnode); + } + else { + /* Threaded build. */ + inner = new InnerNode(bounds); + + vector left_references(references.begin() + left.start(), + references.begin() + left.end()); + vector right_references(references.begin() + right.start(), + references.begin() + right.end()); + right.set_start(0); + + /* Create tasks for left and right nodes, using copy for most arguments and + * move for reference to avoid memory copies. 
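+     * (the lambda init-captures move the reference vectors into the tasks rather than
+     * copying them)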
*/ + task_pool.push([=, refs = std::move(left_references)]() mutable { + thread_build_spatial_split_node(inner, 0, left, refs, level + 1); + }); + task_pool.push([=, refs = std::move(right_references)]() mutable { + thread_build_spatial_split_node(inner, 1, right, refs, level + 1); + }); + } + + if (do_unalinged_split) { + inner->set_aligned_space(aligned_space); + } + + return inner; +} + +/* Create Nodes */ + +BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, int num) +{ + if (num == 0) { + BoundBox bounds = BoundBox::empty; + return new LeafNode(bounds, 0, 0, 0); + } + else if (num == 1) { + assert(start < prim_type.size()); + prim_type[start] = ref->prim_type(); + prim_index[start] = ref->prim_index(); + prim_object[start] = ref->prim_object(); + if (need_prim_time) { + prim_time[start] = make_float2(ref->time_from(), ref->time_to()); + } + + const uint visibility = objects[ref->prim_object()]->visibility_for_tracing(); + BVHNode *leaf_node = new LeafNode(ref->bounds(), visibility, start, start + 1); + leaf_node->time_from = ref->time_from(); + leaf_node->time_to = ref->time_to(); + return leaf_node; + } + else { + int mid = num / 2; + BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid); + BVHNode *leaf1 = create_object_leaf_nodes(ref + mid, start + mid, num - mid); + + BoundBox bounds = BoundBox::empty; + bounds.grow(leaf0->bounds); + bounds.grow(leaf1->bounds); + + BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1); + inner_node->time_from = min(leaf0->time_from, leaf1->time_from); + inner_node->time_to = max(leaf0->time_to, leaf1->time_to); + return inner_node; + } +} + +BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector &references) +{ + /* This is a bit overallocating here (considering leaf size into account), + * but chunk-based re-allocation in vector makes it difficult to use small + * size of stack storage here. Some tweaks are possible tho. + * + * NOTES: + * - If the size is too big, we'll have inefficient stack usage, + * and lots of cache misses. + * - If the size is too small, then we can run out of memory + * allowed to be used by vector. + * In practice it wouldn't mean crash, just allocator will fallback + * to heap which is slower. + * - Optimistic re-allocation in STL could jump us out of stack usage + * because re-allocation happens in chunks and size of those chunks we + * can not control. + */ + typedef StackAllocator<256, int> LeafStackAllocator; + typedef StackAllocator<256, float2> LeafTimeStackAllocator; + typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator; + + vector p_type[PRIMITIVE_NUM]; + vector p_index[PRIMITIVE_NUM]; + vector p_object[PRIMITIVE_NUM]; + vector p_time[PRIMITIVE_NUM]; + vector p_ref[PRIMITIVE_NUM]; + + /* TODO(sergey): In theory we should be able to store references. */ + vector object_references; + + uint visibility[PRIMITIVE_NUM] = {0}; + /* NOTE: Keep initialization in sync with actual number of primitives. */ + BoundBox bounds[PRIMITIVE_NUM] = { + BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty}; + int ob_num = 0; + int num_new_prims = 0; + /* Fill in per-type type/index array. 
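+   * bitscan() of the single primitive type bit selects one of the PRIMITIVE_NUM buckets,
+   * so each primitive type gets its own leaf; references with prim_index == -1 are
+   * object instances and are collected separately.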
*/ + for (int i = 0; i < range.size(); i++) { + const BVHReference &ref = references[range.start() + i]; + if (ref.prim_index() != -1) { + uint32_t type_index = bitscan((uint32_t)(ref.prim_type() & PRIMITIVE_ALL)); + p_ref[type_index].push_back(ref); + p_type[type_index].push_back(ref.prim_type()); + p_index[type_index].push_back(ref.prim_index()); + p_object[type_index].push_back(ref.prim_object()); + p_time[type_index].push_back(make_float2(ref.time_from(), ref.time_to())); + + bounds[type_index].grow(ref.bounds()); + visibility[type_index] |= objects[ref.prim_object()]->visibility_for_tracing(); + ++num_new_prims; + } + else { + object_references.push_back(ref); + ++ob_num; + } + } + + /* Create leaf nodes for every existing primitive. + * + * Here we write primitive types, indices and objects to a temporary array. + * This way we keep all the heavy memory allocation code outside of the + * thread lock in the case of spatial split building. + * + * TODO(sergey): With some pointer trickery we can write directly to the + * destination buffers for the non-spatial split BVH. + */ + BVHNode *leaves[PRIMITIVE_NUM + 1] = {NULL}; + int num_leaves = 0; + size_t start_index = 0; + vector local_prim_type, local_prim_index, local_prim_object; + vector local_prim_time; + local_prim_type.resize(num_new_prims); + local_prim_index.resize(num_new_prims); + local_prim_object.resize(num_new_prims); + if (need_prim_time) { + local_prim_time.resize(num_new_prims); + } + for (int i = 0; i < PRIMITIVE_NUM; ++i) { + int num = (int)p_type[i].size(); + if (num != 0) { + assert(p_type[i].size() == p_index[i].size()); + assert(p_type[i].size() == p_object[i].size()); + Transform aligned_space; + bool alignment_found = false; + for (int j = 0; j < num; ++j) { + const int index = start_index + j; + local_prim_type[index] = p_type[i][j]; + local_prim_index[index] = p_index[i][j]; + local_prim_object[index] = p_object[i][j]; + if (need_prim_time) { + local_prim_time[index] = p_time[i][j]; + } + if (params.use_unaligned_nodes && !alignment_found) { + alignment_found = unaligned_heuristic.compute_aligned_space(p_ref[i][j], &aligned_space); + } + } + LeafNode *leaf_node = new LeafNode(bounds[i], visibility[i], start_index, start_index + num); + if (true) { + float time_from = 1.0f, time_to = 0.0f; + for (int j = 0; j < num; ++j) { + const BVHReference &ref = p_ref[i][j]; + time_from = min(time_from, ref.time_from()); + time_to = max(time_to, ref.time_to()); + } + leaf_node->time_from = time_from; + leaf_node->time_to = time_to; + } + if (alignment_found) { + /* Need to recalculate leaf bounds with new alignment. */ + leaf_node->bounds = BoundBox::empty; + for (int j = 0; j < num; ++j) { + const BVHReference &ref = p_ref[i][j]; + BoundBox ref_bounds = unaligned_heuristic.compute_aligned_prim_boundbox(ref, + aligned_space); + leaf_node->bounds.grow(ref_bounds); + } + /* Set alignment space. */ + leaf_node->set_aligned_space(aligned_space); + } + leaves[num_leaves++] = leaf_node; + start_index += num; + } + } + /* Get size of new data to be copied to the packed arrays. */ + const int num_new_leaf_data = start_index; + const size_t new_leaf_data_size = sizeof(int) * num_new_leaf_data; + /* Copy actual data to the packed array. */ + if (params.use_spatial_split) { + spatial_spin_lock.lock(); + /* We use first free index in the packed arrays and mode pointer to the + * end of the current range. 
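+   * (spatial splits may duplicate references, so the required number of slots is only
+   * known here and is claimed under the spin lock)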
+ * + * This doesn't give deterministic packed arrays, but it shouldn't really + * matter because order of children in BVH is deterministic. + */ + start_index = spatial_free_index; + spatial_free_index += range.size(); + /* Extend an array when needed. */ + const size_t range_end = start_index + range.size(); + if (prim_type.size() < range_end) { + /* Avoid extra re-allocations by pre-allocating bigger array in an + * advance. + */ + if (range_end >= prim_type.capacity()) { + float progress = (float)progress_count / (float)progress_total; + float factor = (1.0f - progress); + const size_t reserve = (size_t)(range_end + (float)range_end * factor); + prim_type.reserve(reserve); + prim_index.reserve(reserve); + prim_object.reserve(reserve); + if (need_prim_time) { + prim_time.reserve(reserve); + } + } + + prim_type.resize(range_end); + prim_index.resize(range_end); + prim_object.resize(range_end); + if (need_prim_time) { + prim_time.resize(range_end); + } + } + /* Perform actual data copy. */ + if (new_leaf_data_size > 0) { + memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size); + memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size); + memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size); + if (need_prim_time) { + memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2) * num_new_leaf_data); + } + } + spatial_spin_lock.unlock(); + } + else { + /* For the regular BVH builder we simply copy new data starting at the + * range start. This is totally thread-safe, all threads are living + * inside of their own range. + */ + start_index = range.start(); + if (new_leaf_data_size > 0) { + memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size); + memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size); + memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size); + if (need_prim_time) { + memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2) * num_new_leaf_data); + } + } + } + + /* So far leaves were created with the zero-based index in an arrays, + * here we modify the indices to correspond to actual packed array start + * index. + */ + for (int i = 0; i < num_leaves; ++i) { + LeafNode *leaf = (LeafNode *)leaves[i]; + leaf->lo += start_index; + leaf->hi += start_index; + } + + /* Create leaf node for object. */ + if (num_leaves == 0 || ob_num) { + /* Only create object leaf nodes if there are objects or no other + * nodes created. + */ + const BVHReference *ref = (ob_num) ? &object_references[0] : NULL; + leaves[num_leaves] = create_object_leaf_nodes(ref, start_index + num_new_leaf_data, ob_num); + ++num_leaves; + } + + /* TODO(sergey): Need to take care of alignment when number of leaves + * is more than 1. + */ + if (num_leaves == 1) { + /* Simplest case: single leaf, just return it. + * In all the rest cases we'll be creating intermediate inner node with + * an appropriate bounding box. + */ + return leaves[0]; + } + else if (num_leaves == 2) { + return new InnerNode(range.bounds(), leaves[0], leaves[1]); + } + else if (num_leaves == 3) { + BoundBox inner_bounds = merge(leaves[1]->bounds, leaves[2]->bounds); + BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]); + return new InnerNode(range.bounds(), leaves[0], inner); + } + else { + /* Should be doing more branches if more primitive types added. 
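+   * With one leaf per primitive bucket plus an optional object leaf at most five leaves
+   * reach this point; they are grouped pairwise into a small two-level subtree below.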
*/ + assert(num_leaves <= 5); + BoundBox inner_bounds_a = merge(leaves[0]->bounds, leaves[1]->bounds); + BoundBox inner_bounds_b = merge(leaves[2]->bounds, leaves[3]->bounds); + BVHNode *inner_a = new InnerNode(inner_bounds_a, leaves[0], leaves[1]); + BVHNode *inner_b = new InnerNode(inner_bounds_b, leaves[2], leaves[3]); + BoundBox inner_bounds_c = merge(inner_a->bounds, inner_b->bounds); + BVHNode *inner_c = new InnerNode(inner_bounds_c, inner_a, inner_b); + if (num_leaves == 5) { + return new InnerNode(range.bounds(), inner_c, leaves[4]); + } + return inner_c; + } + +#undef MAX_ITEMS_PER_LEAF +} + +/* Tree Rotations */ + +void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations) +{ + /* in tested scenes, this resulted in slightly slower raytracing, so disabled + * it for now. could be implementation bug, or depend on the scene */ + if (node) + for (int i = 0; i < iterations; i++) + rotate(node, max_depth); +} + +void BVHBuild::rotate(BVHNode *node, int max_depth) +{ + /* nothing to rotate if we reached a leaf node. */ + if (node->is_leaf() || max_depth < 0) + return; + + InnerNode *parent = (InnerNode *)node; + + /* rotate all children first */ + for (size_t c = 0; c < 2; c++) + rotate(parent->children[c], max_depth - 1); + + /* compute current area of all children */ + BoundBox bounds0 = parent->children[0]->bounds; + BoundBox bounds1 = parent->children[1]->bounds; + + float area0 = bounds0.half_area(); + float area1 = bounds1.half_area(); + float4 child_area = make_float4(area0, area1, 0.0f, 0.0f); + + /* find best rotation. we pick a target child of a first child, and swap + * this with an other child. we perform the best such swap. */ + float best_cost = FLT_MAX; + int best_child = -1, best_target = -1, best_other = -1; + + for (size_t c = 0; c < 2; c++) { + /* ignore leaf nodes as we cannot descent into */ + if (parent->children[c]->is_leaf()) + continue; + + InnerNode *child = (InnerNode *)parent->children[c]; + BoundBox &other = (c == 0) ? bounds1 : bounds0; + + /* transpose child bounds */ + BoundBox target0 = child->children[0]->bounds; + BoundBox target1 = child->children[1]->bounds; + + /* compute cost for both possible swaps */ + float cost0 = merge(other, target1).half_area() - child_area[c]; + float cost1 = merge(target0, other).half_area() - child_area[c]; + + if (min(cost0, cost1) < best_cost) { + best_child = (int)c; + best_other = (int)(1 - c); + + if (cost0 < cost1) { + best_cost = cost0; + best_target = 0; + } + else { + best_cost = cost0; + best_target = 1; + } + } + } + + /* if we did not find a swap that improves the SAH then do nothing */ + if (best_cost >= 0) + return; + + assert(best_child == 0 || best_child == 1); + assert(best_target != -1); + + /* perform the best found tree rotation */ + InnerNode *child = (InnerNode *)parent->children[best_child]; + + swap(parent->children[best_other], child->children[best_target]); + child->bounds = merge(child->children[0]->bounds, child->children[1]->bounds); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/build.h b/intern/cycles/bvh/build.h new file mode 100644 index 00000000000..06b318f1ee0 --- /dev/null +++ b/intern/cycles/bvh/build.h @@ -0,0 +1,142 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BVH_BUILD_H__ +#define __BVH_BUILD_H__ + +#include + +#include "bvh/params.h" +#include "bvh/unaligned.h" + +#include "util/array.h" +#include "util/task.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +class Boundbox; +class BVHBuildTask; +class BVHNode; +class BVHSpatialSplitBuildTask; +class BVHParams; +class InnerNode; +class Geometry; +class Hair; +class Mesh; +class Object; +class Progress; + +/* BVH Builder */ + +class BVHBuild { + public: + /* Constructor/Destructor */ + BVHBuild(const vector &objects, + array &prim_type, + array &prim_index, + array &prim_object, + array &prim_time, + const BVHParams ¶ms, + Progress &progress); + ~BVHBuild(); + + BVHNode *run(); + + protected: + friend class BVHMixedSplit; + friend class BVHObjectSplit; + friend class BVHSpatialSplit; + friend class BVHBuildTask; + friend class BVHSpatialSplitBuildTask; + friend class BVHObjectBinning; + + /* Adding references. */ + void add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *mesh, int i); + void add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int i); + void add_reference_geometry(BoundBox &root, BoundBox ¢er, Geometry *geom, int i); + void add_reference_object(BoundBox &root, BoundBox ¢er, Object *ob, int i); + void add_references(BVHRange &root); + + /* Building. */ + BVHNode *build_node(const BVHRange &range, + vector &references, + int level, + BVHSpatialStorage *storage); + BVHNode *build_node(const BVHObjectBinning &range, int level); + BVHNode *create_leaf_node(const BVHRange &range, const vector &references); + BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num); + + bool range_within_max_leaf_size(const BVHRange &range, + const vector &references) const; + + /* Threads. */ + enum { THREAD_TASK_SIZE = 4096 }; + void thread_build_node(InnerNode *node, int child, const BVHObjectBinning &range, int level); + void thread_build_spatial_split_node(InnerNode *node, + int child, + const BVHRange &range, + vector &references, + int level); + thread_mutex build_mutex; + + /* Progress. */ + void progress_update(); + + /* Tree rotations. */ + void rotate(BVHNode *node, int max_depth); + void rotate(BVHNode *node, int max_depth, int iterations); + + /* Objects and primitive references. */ + vector objects; + vector references; + int num_original_references; + + /* Output primitive indexes and objects. */ + array &prim_type; + array &prim_index; + array &prim_object; + array &prim_time; + + bool need_prim_time; + + /* Build parameters. */ + BVHParams params; + + /* Progress reporting. */ + Progress &progress; + double progress_start_time; + size_t progress_count; + size_t progress_total; + size_t progress_original_total; + + /* Spatial splitting. */ + float spatial_min_overlap; + enumerable_thread_specific spatial_storage; + size_t spatial_free_index; + thread_spin_lock spatial_spin_lock; + + /* Threads. */ + TaskPool task_pool; + + /* Unaligned building. 
*/ + BVHUnaligned unaligned_heuristic; +}; + +CCL_NAMESPACE_END + +#endif /* __BVH_BUILD_H__ */ diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index 050e090bddf..ae6655eb27b 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -18,12 +18,12 @@ #include "bvh/bvh.h" #include "bvh/bvh2.h" -#include "bvh/bvh_embree.h" -#include "bvh/bvh_multi.h" -#include "bvh/bvh_optix.h" +#include "bvh/embree.h" +#include "bvh/multi.h" +#include "bvh/optix.h" -#include "util/util_logging.h" -#include "util/util_progress.h" +#include "util/log.h" +#include "util/progress.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h index b222dfb14ed..c1f55ee917e 100644 --- a/intern/cycles/bvh/bvh.h +++ b/intern/cycles/bvh/bvh.h @@ -18,10 +18,10 @@ #ifndef __BVH_H__ #define __BVH_H__ -#include "bvh/bvh_params.h" -#include "util/util_array.h" -#include "util/util_types.h" -#include "util/util_vector.h" +#include "bvh/params.h" +#include "util/array.h" +#include "util/types.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/bvh/bvh2.cpp b/intern/cycles/bvh/bvh2.cpp index 3b864859f31..04290602145 100644 --- a/intern/cycles/bvh/bvh2.cpp +++ b/intern/cycles/bvh/bvh2.cpp @@ -21,12 +21,12 @@ #include "scene/mesh.h" #include "scene/object.h" -#include "bvh/bvh_build.h" -#include "bvh/bvh_node.h" -#include "bvh/bvh_unaligned.h" +#include "bvh/build.h" +#include "bvh/node.h" +#include "bvh/unaligned.h" -#include "util/util_foreach.h" -#include "util/util_progress.h" +#include "util/foreach.h" +#include "util/progress.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/bvh/bvh2.h b/intern/cycles/bvh/bvh2.h index 1030a0f76c7..7937288f271 100644 --- a/intern/cycles/bvh/bvh2.h +++ b/intern/cycles/bvh/bvh2.h @@ -19,10 +19,10 @@ #define __BVH2_H__ #include "bvh/bvh.h" -#include "bvh/bvh_params.h" +#include "bvh/params.h" -#include "util/util_types.h" -#include "util/util_vector.h" +#include "util/types.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/bvh/bvh_binning.cpp b/intern/cycles/bvh/bvh_binning.cpp deleted file mode 100644 index 1cc38275d11..00000000000 --- a/intern/cycles/bvh/bvh_binning.cpp +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Adapted from code copyright 2009-2011 Intel Corporation - * Modifications Copyright 2012, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -//#define __KERNEL_SSE__ - -#include "bvh/bvh_binning.h" - -#include - -#include "util/util_algorithm.h" -#include "util/util_boundbox.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* SSE replacements */ - -__forceinline void prefetch_L1(const void * /*ptr*/) -{ -} -__forceinline void prefetch_L2(const void * /*ptr*/) -{ -} -__forceinline void prefetch_L3(const void * /*ptr*/) -{ -} -__forceinline void prefetch_NTA(const void * /*ptr*/) -{ -} - -template __forceinline float extract(const int4 &b) -{ - return b[src]; -} -template __forceinline const float4 insert(const float4 &a, const float b) -{ - float4 r = a; - r[dst] = b; - return r; -} - -__forceinline int get_best_dimension(const float4 &bestSAH) -{ - // return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH)); - - float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z)); - - if (bestSAH.x == minSAH) - return 0; - else if (bestSAH.y == minSAH) - return 1; - else - return 2; -} - -/* BVH Object Binning */ - -BVHObjectBinning::BVHObjectBinning(const BVHRange &job, - BVHReference *prims, - const BVHUnaligned *unaligned_heuristic, - const Transform *aligned_space) - : BVHRange(job), - splitSAH(FLT_MAX), - dim(0), - pos(0), - unaligned_heuristic_(unaligned_heuristic), - aligned_space_(aligned_space) -{ - if (aligned_space_ == NULL) { - bounds_ = bounds(); - cent_bounds_ = cent_bounds(); - } - else { - /* TODO(sergey): With some additional storage we can avoid - * need in re-calculating this. - */ - bounds_ = unaligned_heuristic->compute_aligned_boundbox( - *this, prims, *aligned_space, ¢_bounds_); - } - - /* compute number of bins to use and precompute scaling factor for binning */ - num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f * size())); - scale = rcp(cent_bounds_.size()) * make_float3((float)num_bins); - - /* initialize binning counter and bounds */ - BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */ - int4 bin_count[MAX_BINS]; /* number of primitives mapped to bin */ - - for (size_t i = 0; i < num_bins; i++) { - bin_count[i] = make_int4(0); - bin_bounds[i][0] = bin_bounds[i][1] = bin_bounds[i][2] = BoundBox::empty; - } - - /* map geometry to bins, unrolled once */ - { - int64_t i; - - for (i = 0; i < int64_t(size()) - 1; i += 2) { - prefetch_L2(&prims[start() + i + 8]); - - /* map even and odd primitive to bin */ - const BVHReference &prim0 = prims[start() + i + 0]; - const BVHReference &prim1 = prims[start() + i + 1]; - - BoundBox bounds0 = get_prim_bounds(prim0); - BoundBox bounds1 = get_prim_bounds(prim1); - - int4 bin0 = get_bin(bounds0); - int4 bin1 = get_bin(bounds1); - - /* increase bounds for bins for even primitive */ - int b00 = (int)extract<0>(bin0); - bin_count[b00][0]++; - bin_bounds[b00][0].grow(bounds0); - int b01 = (int)extract<1>(bin0); - bin_count[b01][1]++; - bin_bounds[b01][1].grow(bounds0); - int b02 = (int)extract<2>(bin0); - bin_count[b02][2]++; - bin_bounds[b02][2].grow(bounds0); - - /* increase bounds of bins for odd primitive */ - int b10 = (int)extract<0>(bin1); - bin_count[b10][0]++; - bin_bounds[b10][0].grow(bounds1); - int b11 = (int)extract<1>(bin1); - bin_count[b11][1]++; - bin_bounds[b11][1].grow(bounds1); - int b12 = (int)extract<2>(bin1); - bin_count[b12][2]++; - bin_bounds[b12][2].grow(bounds1); - } - - /* for uneven number of primitives */ - if (i < int64_t(size())) { - /* map primitive to bin */ - const BVHReference &prim0 = prims[start() + i]; - BoundBox bounds0 = get_prim_bounds(prim0); - int4 bin0 = get_bin(bounds0); - - /* increase 
bounds of bins */ - int b00 = (int)extract<0>(bin0); - bin_count[b00][0]++; - bin_bounds[b00][0].grow(bounds0); - int b01 = (int)extract<1>(bin0); - bin_count[b01][1]++; - bin_bounds[b01][1].grow(bounds0); - int b02 = (int)extract<2>(bin0); - bin_count[b02][2]++; - bin_bounds[b02][2].grow(bounds0); - } - } - - /* sweep from right to left and compute parallel prefix of merged bounds */ - float4 r_area[MAX_BINS]; /* area of bounds of primitives on the right */ - float4 r_count[MAX_BINS]; /* number of primitives on the right */ - int4 count = make_int4(0); - - BoundBox bx = BoundBox::empty; - BoundBox by = BoundBox::empty; - BoundBox bz = BoundBox::empty; - - for (size_t i = num_bins - 1; i > 0; i--) { - count = count + bin_count[i]; - r_count[i] = blocks(count); - - bx = merge(bx, bin_bounds[i][0]); - r_area[i][0] = bx.half_area(); - by = merge(by, bin_bounds[i][1]); - r_area[i][1] = by.half_area(); - bz = merge(bz, bin_bounds[i][2]); - r_area[i][2] = bz.half_area(); - r_area[i][3] = r_area[i][2]; - } - - /* sweep from left to right and compute SAH */ - int4 ii = make_int4(1); - float4 bestSAH = make_float4(FLT_MAX); - int4 bestSplit = make_int4(-1); - - count = make_int4(0); - - bx = BoundBox::empty; - by = BoundBox::empty; - bz = BoundBox::empty; - - for (size_t i = 1; i < num_bins; i++, ii += make_int4(1)) { - count = count + bin_count[i - 1]; - - bx = merge(bx, bin_bounds[i - 1][0]); - float Ax = bx.half_area(); - by = merge(by, bin_bounds[i - 1][1]); - float Ay = by.half_area(); - bz = merge(bz, bin_bounds[i - 1][2]); - float Az = bz.half_area(); - - float4 lCount = blocks(count); - float4 lArea = make_float4(Ax, Ay, Az, Az); - float4 sah = lArea * lCount + r_area[i] * r_count[i]; - - bestSplit = select(sah < bestSAH, ii, bestSplit); - bestSAH = min(sah, bestSAH); - } - - int4 mask = float3_to_float4(cent_bounds_.size()) <= make_float4(0.0f); - bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX); - - /* find best dimension */ - dim = get_best_dimension(bestSAH); - splitSAH = bestSAH[dim]; - pos = bestSplit[dim]; - leafSAH = bounds_.half_area() * blocks(size()); -} - -void BVHObjectBinning::split(BVHReference *prims, - BVHObjectBinning &left_o, - BVHObjectBinning &right_o) const -{ - size_t N = size(); - - BoundBox lgeom_bounds = BoundBox::empty; - BoundBox rgeom_bounds = BoundBox::empty; - BoundBox lcent_bounds = BoundBox::empty; - BoundBox rcent_bounds = BoundBox::empty; - - int64_t l = 0, r = N - 1; - - while (l <= r) { - prefetch_L2(&prims[start() + l + 8]); - prefetch_L2(&prims[start() + r - 8]); - - BVHReference prim = prims[start() + l]; - BoundBox unaligned_bounds = get_prim_bounds(prim); - float3 unaligned_center = unaligned_bounds.center2(); - float3 center = prim.bounds().center2(); - - if (get_bin(unaligned_center)[dim] < pos) { - lgeom_bounds.grow(prim.bounds()); - lcent_bounds.grow(center); - l++; - } - else { - rgeom_bounds.grow(prim.bounds()); - rcent_bounds.grow(center); - swap(prims[start() + l], prims[start() + r]); - r--; - } - } - /* finish */ - if (l != 0 && N - 1 - r != 0) { - right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N - 1 - r), - prims); - left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims); - return; - } - - /* object medium split if we did not make progress, can happen when all - * primitives have same centroid */ - lgeom_bounds = BoundBox::empty; - rgeom_bounds = BoundBox::empty; - lcent_bounds = BoundBox::empty; - rcent_bounds = BoundBox::empty; - - for (size_t i = 0; i < 
N / 2; i++) { - lgeom_bounds.grow(prims[start() + i].bounds()); - lcent_bounds.grow(prims[start() + i].bounds().center2()); - } - - for (size_t i = N / 2; i < N; i++) { - rgeom_bounds.grow(prims[start() + i].bounds()); - rcent_bounds.grow(prims[start() + i].bounds().center2()); - } - - right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N / 2, N / 2 + N % 2), - prims); - left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N / 2), prims); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_binning.h b/intern/cycles/bvh/bvh_binning.h deleted file mode 100644 index ae6dba2805d..00000000000 --- a/intern/cycles/bvh/bvh_binning.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Adapted from code copyright 2009-2011 Intel Corporation - * Modifications Copyright 2012, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BVH_BINNING_H__ -#define __BVH_BINNING_H__ - -#include "bvh/bvh_params.h" -#include "bvh/bvh_unaligned.h" - -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -class BVHBuild; - -/* Single threaded object binner. Finds the split with the best SAH heuristic - * by testing for each dimension multiple partitionings for regular spaced - * partition locations. A partitioning for a partition location is computed, - * by putting primitives whose centroid is on the left and right of the split - * location to different sets. The SAH is evaluated by computing the number of - * blocks occupied by the primitives in the partitions. */ - -class BVHObjectBinning : public BVHRange { - public: - __forceinline BVHObjectBinning() : leafSAH(FLT_MAX) - { - } - - BVHObjectBinning(const BVHRange &job, - BVHReference *prims, - const BVHUnaligned *unaligned_heuristic = NULL, - const Transform *aligned_space = NULL); - - void split(BVHReference *prims, BVHObjectBinning &left_o, BVHObjectBinning &right_o) const; - - __forceinline const BoundBox &unaligned_bounds() - { - return bounds_; - } - - float splitSAH; /* SAH cost of the best split */ - float leafSAH; /* SAH cost of creating a leaf */ - - protected: - int dim; /* best split dimension */ - int pos; /* best split position */ - size_t num_bins; /* actual number of bins to use */ - float3 scale; /* scaling factor to compute bin */ - - /* Effective bounds and centroid bounds. */ - BoundBox bounds_; - BoundBox cent_bounds_; - - const BVHUnaligned *unaligned_heuristic_; - const Transform *aligned_space_; - - enum { MAX_BINS = 32 }; - enum { LOG_BLOCK_SIZE = 2 }; - - /* computes the bin numbers for each dimension for a box. */ - __forceinline int4 get_bin(const BoundBox &box) const - { - int4 a = make_int4((box.center2() - cent_bounds_.min) * scale - make_float3(0.5f)); - int4 mn = make_int4(0); - int4 mx = make_int4((int)num_bins - 1); - - return clamp(a, mn, mx); - } - - /* computes the bin numbers for each dimension for a point. 
*/ - __forceinline int4 get_bin(const float3 &c) const - { - return make_int4((c - cent_bounds_.min) * scale - make_float3(0.5f)); - } - - /* compute the number of blocks occupied for each dimension. */ - __forceinline float4 blocks(const int4 &a) const - { - return make_float4((a + make_int4((1 << LOG_BLOCK_SIZE) - 1)) >> LOG_BLOCK_SIZE); - } - - /* compute the number of blocks occupied in one dimension. */ - __forceinline int blocks(size_t a) const - { - return (int)((a + ((1LL << LOG_BLOCK_SIZE) - 1)) >> LOG_BLOCK_SIZE); - } - - __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const - { - if (aligned_space_ == NULL) { - return prim.bounds(); - } - else { - return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_); - } - } -}; - -CCL_NAMESPACE_END - -#endif /* __BVH_BINNING_H__ */ diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp deleted file mode 100644 index 9ae40b57bc3..00000000000 --- a/intern/cycles/bvh/bvh_build.cpp +++ /dev/null @@ -1,1144 +0,0 @@ -/* - * Adapted from code copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "bvh/bvh_build.h" - -#include "bvh/bvh_binning.h" -#include "bvh/bvh_node.h" -#include "bvh/bvh_params.h" -#include "bvh_split.h" - -#include "scene/curves.h" -#include "scene/hair.h" -#include "scene/mesh.h" -#include "scene/object.h" -#include "scene/scene.h" - -#include "util/util_algorithm.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_progress.h" -#include "util/util_queue.h" -#include "util/util_simd.h" -#include "util/util_stack_allocator.h" -#include "util/util_time.h" - -CCL_NAMESPACE_BEGIN - -/* Constructor / Destructor */ - -BVHBuild::BVHBuild(const vector &objects_, - array &prim_type_, - array &prim_index_, - array &prim_object_, - array &prim_time_, - const BVHParams ¶ms_, - Progress &progress_) - : objects(objects_), - prim_type(prim_type_), - prim_index(prim_index_), - prim_object(prim_object_), - prim_time(prim_time_), - params(params_), - progress(progress_), - progress_start_time(0.0), - unaligned_heuristic(objects_) -{ - spatial_min_overlap = 0.0f; -} - -BVHBuild::~BVHBuild() -{ -} - -/* Adding References */ - -void BVHBuild::add_reference_triangles(BoundBox &root, - BoundBox ¢er, - Mesh *mesh, - int object_index) -{ - const PrimitiveType primitive_type = mesh->primitive_type(); - const Attribute *attr_mP = NULL; - if (mesh->has_motion_blur()) { - attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - } - const size_t num_triangles = mesh->num_triangles(); - for (uint j = 0; j < num_triangles; j++) { - Mesh::Triangle t = mesh->get_triangle(j); - const float3 *verts = &mesh->verts[0]; - if (attr_mP == NULL) { - BoundBox bounds = BoundBox::empty; - t.bounds_grow(verts, bounds); - if (bounds.valid() && t.valid(verts)) { - references.push_back(BVHReference(bounds, j, object_index, primitive_type)); - 
root.grow(bounds); - center.grow(bounds.center2()); - } - } - else if (params.num_motion_triangle_steps == 0 || params.use_spatial_split) { - /* Motion triangles, simple case: single node for the whole - * primitive. Lowest memory footprint and faster BVH build but - * least optimal ray-tracing. - */ - /* TODO(sergey): Support motion steps for spatially split BVH. */ - const size_t num_verts = mesh->verts.size(); - const size_t num_steps = mesh->motion_steps; - const float3 *vert_steps = attr_mP->data_float3(); - BoundBox bounds = BoundBox::empty; - t.bounds_grow(verts, bounds); - for (size_t step = 0; step < num_steps - 1; step++) { - t.bounds_grow(vert_steps + step * num_verts, bounds); - } - if (bounds.valid()) { - references.push_back(BVHReference(bounds, j, object_index, primitive_type)); - root.grow(bounds); - center.grow(bounds.center2()); - } - } - else { - /* Motion triangles, trace optimized case: we split triangle - * primitives into separate nodes for each of the time steps. - * This way we minimize overlap of neighbor curve primitives. - */ - const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1; - const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1); - const size_t num_verts = mesh->verts.size(); - const size_t num_steps = mesh->motion_steps; - const float3 *vert_steps = attr_mP->data_float3(); - /* Calculate bounding box of the previous time step. - * Will be reused later to avoid duplicated work on - * calculating BVH time step boundbox. - */ - float3 prev_verts[3]; - t.motion_verts(verts, vert_steps, num_verts, num_steps, 0.0f, prev_verts); - BoundBox prev_bounds = BoundBox::empty; - prev_bounds.grow(prev_verts[0]); - prev_bounds.grow(prev_verts[1]); - prev_bounds.grow(prev_verts[2]); - /* Create all primitive time steps, */ - for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) { - const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1; - float3 curr_verts[3]; - t.motion_verts(verts, vert_steps, num_verts, num_steps, curr_time, curr_verts); - BoundBox curr_bounds = BoundBox::empty; - curr_bounds.grow(curr_verts[0]); - curr_bounds.grow(curr_verts[1]); - curr_bounds.grow(curr_verts[2]); - BoundBox bounds = prev_bounds; - bounds.grow(curr_bounds); - if (bounds.valid()) { - const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1; - references.push_back( - BVHReference(bounds, j, object_index, primitive_type, prev_time, curr_time)); - root.grow(bounds); - center.grow(bounds.center2()); - } - /* Current time boundbox becomes previous one for the - * next time step. - */ - prev_bounds = curr_bounds; - } - } - } -} - -void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int object_index) -{ - const Attribute *curve_attr_mP = NULL; - if (hair->has_motion_blur()) { - curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - } - - const PrimitiveType primitive_type = hair->primitive_type(); - - const size_t num_curves = hair->num_curves(); - for (uint j = 0; j < num_curves; j++) { - const Hair::Curve curve = hair->get_curve(j); - const float *curve_radius = &hair->get_curve_radius()[0]; - for (int k = 0; k < curve.num_keys - 1; k++) { - if (curve_attr_mP == NULL) { - /* Really simple logic for static hair. 
*/ - BoundBox bounds = BoundBox::empty; - curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds); - if (bounds.valid()) { - int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k); - references.push_back(BVHReference(bounds, j, object_index, packed_type)); - root.grow(bounds); - center.grow(bounds.center2()); - } - } - else if (params.num_motion_curve_steps == 0 || params.use_spatial_split) { - /* Simple case of motion curves: single node for the while - * shutter time. Lowest memory usage but less optimal - * rendering. - */ - /* TODO(sergey): Support motion steps for spatially split BVH. */ - BoundBox bounds = BoundBox::empty; - curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds); - const size_t num_keys = hair->get_curve_keys().size(); - const size_t num_steps = hair->get_motion_steps(); - const float3 *key_steps = curve_attr_mP->data_float3(); - for (size_t step = 0; step < num_steps - 1; step++) { - curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds); - } - if (bounds.valid()) { - int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k); - references.push_back(BVHReference(bounds, j, object_index, packed_type)); - root.grow(bounds); - center.grow(bounds.center2()); - } - } - else { - /* Motion curves, trace optimized case: we split curve keys - * primitives into separate nodes for each of the time steps. - * This way we minimize overlap of neighbor curve primitives. - */ - const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1; - const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1); - const size_t num_steps = hair->get_motion_steps(); - const float3 *curve_keys = &hair->get_curve_keys()[0]; - const float3 *key_steps = curve_attr_mP->data_float3(); - const size_t num_keys = hair->get_curve_keys().size(); - /* Calculate bounding box of the previous time step. - * Will be reused later to avoid duplicated work on - * calculating BVH time step boundbox. - */ - float4 prev_keys[4]; - curve.cardinal_motion_keys(curve_keys, - curve_radius, - key_steps, - num_keys, - num_steps, - 0.0f, - k - 1, - k, - k + 1, - k + 2, - prev_keys); - BoundBox prev_bounds = BoundBox::empty; - curve.bounds_grow(prev_keys, prev_bounds); - /* Create all primitive time steps, */ - for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) { - const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1; - float4 curr_keys[4]; - curve.cardinal_motion_keys(curve_keys, - curve_radius, - key_steps, - num_keys, - num_steps, - curr_time, - k - 1, - k, - k + 1, - k + 2, - curr_keys); - BoundBox curr_bounds = BoundBox::empty; - curve.bounds_grow(curr_keys, curr_bounds); - BoundBox bounds = prev_bounds; - bounds.grow(curr_bounds); - if (bounds.valid()) { - const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1; - int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k); - references.push_back( - BVHReference(bounds, j, object_index, packed_type, prev_time, curr_time)); - root.grow(bounds); - center.grow(bounds.center2()); - } - /* Current time boundbox becomes previous one for the - * next time step. 
- */ - prev_bounds = curr_bounds; - } - } - } - } -} - -void BVHBuild::add_reference_geometry(BoundBox &root, - BoundBox ¢er, - Geometry *geom, - int object_index) -{ - if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { - Mesh *mesh = static_cast(geom); - add_reference_triangles(root, center, mesh, object_index); - } - else if (geom->geometry_type == Geometry::HAIR) { - Hair *hair = static_cast(geom); - add_reference_curves(root, center, hair, object_index); - } -} - -void BVHBuild::add_reference_object(BoundBox &root, BoundBox ¢er, Object *ob, int i) -{ - references.push_back(BVHReference(ob->bounds, -1, i, 0)); - root.grow(ob->bounds); - center.grow(ob->bounds.center2()); -} - -static size_t count_curve_segments(Hair *hair) -{ - size_t num = 0, num_curves = hair->num_curves(); - - for (size_t i = 0; i < num_curves; i++) - num += hair->get_curve(i).num_keys - 1; - - return num; -} - -static size_t count_primitives(Geometry *geom) -{ - if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { - Mesh *mesh = static_cast(geom); - return mesh->num_triangles(); - } - else if (geom->geometry_type == Geometry::HAIR) { - Hair *hair = static_cast(geom); - return count_curve_segments(hair); - } - - return 0; -} - -void BVHBuild::add_references(BVHRange &root) -{ - /* reserve space for references */ - size_t num_alloc_references = 0; - - foreach (Object *ob, objects) { - if (params.top_level) { - if (!ob->is_traceable()) { - continue; - } - if (!ob->get_geometry()->is_instanced()) { - num_alloc_references += count_primitives(ob->get_geometry()); - } - else - num_alloc_references++; - } - else { - num_alloc_references += count_primitives(ob->get_geometry()); - } - } - - references.reserve(num_alloc_references); - - /* add references from objects */ - BoundBox bounds = BoundBox::empty, center = BoundBox::empty; - int i = 0; - - foreach (Object *ob, objects) { - if (params.top_level) { - if (!ob->is_traceable()) { - ++i; - continue; - } - if (!ob->get_geometry()->is_instanced()) - add_reference_geometry(bounds, center, ob->get_geometry(), i); - else - add_reference_object(bounds, center, ob, i); - } - else - add_reference_geometry(bounds, center, ob->get_geometry(), i); - - i++; - - if (progress.get_cancel()) - return; - } - - /* happens mostly on empty meshes */ - if (!bounds.valid()) - bounds.grow(zero_float3()); - - root = BVHRange(bounds, center, 0, references.size()); -} - -/* Build */ - -BVHNode *BVHBuild::run() -{ - BVHRange root; - - /* add references */ - add_references(root); - - if (progress.get_cancel()) - return NULL; - - /* init spatial splits */ - if (params.top_level) { - /* NOTE: Technically it is supported by the builder but it's not really - * optimized for speed yet and not really clear yet if it has measurable - * improvement on render time. Needs some extra investigation before - * enabling spatial split for top level BVH. 
- */ - params.use_spatial_split = false; - } - - spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha; - spatial_free_index = 0; - - need_prim_time = params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0; - - /* init progress updates */ - double build_start_time; - build_start_time = progress_start_time = time_dt(); - progress_count = 0; - progress_total = references.size(); - progress_original_total = progress_total; - - prim_type.resize(references.size()); - prim_index.resize(references.size()); - prim_object.resize(references.size()); - if (need_prim_time) { - prim_time.resize(references.size()); - } - else { - prim_time.resize(0); - } - - /* build recursively */ - BVHNode *rootnode; - - if (params.use_spatial_split) { - /* Perform multithreaded spatial split build. */ - BVHSpatialStorage *local_storage = &spatial_storage.local(); - rootnode = build_node(root, references, 0, local_storage); - task_pool.wait_work(); - } - else { - /* Perform multithreaded binning build. */ - BVHObjectBinning rootbin(root, (references.size()) ? &references[0] : NULL); - rootnode = build_node(rootbin, 0); - task_pool.wait_work(); - } - - /* clean up temporary memory usage by threads */ - spatial_storage.clear(); - - /* delete if we canceled */ - if (rootnode) { - if (progress.get_cancel()) { - rootnode->deleteSubtree(); - rootnode = NULL; - VLOG(1) << "BVH build cancelled."; - } - else { - /*rotate(rootnode, 4, 5);*/ - rootnode->update_visibility(); - rootnode->update_time(); - } - if (rootnode != NULL) { - VLOG(1) << "BVH build statistics:\n" - << " Build time: " << time_dt() - build_start_time << "\n" - << " Total number of nodes: " - << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT)) - << "\n" - << " Number of inner nodes: " - << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT)) - << "\n" - << " Number of leaf nodes: " - << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT)) - << "\n" - << " Number of unaligned nodes: " - << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT)) - << "\n" - << " Allocation slop factor: " - << ((prim_type.capacity() != 0) ? 
(float)prim_type.size() / prim_type.capacity() : - 1.0f) - << "\n" - << " Maximum depth: " - << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_DEPTH)) << "\n"; - } - } - - return rootnode; -} - -void BVHBuild::progress_update() -{ - if (time_dt() - progress_start_time < 0.25) - return; - - double progress_start = (double)progress_count / (double)progress_total; - double duplicates = (double)(progress_total - progress_original_total) / (double)progress_total; - - string msg = string_printf( - "Building BVH %.0f%%, duplicates %.0f%%", progress_start * 100.0, duplicates * 100.0); - - progress.set_substatus(msg); - progress_start_time = time_dt(); -} - -void BVHBuild::thread_build_node(InnerNode *inner, - int child, - const BVHObjectBinning &range, - int level) -{ - if (progress.get_cancel()) - return; - - /* build nodes */ - BVHNode *node = build_node(range, level); - - /* set child in inner node */ - inner->children[child] = node; - - /* update progress */ - if (range.size() < THREAD_TASK_SIZE) { - /*rotate(node, INT_MAX, 5);*/ - - thread_scoped_lock lock(build_mutex); - - progress_count += range.size(); - progress_update(); - } -} - -void BVHBuild::thread_build_spatial_split_node(InnerNode *inner, - int child, - const BVHRange &range, - vector &references, - int level) -{ - if (progress.get_cancel()) { - return; - } - - /* Get per-thread memory for spatial split. */ - BVHSpatialStorage *local_storage = &spatial_storage.local(); - - /* build nodes */ - BVHNode *node = build_node(range, references, level, local_storage); - - /* set child in inner node */ - inner->children[child] = node; -} - -bool BVHBuild::range_within_max_leaf_size(const BVHRange &range, - const vector &references) const -{ - size_t size = range.size(); - size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size); - - if (size > max_leaf_size) - return false; - - size_t num_triangles = 0; - size_t num_motion_triangles = 0; - size_t num_curves = 0; - size_t num_motion_curves = 0; - - for (int i = 0; i < size; i++) { - const BVHReference &ref = references[range.start() + i]; - - if (ref.prim_type() & PRIMITIVE_ALL_CURVE) { - if (ref.prim_type() & PRIMITIVE_ALL_MOTION) { - num_motion_curves++; - } - else { - num_curves++; - } - } - else if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) { - if (ref.prim_type() & PRIMITIVE_ALL_MOTION) { - num_motion_triangles++; - } - else { - num_triangles++; - } - } - } - - return (num_triangles <= params.max_triangle_leaf_size) && - (num_motion_triangles <= params.max_motion_triangle_leaf_size) && - (num_curves <= params.max_curve_leaf_size) && - (num_motion_curves <= params.max_motion_curve_leaf_size); -} - -/* multithreaded binning builder */ -BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level) -{ - size_t size = range.size(); - float leafSAH = params.sah_primitive_cost * range.leafSAH; - float splitSAH = params.sah_node_cost * range.bounds().half_area() + - params.sah_primitive_cost * range.splitSAH; - - /* Have at least one inner node on top level, for performance and correct - * visibility tests, since object instances do not check visibility flag. - */ - if (!(range.size() > 0 && params.top_level && level == 0)) { - /* Make leaf node when threshold reached or SAH tells us. 
*/ - if ((params.small_enough_for_leaf(size, level)) || - (range_within_max_leaf_size(range, references) && leafSAH < splitSAH)) { - return create_leaf_node(range, references); - } - } - - BVHObjectBinning unaligned_range; - float unalignedSplitSAH = FLT_MAX; - float unalignedLeafSAH = FLT_MAX; - Transform aligned_space; - bool do_unalinged_split = false; - if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) { - aligned_space = unaligned_heuristic.compute_aligned_space(range, &references[0]); - unaligned_range = BVHObjectBinning( - range, &references[0], &unaligned_heuristic, &aligned_space); - unalignedSplitSAH = params.sah_node_cost * unaligned_range.unaligned_bounds().half_area() + - params.sah_primitive_cost * unaligned_range.splitSAH; - unalignedLeafSAH = params.sah_primitive_cost * unaligned_range.leafSAH; - if (!(range.size() > 0 && params.top_level && level == 0)) { - if (unalignedLeafSAH < unalignedSplitSAH && unalignedSplitSAH < splitSAH && - range_within_max_leaf_size(range, references)) { - return create_leaf_node(range, references); - } - } - /* Check whether unaligned split is better than the regular one. */ - if (unalignedSplitSAH < splitSAH) { - do_unalinged_split = true; - } - } - - /* Perform split. */ - BVHObjectBinning left, right; - if (do_unalinged_split) { - unaligned_range.split(&references[0], left, right); - } - else { - range.split(&references[0], left, right); - } - - BoundBox bounds; - if (do_unalinged_split) { - bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references[0], aligned_space); - } - else { - bounds = range.bounds(); - } - - /* Create inner node. */ - InnerNode *inner; - if (range.size() < THREAD_TASK_SIZE) { - /* local build */ - BVHNode *leftnode = build_node(left, level + 1); - BVHNode *rightnode = build_node(right, level + 1); - - inner = new InnerNode(bounds, leftnode, rightnode); - } - else { - /* Threaded build */ - inner = new InnerNode(bounds); - - task_pool.push([=] { thread_build_node(inner, 0, left, level + 1); }); - task_pool.push([=] { thread_build_node(inner, 1, right, level + 1); }); - } - - if (do_unalinged_split) { - inner->set_aligned_space(aligned_space); - } - - return inner; -} - -/* multithreaded spatial split builder */ -BVHNode *BVHBuild::build_node(const BVHRange &range, - vector &references, - int level, - BVHSpatialStorage *storage) -{ - /* Update progress. - * - * TODO(sergey): Currently it matches old behavior, but we can move it to the - * task thread (which will mimic non=split builder) and save some CPU ticks - * on checking cancel status. - */ - progress_update(); - if (progress.get_cancel()) { - return NULL; - } - - /* Small enough or too deep => create leaf. */ - if (!(range.size() > 0 && params.top_level && level == 0)) { - if (params.small_enough_for_leaf(range.size(), level)) { - progress_count += range.size(); - return create_leaf_node(range, references); - } - } - - /* Perform splitting test. 
*/ - BVHMixedSplit split(this, storage, range, references, level); - - if (!(range.size() > 0 && params.top_level && level == 0)) { - if (split.no_split) { - progress_count += range.size(); - return create_leaf_node(range, references); - } - } - float leafSAH = params.sah_primitive_cost * split.leafSAH; - float splitSAH = params.sah_node_cost * range.bounds().half_area() + - params.sah_primitive_cost * split.nodeSAH; - - BVHMixedSplit unaligned_split; - float unalignedSplitSAH = FLT_MAX; - /* float unalignedLeafSAH = FLT_MAX; */ - Transform aligned_space; - bool do_unalinged_split = false; - if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) { - aligned_space = unaligned_heuristic.compute_aligned_space(range, &references.at(0)); - unaligned_split = BVHMixedSplit( - this, storage, range, references, level, &unaligned_heuristic, &aligned_space); - /* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */ - unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() + - params.sah_primitive_cost * unaligned_split.nodeSAH; - /* TOOD(sergey): Check we can create leaf already. */ - /* Check whether unaligned split is better than the regular one. */ - if (unalignedSplitSAH < splitSAH) { - do_unalinged_split = true; - } - } - - /* Do split. */ - BVHRange left, right; - if (do_unalinged_split) { - unaligned_split.split(this, left, right, range); - } - else { - split.split(this, left, right, range); - } - - progress_total += left.size() + right.size() - range.size(); - - BoundBox bounds; - if (do_unalinged_split) { - bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references.at(0), aligned_space); - } - else { - bounds = range.bounds(); - } - - /* Create inner node. */ - InnerNode *inner; - if (range.size() < THREAD_TASK_SIZE) { - /* Local build. */ - - /* Build left node. */ - vector right_references(references.begin() + right.start(), - references.begin() + right.end()); - right.set_start(0); - - BVHNode *leftnode = build_node(left, references, level + 1, storage); - - /* Build right node. */ - BVHNode *rightnode = build_node(right, right_references, level + 1, storage); - - inner = new InnerNode(bounds, leftnode, rightnode); - } - else { - /* Threaded build. */ - inner = new InnerNode(bounds); - - vector left_references(references.begin() + left.start(), - references.begin() + left.end()); - vector right_references(references.begin() + right.start(), - references.begin() + right.end()); - right.set_start(0); - - /* Create tasks for left and right nodes, using copy for most arguments and - * move for reference to avoid memory copies. 
*/ - task_pool.push([=, refs = std::move(left_references)]() mutable { - thread_build_spatial_split_node(inner, 0, left, refs, level + 1); - }); - task_pool.push([=, refs = std::move(right_references)]() mutable { - thread_build_spatial_split_node(inner, 1, right, refs, level + 1); - }); - } - - if (do_unalinged_split) { - inner->set_aligned_space(aligned_space); - } - - return inner; -} - -/* Create Nodes */ - -BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, int num) -{ - if (num == 0) { - BoundBox bounds = BoundBox::empty; - return new LeafNode(bounds, 0, 0, 0); - } - else if (num == 1) { - assert(start < prim_type.size()); - prim_type[start] = ref->prim_type(); - prim_index[start] = ref->prim_index(); - prim_object[start] = ref->prim_object(); - if (need_prim_time) { - prim_time[start] = make_float2(ref->time_from(), ref->time_to()); - } - - const uint visibility = objects[ref->prim_object()]->visibility_for_tracing(); - BVHNode *leaf_node = new LeafNode(ref->bounds(), visibility, start, start + 1); - leaf_node->time_from = ref->time_from(); - leaf_node->time_to = ref->time_to(); - return leaf_node; - } - else { - int mid = num / 2; - BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid); - BVHNode *leaf1 = create_object_leaf_nodes(ref + mid, start + mid, num - mid); - - BoundBox bounds = BoundBox::empty; - bounds.grow(leaf0->bounds); - bounds.grow(leaf1->bounds); - - BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1); - inner_node->time_from = min(leaf0->time_from, leaf1->time_from); - inner_node->time_to = max(leaf0->time_to, leaf1->time_to); - return inner_node; - } -} - -BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector &references) -{ - /* This is a bit overallocating here (considering leaf size into account), - * but chunk-based re-allocation in vector makes it difficult to use small - * size of stack storage here. Some tweaks are possible tho. - * - * NOTES: - * - If the size is too big, we'll have inefficient stack usage, - * and lots of cache misses. - * - If the size is too small, then we can run out of memory - * allowed to be used by vector. - * In practice it wouldn't mean crash, just allocator will fallback - * to heap which is slower. - * - Optimistic re-allocation in STL could jump us out of stack usage - * because re-allocation happens in chunks and size of those chunks we - * can not control. - */ - typedef StackAllocator<256, int> LeafStackAllocator; - typedef StackAllocator<256, float2> LeafTimeStackAllocator; - typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator; - - vector p_type[PRIMITIVE_NUM]; - vector p_index[PRIMITIVE_NUM]; - vector p_object[PRIMITIVE_NUM]; - vector p_time[PRIMITIVE_NUM]; - vector p_ref[PRIMITIVE_NUM]; - - /* TODO(sergey): In theory we should be able to store references. */ - vector object_references; - - uint visibility[PRIMITIVE_NUM] = {0}; - /* NOTE: Keep initialization in sync with actual number of primitives. */ - BoundBox bounds[PRIMITIVE_NUM] = { - BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty}; - int ob_num = 0; - int num_new_prims = 0; - /* Fill in per-type type/index array. 
*/ - for (int i = 0; i < range.size(); i++) { - const BVHReference &ref = references[range.start() + i]; - if (ref.prim_index() != -1) { - uint32_t type_index = bitscan((uint32_t)(ref.prim_type() & PRIMITIVE_ALL)); - p_ref[type_index].push_back(ref); - p_type[type_index].push_back(ref.prim_type()); - p_index[type_index].push_back(ref.prim_index()); - p_object[type_index].push_back(ref.prim_object()); - p_time[type_index].push_back(make_float2(ref.time_from(), ref.time_to())); - - bounds[type_index].grow(ref.bounds()); - visibility[type_index] |= objects[ref.prim_object()]->visibility_for_tracing(); - ++num_new_prims; - } - else { - object_references.push_back(ref); - ++ob_num; - } - } - - /* Create leaf nodes for every existing primitive. - * - * Here we write primitive types, indices and objects to a temporary array. - * This way we keep all the heavy memory allocation code outside of the - * thread lock in the case of spatial split building. - * - * TODO(sergey): With some pointer trickery we can write directly to the - * destination buffers for the non-spatial split BVH. - */ - BVHNode *leaves[PRIMITIVE_NUM + 1] = {NULL}; - int num_leaves = 0; - size_t start_index = 0; - vector local_prim_type, local_prim_index, local_prim_object; - vector local_prim_time; - local_prim_type.resize(num_new_prims); - local_prim_index.resize(num_new_prims); - local_prim_object.resize(num_new_prims); - if (need_prim_time) { - local_prim_time.resize(num_new_prims); - } - for (int i = 0; i < PRIMITIVE_NUM; ++i) { - int num = (int)p_type[i].size(); - if (num != 0) { - assert(p_type[i].size() == p_index[i].size()); - assert(p_type[i].size() == p_object[i].size()); - Transform aligned_space; - bool alignment_found = false; - for (int j = 0; j < num; ++j) { - const int index = start_index + j; - local_prim_type[index] = p_type[i][j]; - local_prim_index[index] = p_index[i][j]; - local_prim_object[index] = p_object[i][j]; - if (need_prim_time) { - local_prim_time[index] = p_time[i][j]; - } - if (params.use_unaligned_nodes && !alignment_found) { - alignment_found = unaligned_heuristic.compute_aligned_space(p_ref[i][j], &aligned_space); - } - } - LeafNode *leaf_node = new LeafNode(bounds[i], visibility[i], start_index, start_index + num); - if (true) { - float time_from = 1.0f, time_to = 0.0f; - for (int j = 0; j < num; ++j) { - const BVHReference &ref = p_ref[i][j]; - time_from = min(time_from, ref.time_from()); - time_to = max(time_to, ref.time_to()); - } - leaf_node->time_from = time_from; - leaf_node->time_to = time_to; - } - if (alignment_found) { - /* Need to recalculate leaf bounds with new alignment. */ - leaf_node->bounds = BoundBox::empty; - for (int j = 0; j < num; ++j) { - const BVHReference &ref = p_ref[i][j]; - BoundBox ref_bounds = unaligned_heuristic.compute_aligned_prim_boundbox(ref, - aligned_space); - leaf_node->bounds.grow(ref_bounds); - } - /* Set alignment space. */ - leaf_node->set_aligned_space(aligned_space); - } - leaves[num_leaves++] = leaf_node; - start_index += num; - } - } - /* Get size of new data to be copied to the packed arrays. */ - const int num_new_leaf_data = start_index; - const size_t new_leaf_data_size = sizeof(int) * num_new_leaf_data; - /* Copy actual data to the packed array. */ - if (params.use_spatial_split) { - spatial_spin_lock.lock(); - /* We use first free index in the packed arrays and mode pointer to the - * end of the current range. 
- * - * This doesn't give deterministic packed arrays, but it shouldn't really - * matter because order of children in BVH is deterministic. - */ - start_index = spatial_free_index; - spatial_free_index += range.size(); - /* Extend an array when needed. */ - const size_t range_end = start_index + range.size(); - if (prim_type.size() < range_end) { - /* Avoid extra re-allocations by pre-allocating bigger array in an - * advance. - */ - if (range_end >= prim_type.capacity()) { - float progress = (float)progress_count / (float)progress_total; - float factor = (1.0f - progress); - const size_t reserve = (size_t)(range_end + (float)range_end * factor); - prim_type.reserve(reserve); - prim_index.reserve(reserve); - prim_object.reserve(reserve); - if (need_prim_time) { - prim_time.reserve(reserve); - } - } - - prim_type.resize(range_end); - prim_index.resize(range_end); - prim_object.resize(range_end); - if (need_prim_time) { - prim_time.resize(range_end); - } - } - /* Perform actual data copy. */ - if (new_leaf_data_size > 0) { - memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size); - memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size); - memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size); - if (need_prim_time) { - memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2) * num_new_leaf_data); - } - } - spatial_spin_lock.unlock(); - } - else { - /* For the regular BVH builder we simply copy new data starting at the - * range start. This is totally thread-safe, all threads are living - * inside of their own range. - */ - start_index = range.start(); - if (new_leaf_data_size > 0) { - memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size); - memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size); - memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size); - if (need_prim_time) { - memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2) * num_new_leaf_data); - } - } - } - - /* So far leaves were created with the zero-based index in an arrays, - * here we modify the indices to correspond to actual packed array start - * index. - */ - for (int i = 0; i < num_leaves; ++i) { - LeafNode *leaf = (LeafNode *)leaves[i]; - leaf->lo += start_index; - leaf->hi += start_index; - } - - /* Create leaf node for object. */ - if (num_leaves == 0 || ob_num) { - /* Only create object leaf nodes if there are objects or no other - * nodes created. - */ - const BVHReference *ref = (ob_num) ? &object_references[0] : NULL; - leaves[num_leaves] = create_object_leaf_nodes(ref, start_index + num_new_leaf_data, ob_num); - ++num_leaves; - } - - /* TODO(sergey): Need to take care of alignment when number of leaves - * is more than 1. - */ - if (num_leaves == 1) { - /* Simplest case: single leaf, just return it. - * In all the rest cases we'll be creating intermediate inner node with - * an appropriate bounding box. - */ - return leaves[0]; - } - else if (num_leaves == 2) { - return new InnerNode(range.bounds(), leaves[0], leaves[1]); - } - else if (num_leaves == 3) { - BoundBox inner_bounds = merge(leaves[1]->bounds, leaves[2]->bounds); - BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]); - return new InnerNode(range.bounds(), leaves[0], inner); - } - else { - /* Should be doing more branches if more primitive types added. 
*/ - assert(num_leaves <= 5); - BoundBox inner_bounds_a = merge(leaves[0]->bounds, leaves[1]->bounds); - BoundBox inner_bounds_b = merge(leaves[2]->bounds, leaves[3]->bounds); - BVHNode *inner_a = new InnerNode(inner_bounds_a, leaves[0], leaves[1]); - BVHNode *inner_b = new InnerNode(inner_bounds_b, leaves[2], leaves[3]); - BoundBox inner_bounds_c = merge(inner_a->bounds, inner_b->bounds); - BVHNode *inner_c = new InnerNode(inner_bounds_c, inner_a, inner_b); - if (num_leaves == 5) { - return new InnerNode(range.bounds(), inner_c, leaves[4]); - } - return inner_c; - } - -#undef MAX_ITEMS_PER_LEAF -} - -/* Tree Rotations */ - -void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations) -{ - /* in tested scenes, this resulted in slightly slower raytracing, so disabled - * it for now. could be implementation bug, or depend on the scene */ - if (node) - for (int i = 0; i < iterations; i++) - rotate(node, max_depth); -} - -void BVHBuild::rotate(BVHNode *node, int max_depth) -{ - /* nothing to rotate if we reached a leaf node. */ - if (node->is_leaf() || max_depth < 0) - return; - - InnerNode *parent = (InnerNode *)node; - - /* rotate all children first */ - for (size_t c = 0; c < 2; c++) - rotate(parent->children[c], max_depth - 1); - - /* compute current area of all children */ - BoundBox bounds0 = parent->children[0]->bounds; - BoundBox bounds1 = parent->children[1]->bounds; - - float area0 = bounds0.half_area(); - float area1 = bounds1.half_area(); - float4 child_area = make_float4(area0, area1, 0.0f, 0.0f); - - /* find best rotation. we pick a target child of a first child, and swap - * this with an other child. we perform the best such swap. */ - float best_cost = FLT_MAX; - int best_child = -1, best_target = -1, best_other = -1; - - for (size_t c = 0; c < 2; c++) { - /* ignore leaf nodes as we cannot descent into */ - if (parent->children[c]->is_leaf()) - continue; - - InnerNode *child = (InnerNode *)parent->children[c]; - BoundBox &other = (c == 0) ? bounds1 : bounds0; - - /* transpose child bounds */ - BoundBox target0 = child->children[0]->bounds; - BoundBox target1 = child->children[1]->bounds; - - /* compute cost for both possible swaps */ - float cost0 = merge(other, target1).half_area() - child_area[c]; - float cost1 = merge(target0, other).half_area() - child_area[c]; - - if (min(cost0, cost1) < best_cost) { - best_child = (int)c; - best_other = (int)(1 - c); - - if (cost0 < cost1) { - best_cost = cost0; - best_target = 0; - } - else { - best_cost = cost0; - best_target = 1; - } - } - } - - /* if we did not find a swap that improves the SAH then do nothing */ - if (best_cost >= 0) - return; - - assert(best_child == 0 || best_child == 1); - assert(best_target != -1); - - /* perform the best found tree rotation */ - InnerNode *child = (InnerNode *)parent->children[best_child]; - - swap(parent->children[best_other], child->children[best_target]); - child->bounds = merge(child->children[0]->bounds, child->children[1]->bounds); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h deleted file mode 100644 index c35af083fbd..00000000000 --- a/intern/cycles/bvh/bvh_build.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Adapted from code copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BVH_BUILD_H__ -#define __BVH_BUILD_H__ - -#include - -#include "bvh/bvh_params.h" -#include "bvh/bvh_unaligned.h" - -#include "util/util_array.h" -#include "util/util_task.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -class Boundbox; -class BVHBuildTask; -class BVHNode; -class BVHSpatialSplitBuildTask; -class BVHParams; -class InnerNode; -class Geometry; -class Hair; -class Mesh; -class Object; -class Progress; - -/* BVH Builder */ - -class BVHBuild { - public: - /* Constructor/Destructor */ - BVHBuild(const vector &objects, - array &prim_type, - array &prim_index, - array &prim_object, - array &prim_time, - const BVHParams ¶ms, - Progress &progress); - ~BVHBuild(); - - BVHNode *run(); - - protected: - friend class BVHMixedSplit; - friend class BVHObjectSplit; - friend class BVHSpatialSplit; - friend class BVHBuildTask; - friend class BVHSpatialSplitBuildTask; - friend class BVHObjectBinning; - - /* Adding references. */ - void add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *mesh, int i); - void add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int i); - void add_reference_geometry(BoundBox &root, BoundBox ¢er, Geometry *geom, int i); - void add_reference_object(BoundBox &root, BoundBox ¢er, Object *ob, int i); - void add_references(BVHRange &root); - - /* Building. */ - BVHNode *build_node(const BVHRange &range, - vector &references, - int level, - BVHSpatialStorage *storage); - BVHNode *build_node(const BVHObjectBinning &range, int level); - BVHNode *create_leaf_node(const BVHRange &range, const vector &references); - BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num); - - bool range_within_max_leaf_size(const BVHRange &range, - const vector &references) const; - - /* Threads. */ - enum { THREAD_TASK_SIZE = 4096 }; - void thread_build_node(InnerNode *node, int child, const BVHObjectBinning &range, int level); - void thread_build_spatial_split_node(InnerNode *node, - int child, - const BVHRange &range, - vector &references, - int level); - thread_mutex build_mutex; - - /* Progress. */ - void progress_update(); - - /* Tree rotations. */ - void rotate(BVHNode *node, int max_depth); - void rotate(BVHNode *node, int max_depth, int iterations); - - /* Objects and primitive references. */ - vector objects; - vector references; - int num_original_references; - - /* Output primitive indexes and objects. */ - array &prim_type; - array &prim_index; - array &prim_object; - array &prim_time; - - bool need_prim_time; - - /* Build parameters. */ - BVHParams params; - - /* Progress reporting. */ - Progress &progress; - double progress_start_time; - size_t progress_count; - size_t progress_total; - size_t progress_original_total; - - /* Spatial splitting. */ - float spatial_min_overlap; - enumerable_thread_specific spatial_storage; - size_t spatial_free_index; - thread_spin_lock spatial_spin_lock; - - /* Threads. */ - TaskPool task_pool; - - /* Unaligned building. 
*/ - BVHUnaligned unaligned_heuristic; -}; - -CCL_NAMESPACE_END - -#endif /* __BVH_BUILD_H__ */ diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp deleted file mode 100644 index 59a72f27294..00000000000 --- a/intern/cycles/bvh/bvh_embree.cpp +++ /dev/null @@ -1,728 +0,0 @@ -/* - * Copyright 2018, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This class implements a ray accelerator for Cycles using Intel's Embree library. - * It supports triangles, curves, object and deformation blur and instancing. - * - * Since Embree allows object to be either curves or triangles but not both, Cycles object IDs are - * mapped to Embree IDs by multiplying by two and adding one for curves. - * - * This implementation shares RTCDevices between Cycles instances. Eventually each instance should - * get a separate RTCDevice to correctly keep track of memory usage. - * - * Vertex and index buffers are duplicated between Cycles device arrays and Embree. These could be - * merged, which would require changes to intersection refinement, shader setup, mesh light - * sampling and a few other places in Cycles where direct access to vertex data is required. - */ - -#ifdef WITH_EMBREE - -# include - -# include "bvh/bvh_embree.h" - -/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH. - */ -# include "kernel/bvh/bvh_embree.h" -# include "kernel/bvh/bvh_util.h" -# include "kernel/device/cpu/compat.h" -# include "kernel/device/cpu/globals.h" -# include "kernel/sample/sample_lcg.h" - -# include "scene/hair.h" -# include "scene/mesh.h" -# include "scene/object.h" - -# include "util/util_foreach.h" -# include "util/util_logging.h" -# include "util/util_progress.h" -# include "util/util_stats.h" - -CCL_NAMESPACE_BEGIN - -static_assert(Object::MAX_MOTION_STEPS <= RTC_MAX_TIME_STEP_COUNT, - "Object and Embree max motion steps inconsistent"); -static_assert(Object::MAX_MOTION_STEPS == Geometry::MAX_MOTION_STEPS, - "Object and Geometry max motion steps inconsistent"); - -# define IS_HAIR(x) (x & 1) - -/* This gets called by Embree at every valid ray/object intersection. - * Things like recording subsurface or shadow hits for later evaluation - * as well as filtering for volume objects happen here. - * Cycles' own BVH does that directly inside the traversal calls. - */ -static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args) -{ - /* Current implementation in Cycles assumes only single-ray intersection queries. */ - assert(args->N == 1); - - const RTCRay *ray = (RTCRay *)args->ray; - RTCHit *hit = (RTCHit *)args->hit; - CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt; - const KernelGlobalsCPU *kg = ctx->kg; - - switch (ctx->type) { - case CCLIntersectContext::RAY_SHADOW_ALL: { - Intersection current_isect; - kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); - - /* If no transparent shadows or max number of hits exceeded, all light is blocked. 
*/ - const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type); - if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) { - ctx->opaque_hit = true; - return; - } - - ++ctx->num_hits; - - /* Always use baked shadow transparency for curves. */ - if (current_isect.type & PRIMITIVE_ALL_CURVE) { - ctx->throughput *= intersection_curve_shadow_transparency( - kg, current_isect.object, current_isect.prim, current_isect.u); - - if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { - ctx->opaque_hit = true; - return; - } - else { - *args->valid = 0; - return; - } - } - - /* Test if we need to record this transparent intersection. */ - const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); - if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) { - /* If maximum number of hits was reached, replace the intersection with the - * highest distance. We want to find the N closest intersections. */ - const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits); - uint isect_index = num_recorded_hits; - if (num_recorded_hits + 1 >= max_record_hits) { - float max_t = ctx->isect_s[0].t; - uint max_recorded_hit = 0; - - for (uint i = 1; i < num_recorded_hits; ++i) { - if (ctx->isect_s[i].t > max_t) { - max_recorded_hit = i; - max_t = ctx->isect_s[i].t; - } - } - - if (num_recorded_hits >= max_record_hits) { - isect_index = max_recorded_hit; - } - - /* Limit the ray distance and stop counting hits beyond this. - * TODO: is there some way we can tell Embree to stop intersecting beyond - * this distance when max number of hits is reached?. Or maybe it will - * become irrelevant if we make max_hits a very high number on the CPU. */ - ctx->max_t = max(current_isect.t, max_t); - } - - ctx->isect_s[isect_index] = current_isect; - } - - /* Always increase the number of recorded hits, even beyond the maximum, - * so that we can detect this and trace another ray if needed. */ - ++ctx->num_recorded_hits; - - /* This tells Embree to continue tracing. */ - *args->valid = 0; - break; - } - case CCLIntersectContext::RAY_LOCAL: - case CCLIntersectContext::RAY_SSS: { - /* Check if it's hitting the correct object. */ - Intersection current_isect; - if (ctx->type == CCLIntersectContext::RAY_SSS) { - kernel_embree_convert_sss_hit(kg, ray, hit, ¤t_isect, ctx->local_object_id); - } - else { - kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); - if (ctx->local_object_id != current_isect.object) { - /* This tells Embree to continue tracing. */ - *args->valid = 0; - break; - } - } - - /* No intersection information requested, just return a hit. */ - if (ctx->max_hits == 0) { - break; - } - - /* Ignore curves. */ - if (IS_HAIR(hit->geomID)) { - /* This tells Embree to continue tracing. */ - *args->valid = 0; - break; - } - - LocalIntersection *local_isect = ctx->local_isect; - int hit_idx = 0; - - if (ctx->lcg_state) { - /* See triangle_intersect_subsurface() for the native equivalent. */ - for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) { - if (local_isect->hits[i].t == ray->tfar) { - /* This tells Embree to continue tracing. 
*/ - *args->valid = 0; - return; - } - } - - local_isect->num_hits++; - - if (local_isect->num_hits <= ctx->max_hits) { - hit_idx = local_isect->num_hits - 1; - } - else { - /* reservoir sampling: if we are at the maximum number of - * hits, randomly replace element or skip it */ - hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits; - - if (hit_idx >= ctx->max_hits) { - /* This tells Embree to continue tracing. */ - *args->valid = 0; - return; - } - } - } - else { - /* Record closest intersection only. */ - if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) { - *args->valid = 0; - return; - } - - local_isect->num_hits = 1; - } - - /* record intersection */ - local_isect->hits[hit_idx] = current_isect; - local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)); - /* This tells Embree to continue tracing. */ - *args->valid = 0; - break; - } - case CCLIntersectContext::RAY_VOLUME_ALL: { - /* Append the intersection to the end of the array. */ - if (ctx->num_hits < ctx->max_hits) { - Intersection current_isect; - kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); - Intersection *isect = &ctx->isect_s[ctx->num_hits]; - ++ctx->num_hits; - *isect = current_isect; - /* Only primitives from volume object. */ - uint tri_object = isect->object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - --ctx->num_hits; - } - /* This tells Embree to continue tracing. */ - *args->valid = 0; - break; - } - } - case CCLIntersectContext::RAY_REGULAR: - default: - /* Nothing to do here. */ - break; - } -} - -static void rtc_filter_func_thick_curve(const RTCFilterFunctionNArguments *args) -{ - const RTCRay *ray = (RTCRay *)args->ray; - RTCHit *hit = (RTCHit *)args->hit; - - /* Always ignore back-facing intersections. */ - if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), - make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) { - *args->valid = 0; - return; - } -} - -static void rtc_filter_occluded_func_thick_curve(const RTCFilterFunctionNArguments *args) -{ - const RTCRay *ray = (RTCRay *)args->ray; - RTCHit *hit = (RTCHit *)args->hit; - - /* Always ignore back-facing intersections. */ - if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), - make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) { - *args->valid = 0; - return; - } - - rtc_filter_occluded_func(args); -} - -static size_t unaccounted_mem = 0; - -static bool rtc_memory_monitor_func(void *userPtr, const ssize_t bytes, const bool) -{ - Stats *stats = (Stats *)userPtr; - if (stats) { - if (bytes > 0) { - stats->mem_alloc(bytes); - } - else { - stats->mem_free(-bytes); - } - } - else { - /* A stats pointer may not yet be available. Keep track of the memory usage for later. 
*/ - if (bytes >= 0) { - atomic_add_and_fetch_z(&unaccounted_mem, bytes); - } - else { - atomic_sub_and_fetch_z(&unaccounted_mem, -bytes); - } - } - return true; -} - -static void rtc_error_func(void *, enum RTCError, const char *str) -{ - VLOG(1) << str; -} - -static double progress_start_time = 0.0f; - -static bool rtc_progress_func(void *user_ptr, const double n) -{ - Progress *progress = (Progress *)user_ptr; - - if (time_dt() - progress_start_time < 0.25) { - return true; - } - - string msg = string_printf("Building BVH %.0f%%", n * 100.0); - progress->set_substatus(msg); - progress_start_time = time_dt(); - - return !progress->get_cancel(); -} - -BVHEmbree::BVHEmbree(const BVHParams ¶ms_, - const vector &geometry_, - const vector &objects_) - : BVH(params_, geometry_, objects_), - scene(NULL), - rtc_device(NULL), - build_quality(RTC_BUILD_QUALITY_REFIT) -{ - SIMD_SET_FLUSH_TO_ZERO; -} - -BVHEmbree::~BVHEmbree() -{ - if (scene) { - rtcReleaseScene(scene); - } -} - -void BVHEmbree::build(Progress &progress, Stats *stats, RTCDevice rtc_device_) -{ - rtc_device = rtc_device_; - assert(rtc_device); - - rtcSetDeviceErrorFunction(rtc_device, rtc_error_func, NULL); - rtcSetDeviceMemoryMonitorFunction(rtc_device, rtc_memory_monitor_func, stats); - - progress.set_substatus("Building BVH"); - - if (scene) { - rtcReleaseScene(scene); - scene = NULL; - } - - const bool dynamic = params.bvh_type == BVH_TYPE_DYNAMIC; - - scene = rtcNewScene(rtc_device); - const RTCSceneFlags scene_flags = (dynamic ? RTC_SCENE_FLAG_DYNAMIC : RTC_SCENE_FLAG_NONE) | - RTC_SCENE_FLAG_COMPACT | RTC_SCENE_FLAG_ROBUST; - rtcSetSceneFlags(scene, scene_flags); - build_quality = dynamic ? RTC_BUILD_QUALITY_LOW : - (params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH : - RTC_BUILD_QUALITY_MEDIUM); - rtcSetSceneBuildQuality(scene, build_quality); - - int i = 0; - foreach (Object *ob, objects) { - if (params.top_level) { - if (!ob->is_traceable()) { - ++i; - continue; - } - if (!ob->get_geometry()->is_instanced()) { - add_object(ob, i); - } - else { - add_instance(ob, i); - } - } - else { - add_object(ob, i); - } - ++i; - if (progress.get_cancel()) - return; - } - - if (progress.get_cancel()) { - return; - } - - rtcSetSceneProgressMonitorFunction(scene, rtc_progress_func, &progress); - rtcCommitScene(scene); -} - -void BVHEmbree::add_object(Object *ob, int i) -{ - Geometry *geom = ob->get_geometry(); - - if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { - Mesh *mesh = static_cast(geom); - if (mesh->num_triangles() > 0) { - add_triangles(ob, mesh, i); - } - } - else if (geom->geometry_type == Geometry::HAIR) { - Hair *hair = static_cast(geom); - if (hair->num_curves() > 0) { - add_curves(ob, hair, i); - } - } -} - -void BVHEmbree::add_instance(Object *ob, int i) -{ - BVHEmbree *instance_bvh = (BVHEmbree *)(ob->get_geometry()->bvh); - assert(instance_bvh != NULL); - - const size_t num_object_motion_steps = ob->use_motion() ? 
ob->get_motion().size() : 1; - const size_t num_motion_steps = min(num_object_motion_steps, RTC_MAX_TIME_STEP_COUNT); - assert(num_object_motion_steps <= RTC_MAX_TIME_STEP_COUNT); - - RTCGeometry geom_id = rtcNewGeometry(rtc_device, RTC_GEOMETRY_TYPE_INSTANCE); - rtcSetGeometryInstancedScene(geom_id, instance_bvh->scene); - rtcSetGeometryTimeStepCount(geom_id, num_motion_steps); - - if (ob->use_motion()) { - array decomp(ob->get_motion().size()); - transform_motion_decompose(decomp.data(), ob->get_motion().data(), ob->get_motion().size()); - for (size_t step = 0; step < num_motion_steps; ++step) { - RTCQuaternionDecomposition rtc_decomp; - rtcInitQuaternionDecomposition(&rtc_decomp); - rtcQuaternionDecompositionSetQuaternion( - &rtc_decomp, decomp[step].x.w, decomp[step].x.x, decomp[step].x.y, decomp[step].x.z); - rtcQuaternionDecompositionSetScale( - &rtc_decomp, decomp[step].y.w, decomp[step].z.w, decomp[step].w.w); - rtcQuaternionDecompositionSetTranslation( - &rtc_decomp, decomp[step].y.x, decomp[step].y.y, decomp[step].y.z); - rtcQuaternionDecompositionSetSkew( - &rtc_decomp, decomp[step].z.x, decomp[step].z.y, decomp[step].w.x); - rtcSetGeometryTransformQuaternion(geom_id, step, &rtc_decomp); - } - } - else { - rtcSetGeometryTransform( - geom_id, 0, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float *)&ob->get_tfm()); - } - - rtcSetGeometryUserData(geom_id, (void *)instance_bvh->scene); - rtcSetGeometryMask(geom_id, ob->visibility_for_tracing()); - - rtcCommitGeometry(geom_id); - rtcAttachGeometryByID(scene, geom_id, i * 2); - rtcReleaseGeometry(geom_id); -} - -void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i) -{ - size_t prim_offset = mesh->prim_offset; - - const Attribute *attr_mP = NULL; - size_t num_motion_steps = 1; - if (mesh->has_motion_blur()) { - attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - if (attr_mP) { - num_motion_steps = mesh->get_motion_steps(); - } - } - - assert(num_motion_steps <= RTC_MAX_TIME_STEP_COUNT); - num_motion_steps = min(num_motion_steps, RTC_MAX_TIME_STEP_COUNT); - - const size_t num_triangles = mesh->num_triangles(); - - RTCGeometry geom_id = rtcNewGeometry(rtc_device, RTC_GEOMETRY_TYPE_TRIANGLE); - rtcSetGeometryBuildQuality(geom_id, build_quality); - rtcSetGeometryTimeStepCount(geom_id, num_motion_steps); - - unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer( - geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles); - assert(rtc_indices); - if (!rtc_indices) { - VLOG(1) << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str() - << ".\n"; - return; - } - for (size_t j = 0; j < num_triangles; ++j) { - Mesh::Triangle t = mesh->get_triangle(j); - rtc_indices[j * 3] = t.v[0]; - rtc_indices[j * 3 + 1] = t.v[1]; - rtc_indices[j * 3 + 2] = t.v[2]; - } - - set_tri_vertex_buffer(geom_id, mesh, false); - - rtcSetGeometryUserData(geom_id, (void *)prim_offset); - rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func); - rtcSetGeometryMask(geom_id, ob->visibility_for_tracing()); - - rtcCommitGeometry(geom_id); - rtcAttachGeometryByID(scene, geom_id, i * 2); - rtcReleaseGeometry(geom_id); -} - -void BVHEmbree::set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, const bool update) -{ - const Attribute *attr_mP = NULL; - size_t num_motion_steps = 1; - int t_mid = 0; - if (mesh->has_motion_blur()) { - attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - if (attr_mP) { - num_motion_steps = mesh->get_motion_steps(); - t_mid = 
(num_motion_steps - 1) / 2; - if (num_motion_steps > RTC_MAX_TIME_STEP_COUNT) { - assert(0); - num_motion_steps = RTC_MAX_TIME_STEP_COUNT; - } - } - } - const size_t num_verts = mesh->get_verts().size(); - - for (int t = 0; t < num_motion_steps; ++t) { - const float3 *verts; - if (t == t_mid) { - verts = mesh->get_verts().data(); - } - else { - int t_ = (t > t_mid) ? (t - 1) : t; - verts = &attr_mP->data_float3()[t_ * num_verts]; - } - - float *rtc_verts = (update) ? - (float *)rtcGetGeometryBufferData(geom_id, RTC_BUFFER_TYPE_VERTEX, t) : - (float *)rtcSetNewGeometryBuffer(geom_id, - RTC_BUFFER_TYPE_VERTEX, - t, - RTC_FORMAT_FLOAT3, - sizeof(float) * 3, - num_verts + 1); - - assert(rtc_verts); - if (rtc_verts) { - for (size_t j = 0; j < num_verts; ++j) { - rtc_verts[0] = verts[j].x; - rtc_verts[1] = verts[j].y; - rtc_verts[2] = verts[j].z; - rtc_verts += 3; - } - } - - if (update) { - rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t); - } - } -} - -void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, const bool update) -{ - const Attribute *attr_mP = NULL; - size_t num_motion_steps = 1; - if (hair->has_motion_blur()) { - attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - if (attr_mP) { - num_motion_steps = hair->get_motion_steps(); - } - } - - const size_t num_curves = hair->num_curves(); - size_t num_keys = 0; - for (size_t j = 0; j < num_curves; ++j) { - const Hair::Curve c = hair->get_curve(j); - num_keys += c.num_keys; - } - - /* Catmull-Rom splines need extra CVs at the beginning and end of each curve. */ - size_t num_keys_embree = num_keys; - num_keys_embree += num_curves * 2; - - /* Copy the CV data to Embree */ - const int t_mid = (num_motion_steps - 1) / 2; - const float *curve_radius = &hair->get_curve_radius()[0]; - for (int t = 0; t < num_motion_steps; ++t) { - const float3 *verts; - if (t == t_mid || attr_mP == NULL) { - verts = &hair->get_curve_keys()[0]; - } - else { - int t_ = (t > t_mid) ? (t - 1) : t; - verts = &attr_mP->data_float3()[t_ * num_keys]; - } - - float4 *rtc_verts = (update) ? (float4 *)rtcGetGeometryBufferData( - geom_id, RTC_BUFFER_TYPE_VERTEX, t) : - (float4 *)rtcSetNewGeometryBuffer(geom_id, - RTC_BUFFER_TYPE_VERTEX, - t, - RTC_FORMAT_FLOAT4, - sizeof(float) * 4, - num_keys_embree); - - assert(rtc_verts); - if (rtc_verts) { - const size_t num_curves = hair->num_curves(); - for (size_t j = 0; j < num_curves; ++j) { - Hair::Curve c = hair->get_curve(j); - int fk = c.first_key; - int k = 1; - for (; k < c.num_keys + 1; ++k, ++fk) { - rtc_verts[k] = float3_to_float4(verts[fk]); - rtc_verts[k].w = curve_radius[fk]; - } - /* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. 
*/ - rtc_verts[0] = rtc_verts[1]; - rtc_verts[k] = rtc_verts[k - 1]; - rtc_verts += c.num_keys + 2; - } - } - - if (update) { - rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t); - } - } -} - -void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i) -{ - size_t prim_offset = hair->curve_segment_offset; - - const Attribute *attr_mP = NULL; - size_t num_motion_steps = 1; - if (hair->has_motion_blur()) { - attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - if (attr_mP) { - num_motion_steps = hair->get_motion_steps(); - } - } - - assert(num_motion_steps <= RTC_MAX_TIME_STEP_COUNT); - num_motion_steps = min(num_motion_steps, RTC_MAX_TIME_STEP_COUNT); - - const size_t num_curves = hair->num_curves(); - size_t num_segments = 0; - for (size_t j = 0; j < num_curves; ++j) { - Hair::Curve c = hair->get_curve(j); - assert(c.num_segments() > 0); - num_segments += c.num_segments(); - } - - enum RTCGeometryType type = (hair->curve_shape == CURVE_RIBBON ? - RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE : - RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE); - - RTCGeometry geom_id = rtcNewGeometry(rtc_device, type); - rtcSetGeometryTessellationRate(geom_id, params.curve_subdivisions + 1); - unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer( - geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT, sizeof(int), num_segments); - size_t rtc_index = 0; - for (size_t j = 0; j < num_curves; ++j) { - Hair::Curve c = hair->get_curve(j); - for (size_t k = 0; k < c.num_segments(); ++k) { - rtc_indices[rtc_index] = c.first_key + k; - /* Room for extra CVs at Catmull-Rom splines. */ - rtc_indices[rtc_index] += j * 2; - - ++rtc_index; - } - } - - rtcSetGeometryBuildQuality(geom_id, build_quality); - rtcSetGeometryTimeStepCount(geom_id, num_motion_steps); - - set_curve_vertex_buffer(geom_id, hair, false); - - rtcSetGeometryUserData(geom_id, (void *)prim_offset); - if (hair->curve_shape == CURVE_RIBBON) { - rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func); - } - else { - rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_thick_curve); - rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_thick_curve); - } - rtcSetGeometryMask(geom_id, ob->visibility_for_tracing()); - - rtcCommitGeometry(geom_id); - rtcAttachGeometryByID(scene, geom_id, i * 2 + 1); - rtcReleaseGeometry(geom_id); -} - -void BVHEmbree::refit(Progress &progress) -{ - progress.set_substatus("Refitting BVH nodes"); - - /* Update all vertex buffers, then tell Embree to rebuild/-fit the BVHs. 
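The padding performed in set_curve_vertex_buffer() above and the j * 2 index offset in add_curves() can be pictured with a small standalone sketch. The CV struct and helper below are illustrative only, not Cycles or Embree API, and assume a non-empty key list:

#include <vector>

struct CV { float x, y, z, radius; };

/* Build the padded control-vertex list for one curve: N keys become N + 2 CVs,
 * with the first and last key repeated so every Catmull-Rom segment has a
 * neighbour control point on both sides. */
static std::vector<CV> pad_catmull_rom(const std::vector<CV> &keys)
{
  std::vector<CV> padded;
  padded.reserve(keys.size() + 2);
  padded.push_back(keys.front());                        /* duplicated first CV */
  padded.insert(padded.end(), keys.begin(), keys.end());
  padded.push_back(keys.back());                         /* duplicated last CV */
  return padded;
}

This is also why each curve's segment indices are shifted by j * 2 in add_curves(): every preceding curve contributes two duplicated CVs to the shared Embree vertex buffer.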
*/ - unsigned geom_id = 0; - foreach (Object *ob, objects) { - if (!params.top_level || (ob->is_traceable() && !ob->get_geometry()->is_instanced())) { - Geometry *geom = ob->get_geometry(); - - if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { - Mesh *mesh = static_cast(geom); - if (mesh->num_triangles() > 0) { - RTCGeometry geom = rtcGetGeometry(scene, geom_id); - set_tri_vertex_buffer(geom, mesh, true); - rtcSetGeometryUserData(geom, (void *)mesh->prim_offset); - rtcCommitGeometry(geom); - } - } - else if (geom->geometry_type == Geometry::HAIR) { - Hair *hair = static_cast(geom); - if (hair->num_curves() > 0) { - RTCGeometry geom = rtcGetGeometry(scene, geom_id + 1); - set_curve_vertex_buffer(geom, hair, true); - rtcSetGeometryUserData(geom, (void *)hair->curve_segment_offset); - rtcCommitGeometry(geom); - } - } - } - geom_id += 2; - } - - rtcCommitScene(scene); -} - -CCL_NAMESPACE_END - -#endif /* WITH_EMBREE */ diff --git a/intern/cycles/bvh/bvh_embree.h b/intern/cycles/bvh/bvh_embree.h deleted file mode 100644 index 01636fbd1dc..00000000000 --- a/intern/cycles/bvh/bvh_embree.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2018, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BVH_EMBREE_H__ -#define __BVH_EMBREE_H__ - -#ifdef WITH_EMBREE - -# include -# include - -# include "bvh/bvh.h" -# include "bvh/bvh_params.h" - -# include "util/util_thread.h" -# include "util/util_types.h" -# include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -class Hair; -class Mesh; - -class BVHEmbree : public BVH { - public: - void build(Progress &progress, Stats *stats, RTCDevice rtc_device); - void refit(Progress &progress); - - RTCScene scene; - - protected: - friend class BVH; - BVHEmbree(const BVHParams ¶ms, - const vector &geometry, - const vector &objects); - virtual ~BVHEmbree(); - - void add_object(Object *ob, int i); - void add_instance(Object *ob, int i); - void add_curves(const Object *ob, const Hair *hair, int i); - void add_triangles(const Object *ob, const Mesh *mesh, int i); - - private: - void set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, const bool update); - void set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, const bool update); - - RTCDevice rtc_device; - enum RTCBuildQuality build_quality; -}; - -CCL_NAMESPACE_END - -#endif /* WITH_EMBREE */ - -#endif /* __BVH_EMBREE_H__ */ diff --git a/intern/cycles/bvh/bvh_multi.cpp b/intern/cycles/bvh/bvh_multi.cpp deleted file mode 100644 index a9e771f20f1..00000000000 --- a/intern/cycles/bvh/bvh_multi.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2020, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "bvh/bvh_multi.h" - -#include "util/util_foreach.h" - -CCL_NAMESPACE_BEGIN - -BVHMulti::BVHMulti(const BVHParams ¶ms_, - const vector &geometry_, - const vector &objects_) - : BVH(params_, geometry_, objects_) -{ -} - -BVHMulti::~BVHMulti() -{ - foreach (BVH *bvh, sub_bvhs) { - delete bvh; - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_multi.h b/intern/cycles/bvh/bvh_multi.h deleted file mode 100644 index 840438c5d0c..00000000000 --- a/intern/cycles/bvh/bvh_multi.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2020, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BVH_MULTI_H__ -#define __BVH_MULTI_H__ - -#include "bvh/bvh.h" -#include "bvh/bvh_params.h" - -CCL_NAMESPACE_BEGIN - -class BVHMulti : public BVH { - public: - vector sub_bvhs; - - protected: - friend class BVH; - BVHMulti(const BVHParams ¶ms, - const vector &geometry, - const vector &objects); - virtual ~BVHMulti(); -}; - -CCL_NAMESPACE_END - -#endif /* __BVH_MULTI_H__ */ diff --git a/intern/cycles/bvh/bvh_node.cpp b/intern/cycles/bvh/bvh_node.cpp deleted file mode 100644 index 38b554acfbf..00000000000 --- a/intern/cycles/bvh/bvh_node.cpp +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Adapted from code copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "bvh/bvh_node.h" - -#include "bvh/bvh.h" -#include "bvh/bvh_build.h" - -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -/* BVH Node */ - -int BVHNode::getSubtreeSize(BVH_STAT stat) const -{ - int cnt = 0; - - switch (stat) { - case BVH_STAT_NODE_COUNT: - cnt = 1; - break; - case BVH_STAT_LEAF_COUNT: - cnt = is_leaf() ? 1 : 0; - break; - case BVH_STAT_INNER_COUNT: - cnt = is_leaf() ? 0 : 1; - break; - case BVH_STAT_TRIANGLE_COUNT: - cnt = is_leaf() ? 
reinterpret_cast(this)->num_triangles() : 0; - break; - case BVH_STAT_CHILDNODE_COUNT: - cnt = num_children(); - break; - case BVH_STAT_ALIGNED_COUNT: - if (!is_unaligned) { - cnt = 1; - } - break; - case BVH_STAT_UNALIGNED_COUNT: - if (is_unaligned) { - cnt = 1; - } - break; - case BVH_STAT_ALIGNED_INNER_COUNT: - if (!is_leaf()) { - bool has_unaligned = false; - for (int j = 0; j < num_children(); j++) { - has_unaligned |= get_child(j)->is_unaligned; - } - cnt += has_unaligned ? 0 : 1; - } - break; - case BVH_STAT_UNALIGNED_INNER_COUNT: - if (!is_leaf()) { - bool has_unaligned = false; - for (int j = 0; j < num_children(); j++) { - has_unaligned |= get_child(j)->is_unaligned; - } - cnt += has_unaligned ? 1 : 0; - } - break; - case BVH_STAT_ALIGNED_LEAF_COUNT: - cnt = (is_leaf() && !is_unaligned) ? 1 : 0; - break; - case BVH_STAT_UNALIGNED_LEAF_COUNT: - cnt = (is_leaf() && is_unaligned) ? 1 : 0; - break; - case BVH_STAT_DEPTH: - if (is_leaf()) { - cnt = 1; - } - else { - for (int i = 0; i < num_children(); i++) { - cnt = max(cnt, get_child(i)->getSubtreeSize(stat)); - } - cnt += 1; - } - return cnt; - default: - assert(0); /* unknown mode */ - } - - if (!is_leaf()) - for (int i = 0; i < num_children(); i++) - cnt += get_child(i)->getSubtreeSize(stat); - - return cnt; -} - -void BVHNode::deleteSubtree() -{ - for (int i = 0; i < num_children(); i++) - if (get_child(i)) - get_child(i)->deleteSubtree(); - - delete this; -} - -float BVHNode::computeSubtreeSAHCost(const BVHParams &p, float probability) const -{ - float SAH = probability * p.cost(num_children(), num_triangles()); - - for (int i = 0; i < num_children(); i++) { - BVHNode *child = get_child(i); - SAH += child->computeSubtreeSAHCost( - p, probability * child->bounds.safe_area() / bounds.safe_area()); - } - - return SAH; -} - -uint BVHNode::update_visibility() -{ - if (!is_leaf() && visibility == 0) { - InnerNode *inner = (InnerNode *)this; - BVHNode *child0 = inner->children[0]; - BVHNode *child1 = inner->children[1]; - - visibility = child0->update_visibility() | child1->update_visibility(); - } - - return visibility; -} - -void BVHNode::update_time() -{ - if (!is_leaf()) { - InnerNode *inner = (InnerNode *)this; - BVHNode *child0 = inner->children[0]; - BVHNode *child1 = inner->children[1]; - child0->update_time(); - child1->update_time(); - time_from = min(child0->time_from, child1->time_from); - time_to = max(child0->time_to, child1->time_to); - } -} - -namespace { - -struct DumpTraversalContext { - /* Descriptor of wile where writing is happening. */ - FILE *stream; - /* Unique identifier of the node current. 
*/ - int id; -}; - -void dump_subtree(DumpTraversalContext *context, const BVHNode *node, const BVHNode *parent = NULL) -{ - if (node->is_leaf()) { - fprintf(context->stream, - " node_%p [label=\"%d\",fillcolor=\"#ccccee\",style=filled]\n", - node, - context->id); - } - else { - fprintf(context->stream, - " node_%p [label=\"%d\",fillcolor=\"#cceecc\",style=filled]\n", - node, - context->id); - } - if (parent != NULL) { - fprintf(context->stream, " node_%p -> node_%p;\n", parent, node); - } - context->id += 1; - for (int i = 0; i < node->num_children(); ++i) { - dump_subtree(context, node->get_child(i), node); - } -} - -} // namespace - -void BVHNode::dump_graph(const char *filename) -{ - DumpTraversalContext context; - context.stream = fopen(filename, "w"); - if (context.stream == NULL) { - return; - } - context.id = 0; - fprintf(context.stream, "digraph BVH {\n"); - dump_subtree(&context, this); - fprintf(context.stream, "}\n"); - fclose(context.stream); -} - -/* Inner Node */ - -void InnerNode::print(int depth) const -{ - for (int i = 0; i < depth; i++) - printf(" "); - - printf("inner node %p\n", (void *)this); - - if (children[0]) - children[0]->print(depth + 1); - if (children[1]) - children[1]->print(depth + 1); -} - -void LeafNode::print(int depth) const -{ - for (int i = 0; i < depth; i++) - printf(" "); - - printf("leaf node %d to %d\n", lo, hi); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_node.h b/intern/cycles/bvh/bvh_node.h deleted file mode 100644 index b3b5c43a394..00000000000 --- a/intern/cycles/bvh/bvh_node.h +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Adapted from code copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __BVH_NODE_H__ -#define __BVH_NODE_H__ - -#include "util/util_boundbox.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -enum BVH_STAT { - BVH_STAT_NODE_COUNT, - BVH_STAT_INNER_COUNT, - BVH_STAT_LEAF_COUNT, - BVH_STAT_TRIANGLE_COUNT, - BVH_STAT_CHILDNODE_COUNT, - BVH_STAT_ALIGNED_COUNT, - BVH_STAT_UNALIGNED_COUNT, - BVH_STAT_ALIGNED_INNER_COUNT, - BVH_STAT_UNALIGNED_INNER_COUNT, - BVH_STAT_ALIGNED_LEAF_COUNT, - BVH_STAT_UNALIGNED_LEAF_COUNT, - BVH_STAT_DEPTH, -}; - -class BVHParams; - -class BVHNode { - public: - virtual ~BVHNode() - { - delete aligned_space; - } - - virtual bool is_leaf() const = 0; - virtual int num_children() const = 0; - virtual BVHNode *get_child(int i) const = 0; - virtual int num_triangles() const - { - return 0; - } - virtual void print(int depth = 0) const = 0; - - inline void set_aligned_space(const Transform &aligned_space) - { - is_unaligned = true; - if (this->aligned_space == NULL) { - this->aligned_space = new Transform(aligned_space); - } - else { - *this->aligned_space = aligned_space; - } - } - - inline Transform get_aligned_space() const - { - if (aligned_space == NULL) { - return transform_identity(); - } - return *aligned_space; - } - - inline bool has_unaligned() const - { - if (is_leaf()) { - return false; - } - for (int i = 0; i < num_children(); ++i) { - if (get_child(i)->is_unaligned) { - return true; - } - } - return false; - } - - // Subtree functions - int getSubtreeSize(BVH_STAT stat = BVH_STAT_NODE_COUNT) const; - float computeSubtreeSAHCost(const BVHParams &p, float probability = 1.0f) const; - void deleteSubtree(); - - uint update_visibility(); - void update_time(); - - /* Dump the content of the tree as a graphviz file. */ - void dump_graph(const char *filename); - - // Properties. - BoundBox bounds; - uint visibility; - - bool is_unaligned; - - /* TODO(sergey): Can be stored as 3x3 matrix, but better to have some - * utilities and type defines in util_transform first. - */ - Transform *aligned_space; - - float time_from, time_to; - - protected: - explicit BVHNode(const BoundBox &bounds) - : bounds(bounds), - visibility(0), - is_unaligned(false), - aligned_space(NULL), - time_from(0.0f), - time_to(1.0f) - { - } - - explicit BVHNode(const BVHNode &other) - : bounds(other.bounds), - visibility(other.visibility), - is_unaligned(other.is_unaligned), - aligned_space(NULL), - time_from(other.time_from), - time_to(other.time_to) - { - if (other.aligned_space != NULL) { - assert(other.is_unaligned); - aligned_space = new Transform(); - *aligned_space = *other.aligned_space; - } - else { - assert(!other.is_unaligned); - } - } -}; - -class InnerNode : public BVHNode { - public: - static constexpr int kNumMaxChildren = 8; - - InnerNode(const BoundBox &bounds, BVHNode *child0, BVHNode *child1) - : BVHNode(bounds), num_children_(2) - { - children[0] = child0; - children[1] = child1; - reset_unused_children(); - - if (child0 && child1) { - visibility = child0->visibility | child1->visibility; - } - else { - /* Happens on build cancel. 
*/ - visibility = 0; - } - } - - InnerNode(const BoundBox &bounds, BVHNode **children, const int num_children) - : BVHNode(bounds), num_children_(num_children) - { - visibility = 0; - time_from = FLT_MAX; - time_to = -FLT_MAX; - for (int i = 0; i < num_children; ++i) { - assert(children[i] != NULL); - visibility |= children[i]->visibility; - this->children[i] = children[i]; - time_from = min(time_from, children[i]->time_from); - time_to = max(time_to, children[i]->time_to); - } - reset_unused_children(); - } - - /* NOTE: This function is only used during binary BVH builder, and it - * supposed to be configured to have 2 children which will be filled-in in a - * bit. But this is important to have children reset to NULL. */ - explicit InnerNode(const BoundBox &bounds) : BVHNode(bounds), num_children_(0) - { - reset_unused_children(); - visibility = 0; - num_children_ = 2; - } - - bool is_leaf() const - { - return false; - } - int num_children() const - { - return num_children_; - } - BVHNode *get_child(int i) const - { - assert(i >= 0 && i < num_children_); - return children[i]; - } - void print(int depth) const; - - int num_children_; - BVHNode *children[kNumMaxChildren]; - - protected: - void reset_unused_children() - { - for (int i = num_children_; i < kNumMaxChildren; ++i) { - children[i] = NULL; - } - } -}; - -class LeafNode : public BVHNode { - public: - LeafNode(const BoundBox &bounds, uint visibility, int lo, int hi) - : BVHNode(bounds), lo(lo), hi(hi) - { - this->bounds = bounds; - this->visibility = visibility; - } - - LeafNode(const LeafNode &other) : BVHNode(other), lo(other.lo), hi(other.hi) - { - } - - bool is_leaf() const - { - return true; - } - int num_children() const - { - return 0; - } - BVHNode *get_child(int) const - { - return NULL; - } - int num_triangles() const - { - return hi - lo; - } - void print(int depth) const; - - int lo; - int hi; -}; - -CCL_NAMESPACE_END - -#endif /* __BVH_NODE_H__ */ diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp deleted file mode 100644 index cd266f72f89..00000000000 --- a/intern/cycles/bvh/bvh_optix.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2019, NVIDIA Corporation. - * Copyright 2019, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifdef WITH_OPTIX - -# include "device/device.h" - -# include "bvh/bvh_optix.h" - -CCL_NAMESPACE_BEGIN - -BVHOptiX::BVHOptiX(const BVHParams ¶ms_, - const vector &geometry_, - const vector &objects_, - Device *device) - : BVH(params_, geometry_, objects_), - device(device), - traversable_handle(0), - as_data(device, params_.top_level ? "optix tlas" : "optix blas", false), - motion_transform_data(device, "optix motion transform", false) -{ -} - -BVHOptiX::~BVHOptiX() -{ - // Acceleration structure memory is delayed freed on device, since deleting the - // BVH may happen while still being used for rendering. 
- device->release_optix_bvh(this); -} - -CCL_NAMESPACE_END - -#endif /* WITH_OPTIX */ diff --git a/intern/cycles/bvh/bvh_optix.h b/intern/cycles/bvh/bvh_optix.h deleted file mode 100644 index ba5d90471d1..00000000000 --- a/intern/cycles/bvh/bvh_optix.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2019, NVIDIA Corporation. - * Copyright 2019, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BVH_OPTIX_H__ -#define __BVH_OPTIX_H__ - -#ifdef WITH_OPTIX - -# include "bvh/bvh.h" -# include "bvh/bvh_params.h" -# include "device/device_memory.h" - -CCL_NAMESPACE_BEGIN - -class BVHOptiX : public BVH { - public: - Device *device; - uint64_t traversable_handle; - device_only_memory as_data; - device_only_memory motion_transform_data; - - protected: - friend class BVH; - BVHOptiX(const BVHParams ¶ms, - const vector &geometry, - const vector &objects, - Device *device); - virtual ~BVHOptiX(); -}; - -CCL_NAMESPACE_END - -#endif /* WITH_OPTIX */ - -#endif /* __BVH_OPTIX_H__ */ diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h deleted file mode 100644 index 31b3971c110..00000000000 --- a/intern/cycles/bvh/bvh_params.h +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Adapted from code copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BVH_PARAMS_H__ -#define __BVH_PARAMS_H__ - -#include "util/util_boundbox.h" - -#include "kernel/kernel_types.h" - -CCL_NAMESPACE_BEGIN - -/* Layout of BVH tree. - * - * For example, how wide BVH tree is, in terms of number of children - * per node. - */ -typedef KernelBVHLayout BVHLayout; - -/* Type of BVH, in terms whether it is supported dynamic updates of meshes - * or whether modifying geometry requires full BVH rebuild. - */ -enum BVHType { - /* BVH supports dynamic updates of geometry. - * - * Faster for updating BVH tree when doing modifications in viewport, - * but slower for rendering. - */ - BVH_TYPE_DYNAMIC = 0, - /* BVH tree is calculated for specific scene, updates in geometry - * requires full tree rebuild. - * - * Slower to update BVH tree when modifying objects in viewport, also - * slower to build final BVH tree but gives best possible render speed. - */ - BVH_TYPE_STATIC = 1, - - BVH_NUM_TYPES, -}; - -/* Names bitflag type to denote which BVH layouts are supported by - * particular area. - * - * Bitflags are the BVH_LAYOUT_* values. 
- */ -typedef int BVHLayoutMask; - -/* Get human readable name of BVH layout. */ -const char *bvh_layout_name(BVHLayout layout); - -/* BVH Parameters */ - -class BVHParams { - public: - /* spatial split area threshold */ - bool use_spatial_split; - float spatial_split_alpha; - - /* Unaligned nodes creation threshold */ - float unaligned_split_threshold; - - /* SAH costs */ - float sah_node_cost; - float sah_primitive_cost; - - /* number of primitives in leaf */ - int min_leaf_size; - int max_triangle_leaf_size; - int max_motion_triangle_leaf_size; - int max_curve_leaf_size; - int max_motion_curve_leaf_size; - - /* object or mesh level bvh */ - bool top_level; - - /* BVH layout to be built. */ - BVHLayout bvh_layout; - - /* Use unaligned bounding boxes. - * Only used for curves BVH. - */ - bool use_unaligned_nodes; - - /* Split time range to this number of steps and create leaf node for each - * of this time steps. - * - * Speeds up rendering of motion curve primitives in the cost of higher - * memory usage. - */ - int num_motion_curve_steps; - - /* Same as above, but for triangle primitives. */ - int num_motion_triangle_steps; - - /* Same as in SceneParams. */ - int bvh_type; - - /* These are needed for Embree. */ - int curve_subdivisions; - - /* fixed parameters */ - enum { MAX_DEPTH = 64, MAX_SPATIAL_DEPTH = 48, NUM_SPATIAL_BINS = 32 }; - - BVHParams() - { - use_spatial_split = true; - spatial_split_alpha = 1e-5f; - - unaligned_split_threshold = 0.7f; - - /* todo: see if splitting up primitive cost to be separate for triangles - * and curves can help. so far in tests it doesn't help, but why? */ - sah_node_cost = 1.0f; - sah_primitive_cost = 1.0f; - - min_leaf_size = 1; - max_triangle_leaf_size = 8; - max_motion_triangle_leaf_size = 8; - max_curve_leaf_size = 1; - max_motion_curve_leaf_size = 4; - - top_level = false; - bvh_layout = BVH_LAYOUT_BVH2; - use_unaligned_nodes = false; - - num_motion_curve_steps = 0; - num_motion_triangle_steps = 0; - - bvh_type = 0; - - curve_subdivisions = 4; - } - - /* SAH costs */ - __forceinline float cost(int num_nodes, int num_primitives) const - { - return node_cost(num_nodes) + primitive_cost(num_primitives); - } - - __forceinline float primitive_cost(int n) const - { - return n * sah_primitive_cost; - } - - __forceinline float node_cost(int n) const - { - return n * sah_node_cost; - } - - __forceinline bool small_enough_for_leaf(int size, int level) - { - return (size <= min_leaf_size || level >= MAX_DEPTH); - } - - /* Gets best matching BVH. - * - * If the requested layout is supported by the device, it will be used. - * Otherwise, widest supported layout below that will be used. - */ - static BVHLayout best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask supported_layouts); -}; - -/* BVH Reference - * - * Reference to a primitive. 
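A quick worked example of the SAH helpers in BVHParams above, relying only on that class as declared and on the defaults set in its constructor (both costs 1.0f); this is an illustration, not additional Cycles code:

BVHParams params;                       /* defaults: sah_node_cost = sah_primitive_cost = 1.0f */
float leaf = params.primitive_cost(8);  /* 8 * 1.0f = 8.0f: cost of keeping 8 primitives in one leaf */
float split = params.cost(2, 8);        /* 2 * 1.0f + 8 * 1.0f = 10.0f: two child nodes plus 8 primitives */

In the builder these raw costs are additionally weighted by node surface areas (see BVHObjectSplit and BVHMixedSplit further down), so a split is only taken when the area-weighted children come out cheaper than the single leaf.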
Primitive index and object are sneakily packed - * into BoundBox to reduce memory usage and align nicely */ - -class BVHReference { - public: - __forceinline BVHReference() - { - } - - __forceinline BVHReference(const BoundBox &bounds_, - int prim_index_, - int prim_object_, - int prim_type, - float time_from = 0.0f, - float time_to = 1.0f) - : rbounds(bounds_), time_from_(time_from), time_to_(time_to) - { - rbounds.min.w = __int_as_float(prim_index_); - rbounds.max.w = __int_as_float(prim_object_); - type = prim_type; - } - - __forceinline const BoundBox &bounds() const - { - return rbounds; - } - __forceinline int prim_index() const - { - return __float_as_int(rbounds.min.w); - } - __forceinline int prim_object() const - { - return __float_as_int(rbounds.max.w); - } - __forceinline int prim_type() const - { - return type; - } - __forceinline float time_from() const - { - return time_from_; - } - __forceinline float time_to() const - { - return time_to_; - } - - BVHReference &operator=(const BVHReference &arg) - { - if (&arg != this) { - /* TODO(sergey): Check if it is still faster to memcpy() with - * modern compilers. - */ - memcpy((void *)this, &arg, sizeof(BVHReference)); - } - return *this; - } - - protected: - BoundBox rbounds; - uint type; - float time_from_, time_to_; -}; - -/* BVH Range - * - * Build range used during construction, to indicate the bounds and place in - * the reference array of a subset of primitives Again uses trickery to pack - * integers into BoundBox for alignment purposes. */ - -class BVHRange { - public: - __forceinline BVHRange() - { - rbounds.min.w = __int_as_float(0); - rbounds.max.w = __int_as_float(0); - } - - __forceinline BVHRange(const BoundBox &bounds_, int start_, int size_) : rbounds(bounds_) - { - rbounds.min.w = __int_as_float(start_); - rbounds.max.w = __int_as_float(size_); - } - - __forceinline BVHRange(const BoundBox &bounds_, const BoundBox &cbounds_, int start_, int size_) - : rbounds(bounds_), cbounds(cbounds_) - { - rbounds.min.w = __int_as_float(start_); - rbounds.max.w = __int_as_float(size_); - } - - __forceinline void set_start(int start_) - { - rbounds.min.w = __int_as_float(start_); - } - - __forceinline const BoundBox &bounds() const - { - return rbounds; - } - __forceinline const BoundBox ¢_bounds() const - { - return cbounds; - } - __forceinline int start() const - { - return __float_as_int(rbounds.min.w); - } - __forceinline int size() const - { - return __float_as_int(rbounds.max.w); - } - __forceinline int end() const - { - return start() + size(); - } - - protected: - BoundBox rbounds; - BoundBox cbounds; -}; - -/* BVH Spatial Bin */ - -struct BVHSpatialBin { - BoundBox bounds; - int enter; - int exit; - - __forceinline BVHSpatialBin() - { - } -}; - -/* BVH Spatial Storage - * - * The idea of this storage is have thread-specific storage for the spatial - * splitters. We can pre-allocate this storage in advance and avoid heavy memory - * operations during split process. - */ - -struct BVHSpatialStorage { - /* Accumulated bounds when sweeping from right to left. */ - vector right_bounds; - - /* Bins used for histogram when selecting best split plane. */ - BVHSpatialBin bins[3][BVHParams::NUM_SPATIAL_BINS]; - - /* Temporary storage for the new references. 
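The "sneaky" packing used by BVHReference and BVHRange above stores integer indices in the otherwise unused .w components of the BoundBox corners via bit-preserving conversions. A standalone sketch of that idea, assuming 32-bit int and float (the real __int_as_float()/__float_as_int() helpers live in the util headers and may be implemented differently):

#include <cstring>

static inline float int_as_float_bits(int i)
{
  float f;
  std::memcpy(&f, &i, sizeof(f));  /* copy the raw 32-bit pattern, no numeric conversion */
  return f;
}

static inline int float_as_int_bits(float f)
{
  int i;
  std::memcpy(&i, &f, sizeof(i));
  return i;
}

/* float_as_int_bits(int_as_float_bits(42)) == 42, so a primitive index stored
 * in BoundBox::min.w round-trips exactly, which is what lets the reference
 * reuse space that would otherwise be padding. */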
Used by spatial split to store - * new references in before they're getting inserted into actual array, - */ - vector new_references; -}; - -CCL_NAMESPACE_END - -#endif /* __BVH_PARAMS_H__ */ diff --git a/intern/cycles/bvh/bvh_sort.cpp b/intern/cycles/bvh/bvh_sort.cpp deleted file mode 100644 index b01785b547a..00000000000 --- a/intern/cycles/bvh/bvh_sort.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Adapted from code copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "bvh/bvh_sort.h" - -#include "bvh/bvh_build.h" - -#include "util/util_algorithm.h" -#include "util/util_task.h" - -CCL_NAMESPACE_BEGIN - -static const int BVH_SORT_THRESHOLD = 4096; - -struct BVHReferenceCompare { - public: - int dim; - const BVHUnaligned *unaligned_heuristic; - const Transform *aligned_space; - - BVHReferenceCompare(int dim, - const BVHUnaligned *unaligned_heuristic, - const Transform *aligned_space) - : dim(dim), unaligned_heuristic(unaligned_heuristic), aligned_space(aligned_space) - { - } - - __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const - { - return (aligned_space != NULL) ? - unaligned_heuristic->compute_aligned_prim_boundbox(prim, *aligned_space) : - prim.bounds(); - } - - /* Compare two references. - * - * Returns value is similar to return value of strcmp(). - */ - __forceinline int compare(const BVHReference &ra, const BVHReference &rb) const - { - BoundBox ra_bounds = get_prim_bounds(ra), rb_bounds = get_prim_bounds(rb); - float ca = ra_bounds.min[dim] + ra_bounds.max[dim]; - float cb = rb_bounds.min[dim] + rb_bounds.max[dim]; - - if (ca < cb) - return -1; - else if (ca > cb) - return 1; - else if (ra.prim_object() < rb.prim_object()) - return -1; - else if (ra.prim_object() > rb.prim_object()) - return 1; - else if (ra.prim_index() < rb.prim_index()) - return -1; - else if (ra.prim_index() > rb.prim_index()) - return 1; - else if (ra.prim_type() < rb.prim_type()) - return -1; - else if (ra.prim_type() > rb.prim_type()) - return 1; - - return 0; - } - - bool operator()(const BVHReference &ra, const BVHReference &rb) - { - return (compare(ra, rb) < 0); - } -}; - -static void bvh_reference_sort_threaded(TaskPool *task_pool, - BVHReference *data, - const int job_start, - const int job_end, - const BVHReferenceCompare &compare); - -/* Multi-threaded reference sort. */ -static void bvh_reference_sort_threaded(TaskPool *task_pool, - BVHReference *data, - const int job_start, - const int job_end, - const BVHReferenceCompare &compare) -{ - int start = job_start, end = job_end; - bool have_work = (start < end); - while (have_work) { - const int count = job_end - job_start; - if (count < BVH_SORT_THRESHOLD) { - /* Number of reference low enough, faster to finish the job - * in one thread rather than to spawn more threads. - */ - sort(data + job_start, data + job_end + 1, compare); - break; - } - /* Single QSort step. - * Use median-of-three method for the pivot point. 
- */ - int left = start, right = end; - int center = (left + right) >> 1; - if (compare.compare(data[left], data[center]) > 0) { - swap(data[left], data[center]); - } - if (compare.compare(data[left], data[right]) > 0) { - swap(data[left], data[right]); - } - if (compare.compare(data[center], data[right]) > 0) { - swap(data[center], data[right]); - } - swap(data[center], data[right - 1]); - BVHReference median = data[right - 1]; - do { - while (compare.compare(data[left], median) < 0) { - ++left; - } - while (compare.compare(data[right], median) > 0) { - --right; - } - if (left <= right) { - swap(data[left], data[right]); - ++left; - --right; - } - } while (left <= right); - /* We only create one new task here to reduce downside effects of - * latency in TaskScheduler. - * So generally current thread keeps working on the left part of the - * array, and we create new task for the right side. - * However, if there's nothing to be done in the left side of the array - * we don't create any tasks and make it so current thread works on the - * right side. - */ - have_work = false; - if (left < end) { - if (start < right) { - task_pool->push( - function_bind(bvh_reference_sort_threaded, task_pool, data, left, end, compare)); - } - else { - start = left; - have_work = true; - } - } - if (start < right) { - end = right; - have_work = true; - } - } -} - -void bvh_reference_sort(int start, - int end, - BVHReference *data, - int dim, - const BVHUnaligned *unaligned_heuristic, - const Transform *aligned_space) -{ - const int count = end - start; - BVHReferenceCompare compare(dim, unaligned_heuristic, aligned_space); - if (count < BVH_SORT_THRESHOLD) { - /* It is important to not use any mutex if array is small enough, - * otherwise we end up in situation when we're going to sleep far - * too often. - */ - sort(data + start, data + end, compare); - } - else { - TaskPool task_pool; - bvh_reference_sort_threaded(&task_pool, data, start, end - 1, compare); - task_pool.wait_work(); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_sort.h b/intern/cycles/bvh/bvh_sort.h deleted file mode 100644 index 936401d8607..00000000000 --- a/intern/cycles/bvh/bvh_sort.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Adapted from code copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __BVH_SORT_H__ -#define __BVH_SORT_H__ - -#include - -CCL_NAMESPACE_BEGIN - -class BVHReference; -class BVHUnaligned; -struct Transform; - -void bvh_reference_sort(int start, - int end, - BVHReference *data, - int dim, - const BVHUnaligned *unaligned_heuristic = NULL, - const Transform *aligned_space = NULL); - -CCL_NAMESPACE_END - -#endif /* __BVH_SORT_H__ */ diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp deleted file mode 100644 index 0e7d36983e7..00000000000 --- a/intern/cycles/bvh/bvh_split.cpp +++ /dev/null @@ -1,518 +0,0 @@ -/* - * Adapted from code copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "bvh/bvh_split.h" - -#include "bvh/bvh_build.h" -#include "bvh/bvh_sort.h" - -#include "scene/hair.h" -#include "scene/mesh.h" -#include "scene/object.h" - -#include "util/util_algorithm.h" - -CCL_NAMESPACE_BEGIN - -/* Object Split */ - -BVHObjectSplit::BVHObjectSplit(BVHBuild *builder, - BVHSpatialStorage *storage, - const BVHRange &range, - vector &references, - float nodeSAH, - const BVHUnaligned *unaligned_heuristic, - const Transform *aligned_space) - : sah(FLT_MAX), - dim(0), - num_left(0), - left_bounds(BoundBox::empty), - right_bounds(BoundBox::empty), - storage_(storage), - references_(&references), - unaligned_heuristic_(unaligned_heuristic), - aligned_space_(aligned_space) -{ - const BVHReference *ref_ptr = &references_->at(range.start()); - float min_sah = FLT_MAX; - - storage_->right_bounds.resize(range.size()); - - for (int dim = 0; dim < 3; dim++) { - /* Sort references. */ - bvh_reference_sort(range.start(), - range.end(), - &references_->at(0), - dim, - unaligned_heuristic_, - aligned_space_); - - /* sweep right to left and determine bounds. */ - BoundBox right_bounds = BoundBox::empty; - for (int i = range.size() - 1; i > 0; i--) { - BoundBox prim_bounds = get_prim_bounds(ref_ptr[i]); - right_bounds.grow(prim_bounds); - storage_->right_bounds[i - 1] = right_bounds; - } - - /* sweep left to right and select lowest SAH. 
*/ - BoundBox left_bounds = BoundBox::empty; - - for (int i = 1; i < range.size(); i++) { - BoundBox prim_bounds = get_prim_bounds(ref_ptr[i - 1]); - left_bounds.grow(prim_bounds); - right_bounds = storage_->right_bounds[i - 1]; - - float sah = nodeSAH + left_bounds.safe_area() * builder->params.primitive_cost(i) + - right_bounds.safe_area() * builder->params.primitive_cost(range.size() - i); - - if (sah < min_sah) { - min_sah = sah; - - this->sah = sah; - this->dim = dim; - this->num_left = i; - this->left_bounds = left_bounds; - this->right_bounds = right_bounds; - } - } - } -} - -void BVHObjectSplit::split(BVHRange &left, BVHRange &right, const BVHRange &range) -{ - assert(references_->size() > 0); - /* sort references according to split */ - bvh_reference_sort(range.start(), - range.end(), - &references_->at(0), - this->dim, - unaligned_heuristic_, - aligned_space_); - - BoundBox effective_left_bounds, effective_right_bounds; - const int num_right = range.size() - this->num_left; - if (aligned_space_ == NULL) { - effective_left_bounds = left_bounds; - effective_right_bounds = right_bounds; - } - else { - effective_left_bounds = BoundBox::empty; - effective_right_bounds = BoundBox::empty; - for (int i = 0; i < this->num_left; ++i) { - BoundBox prim_boundbox = references_->at(range.start() + i).bounds(); - effective_left_bounds.grow(prim_boundbox); - } - for (int i = 0; i < num_right; ++i) { - BoundBox prim_boundbox = references_->at(range.start() + this->num_left + i).bounds(); - effective_right_bounds.grow(prim_boundbox); - } - } - - /* split node ranges */ - left = BVHRange(effective_left_bounds, range.start(), this->num_left); - right = BVHRange(effective_right_bounds, left.end(), num_right); -} - -/* Spatial Split */ - -BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder, - BVHSpatialStorage *storage, - const BVHRange &range, - vector &references, - float nodeSAH, - const BVHUnaligned *unaligned_heuristic, - const Transform *aligned_space) - : sah(FLT_MAX), - dim(0), - pos(0.0f), - storage_(storage), - references_(&references), - unaligned_heuristic_(unaligned_heuristic), - aligned_space_(aligned_space) -{ - /* initialize bins. */ - BoundBox range_bounds; - if (aligned_space == NULL) { - range_bounds = range.bounds(); - } - else { - range_bounds = unaligned_heuristic->compute_aligned_boundbox( - range, &references_->at(0), *aligned_space); - } - - float3 origin = range_bounds.min; - float3 binSize = (range_bounds.max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS); - float3 invBinSize = 1.0f / binSize; - - for (int dim = 0; dim < 3; dim++) { - for (int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) { - BVHSpatialBin &bin = storage_->bins[dim][i]; - - bin.bounds = BoundBox::empty; - bin.enter = 0; - bin.exit = 0; - } - } - - /* chop references into bins. 
*/ - for (unsigned int refIdx = range.start(); refIdx < range.end(); refIdx++) { - const BVHReference &ref = references_->at(refIdx); - BoundBox prim_bounds = get_prim_bounds(ref); - float3 firstBinf = (prim_bounds.min - origin) * invBinSize; - float3 lastBinf = (prim_bounds.max - origin) * invBinSize; - int3 firstBin = make_int3((int)firstBinf.x, (int)firstBinf.y, (int)firstBinf.z); - int3 lastBin = make_int3((int)lastBinf.x, (int)lastBinf.y, (int)lastBinf.z); - - firstBin = clamp(firstBin, 0, BVHParams::NUM_SPATIAL_BINS - 1); - lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1); - - for (int dim = 0; dim < 3; dim++) { - BVHReference currRef( - get_prim_bounds(ref), ref.prim_index(), ref.prim_object(), ref.prim_type()); - - for (int i = firstBin[dim]; i < lastBin[dim]; i++) { - BVHReference leftRef, rightRef; - - split_reference( - builder, leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1)); - storage_->bins[dim][i].bounds.grow(leftRef.bounds()); - currRef = rightRef; - } - - storage_->bins[dim][lastBin[dim]].bounds.grow(currRef.bounds()); - storage_->bins[dim][firstBin[dim]].enter++; - storage_->bins[dim][lastBin[dim]].exit++; - } - } - - /* select best split plane. */ - storage_->right_bounds.resize(BVHParams::NUM_SPATIAL_BINS); - for (int dim = 0; dim < 3; dim++) { - /* sweep right to left and determine bounds. */ - BoundBox right_bounds = BoundBox::empty; - for (int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) { - right_bounds.grow(storage_->bins[dim][i].bounds); - storage_->right_bounds[i - 1] = right_bounds; - } - - /* sweep left to right and select lowest SAH. */ - BoundBox left_bounds = BoundBox::empty; - int leftNum = 0; - int rightNum = range.size(); - - for (int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) { - left_bounds.grow(storage_->bins[dim][i - 1].bounds); - leftNum += storage_->bins[dim][i - 1].enter; - rightNum -= storage_->bins[dim][i - 1].exit; - - float sah = nodeSAH + left_bounds.safe_area() * builder.params.primitive_cost(leftNum) + - storage_->right_bounds[i - 1].safe_area() * - builder.params.primitive_cost(rightNum); - - if (sah < this->sah) { - this->sah = sah; - this->dim = dim; - this->pos = origin[dim] + binSize[dim] * (float)i; - } - } - } -} - -void BVHSpatialSplit::split(BVHBuild *builder, - BVHRange &left, - BVHRange &right, - const BVHRange &range) -{ - /* Categorize references and compute bounds. - * - * Left-hand side: [left_start, left_end[ - * Uncategorized/split: [left_end, right_start[ - * Right-hand side: [right_start, refs.size()[ */ - - vector &refs = *references_; - int left_start = range.start(); - int left_end = left_start; - int right_start = range.end(); - int right_end = range.end(); - BoundBox left_bounds = BoundBox::empty; - BoundBox right_bounds = BoundBox::empty; - - for (int i = left_end; i < right_start; i++) { - BoundBox prim_bounds = get_prim_bounds(refs[i]); - if (prim_bounds.max[this->dim] <= this->pos) { - /* entirely on the left-hand side */ - left_bounds.grow(prim_bounds); - swap(refs[i], refs[left_end++]); - } - else if (prim_bounds.min[this->dim] >= this->pos) { - /* entirely on the right-hand side */ - right_bounds.grow(prim_bounds); - swap(refs[i--], refs[--right_start]); - } - } - - /* Duplicate or unsplit references intersecting both sides. - * - * Duplication happens into a temporary pre-allocated vector in order to - * reduce number of memmove() calls happening in vector.insert(). 
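The bin assignment in the loop above reduces to a scale-and-clamp per axis; a scalar, single-axis sketch of the same arithmetic (num_bins stands in for BVHParams::NUM_SPATIAL_BINS, names are illustrative):

#include <algorithm>

static int bin_index(float coord, float origin, float inv_bin_size, int num_bins)
{
  int bin = (int)((coord - origin) * inv_bin_size);
  return std::min(std::max(bin, 0), num_bins - 1);  /* clamp to a valid bin, as the int3 clamp does */
}

A reference whose bounds start and end in different bins is then clipped at every bin boundary it crosses via split_reference(), so each bin's BoundBox grows only by the piece that actually lies inside it.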
- */ - vector &new_refs = storage_->new_references; - new_refs.clear(); - new_refs.reserve(right_start - left_end); - while (left_end < right_start) { - /* split reference. */ - BVHReference curr_ref(get_prim_bounds(refs[left_end]), - refs[left_end].prim_index(), - refs[left_end].prim_object(), - refs[left_end].prim_type()); - BVHReference lref, rref; - split_reference(*builder, lref, rref, curr_ref, this->dim, this->pos); - - /* compute SAH for duplicate/unsplit candidates. */ - BoundBox lub = left_bounds; // Unsplit to left: new left-hand bounds. - BoundBox rub = right_bounds; // Unsplit to right: new right-hand bounds. - BoundBox ldb = left_bounds; // Duplicate: new left-hand bounds. - BoundBox rdb = right_bounds; // Duplicate: new right-hand bounds. - - lub.grow(curr_ref.bounds()); - rub.grow(curr_ref.bounds()); - ldb.grow(lref.bounds()); - rdb.grow(rref.bounds()); - - float lac = builder->params.primitive_cost(left_end - left_start); - float rac = builder->params.primitive_cost(right_end - right_start); - float lbc = builder->params.primitive_cost(left_end - left_start + 1); - float rbc = builder->params.primitive_cost(right_end - right_start + 1); - - float unsplitLeftSAH = lub.safe_area() * lbc + right_bounds.safe_area() * rac; - float unsplitRightSAH = left_bounds.safe_area() * lac + rub.safe_area() * rbc; - float duplicateSAH = ldb.safe_area() * lbc + rdb.safe_area() * rbc; - float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH); - - if (minSAH == unsplitLeftSAH) { - /* unsplit to left */ - left_bounds = lub; - left_end++; - } - else if (minSAH == unsplitRightSAH) { - /* unsplit to right */ - right_bounds = rub; - swap(refs[left_end], refs[--right_start]); - } - else { - /* duplicate */ - left_bounds = ldb; - right_bounds = rdb; - refs[left_end++] = lref; - new_refs.push_back(rref); - right_end++; - } - } - /* Insert duplicated references into actual array in one go. */ - if (new_refs.size() != 0) { - refs.insert(refs.begin() + (right_end - new_refs.size()), new_refs.begin(), new_refs.end()); - } - if (aligned_space_ != NULL) { - left_bounds = right_bounds = BoundBox::empty; - for (int i = left_start; i < left_end - left_start; ++i) { - BoundBox prim_boundbox = references_->at(i).bounds(); - left_bounds.grow(prim_boundbox); - } - for (int i = right_start; i < right_end - right_start; ++i) { - BoundBox prim_boundbox = references_->at(i).bounds(); - right_bounds.grow(prim_boundbox); - } - } - left = BVHRange(left_bounds, left_start, left_end - left_start); - right = BVHRange(right_bounds, right_start, right_end - right_start); -} - -void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh, - const Transform *tfm, - int prim_index, - int dim, - float pos, - BoundBox &left_bounds, - BoundBox &right_bounds) -{ - Mesh::Triangle t = mesh->get_triangle(prim_index); - const float3 *verts = &mesh->verts[0]; - float3 v1 = tfm ? transform_point(tfm, verts[t.v[2]]) : verts[t.v[2]]; - v1 = get_unaligned_point(v1); - - for (int i = 0; i < 3; i++) { - float3 v0 = v1; - int vindex = t.v[i]; - v1 = tfm ? transform_point(tfm, verts[vindex]) : verts[vindex]; - v1 = get_unaligned_point(v1); - float v0p = v0[dim]; - float v1p = v1[dim]; - - /* insert vertex to the boxes it belongs to. */ - if (v0p <= pos) - left_bounds.grow(v0); - - if (v0p >= pos) - right_bounds.grow(v0); - - /* edge intersects the plane => insert intersection to both boxes. 
*/ - if ((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) { - float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); - left_bounds.grow(t); - right_bounds.grow(t); - } - } -} - -void BVHSpatialSplit::split_curve_primitive(const Hair *hair, - const Transform *tfm, - int prim_index, - int segment_index, - int dim, - float pos, - BoundBox &left_bounds, - BoundBox &right_bounds) -{ - /* curve split: NOTE - Currently ignores curve width and needs to be fixed. */ - Hair::Curve curve = hair->get_curve(prim_index); - const int k0 = curve.first_key + segment_index; - const int k1 = k0 + 1; - float3 v0 = hair->get_curve_keys()[k0]; - float3 v1 = hair->get_curve_keys()[k1]; - - if (tfm != NULL) { - v0 = transform_point(tfm, v0); - v1 = transform_point(tfm, v1); - } - v0 = get_unaligned_point(v0); - v1 = get_unaligned_point(v1); - - float v0p = v0[dim]; - float v1p = v1[dim]; - - /* insert vertex to the boxes it belongs to. */ - if (v0p <= pos) - left_bounds.grow(v0); - - if (v0p >= pos) - right_bounds.grow(v0); - - if (v1p <= pos) - left_bounds.grow(v1); - - if (v1p >= pos) - right_bounds.grow(v1); - - /* edge intersects the plane => insert intersection to both boxes. */ - if ((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) { - float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); - left_bounds.grow(t); - right_bounds.grow(t); - } -} - -void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref, - const Mesh *mesh, - int dim, - float pos, - BoundBox &left_bounds, - BoundBox &right_bounds) -{ - split_triangle_primitive(mesh, NULL, ref.prim_index(), dim, pos, left_bounds, right_bounds); -} - -void BVHSpatialSplit::split_curve_reference(const BVHReference &ref, - const Hair *hair, - int dim, - float pos, - BoundBox &left_bounds, - BoundBox &right_bounds) -{ - split_curve_primitive(hair, - NULL, - ref.prim_index(), - PRIMITIVE_UNPACK_SEGMENT(ref.prim_type()), - dim, - pos, - left_bounds, - right_bounds); -} - -void BVHSpatialSplit::split_object_reference( - const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds) -{ - Geometry *geom = object->get_geometry(); - - if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { - Mesh *mesh = static_cast(geom); - for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) { - split_triangle_primitive( - mesh, &object->get_tfm(), tri_idx, dim, pos, left_bounds, right_bounds); - } - } - else if (geom->geometry_type == Geometry::HAIR) { - Hair *hair = static_cast(geom); - for (int curve_idx = 0; curve_idx < hair->num_curves(); ++curve_idx) { - Hair::Curve curve = hair->get_curve(curve_idx); - for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) { - split_curve_primitive( - hair, &object->get_tfm(), curve_idx, segment_idx, dim, pos, left_bounds, right_bounds); - } - } - } -} - -void BVHSpatialSplit::split_reference(const BVHBuild &builder, - BVHReference &left, - BVHReference &right, - const BVHReference &ref, - int dim, - float pos) -{ - /* initialize boundboxes */ - BoundBox left_bounds = BoundBox::empty; - BoundBox right_bounds = BoundBox::empty; - - /* loop over vertices/edges. 
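As a concrete instance of the clipping done in split_triangle_primitive() and split_curve_primitive() above: with v0p = 1.0, v1p = 5.0 and a split plane at pos = 2.0, the interpolation factor is clamp((2.0 - 1.0) / (5.0 - 1.0), 0, 1) = 0.25, so the point a quarter of the way from v0 to v1 is grown into both the left and right bounds.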
*/ - const Object *ob = builder.objects[ref.prim_object()]; - - if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) { - Mesh *mesh = static_cast(ob->get_geometry()); - split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds); - } - else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) { - Hair *hair = static_cast(ob->get_geometry()); - split_curve_reference(ref, hair, dim, pos, left_bounds, right_bounds); - } - else { - split_object_reference(ob, dim, pos, left_bounds, right_bounds); - } - - /* intersect with original bounds. */ - left_bounds.max[dim] = pos; - right_bounds.min[dim] = pos; - - left_bounds.intersect(ref.bounds()); - right_bounds.intersect(ref.bounds()); - - /* set references */ - left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type()); - right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type()); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h deleted file mode 100644 index 5582d90bf83..00000000000 --- a/intern/cycles/bvh/bvh_split.h +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Adapted from code copyright 2009-2010 NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __BVH_SPLIT_H__ -#define __BVH_SPLIT_H__ - -#include "bvh/bvh_build.h" -#include "bvh/bvh_params.h" - -CCL_NAMESPACE_BEGIN - -class BVHBuild; -class Hair; -class Mesh; -struct Transform; - -/* Object Split */ - -class BVHObjectSplit { - public: - float sah; - int dim; - int num_left; - BoundBox left_bounds; - BoundBox right_bounds; - - BVHObjectSplit() - { - } - BVHObjectSplit(BVHBuild *builder, - BVHSpatialStorage *storage, - const BVHRange &range, - vector &references, - float nodeSAH, - const BVHUnaligned *unaligned_heuristic = NULL, - const Transform *aligned_space = NULL); - - void split(BVHRange &left, BVHRange &right, const BVHRange &range); - - protected: - BVHSpatialStorage *storage_; - vector *references_; - const BVHUnaligned *unaligned_heuristic_; - const Transform *aligned_space_; - - __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const - { - if (aligned_space_ == NULL) { - return prim.bounds(); - } - else { - return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_); - } - } -}; - -/* Spatial Split */ - -class BVHSpatialSplit { - public: - float sah; - int dim; - float pos; - - BVHSpatialSplit() : sah(FLT_MAX), dim(0), pos(0.0f), storage_(NULL), references_(NULL) - { - } - BVHSpatialSplit(const BVHBuild &builder, - BVHSpatialStorage *storage, - const BVHRange &range, - vector &references, - float nodeSAH, - const BVHUnaligned *unaligned_heuristic = NULL, - const Transform *aligned_space = NULL); - - void split(BVHBuild *builder, BVHRange &left, BVHRange &right, const BVHRange &range); - - void split_reference(const BVHBuild &builder, - BVHReference &left, - BVHReference &right, - const BVHReference &ref, - int dim, - float pos); - - protected: - BVHSpatialStorage *storage_; - vector *references_; - const BVHUnaligned *unaligned_heuristic_; - const Transform *aligned_space_; - - /* Lower-level functions which calculates boundaries of left and right nodes - * needed for spatial split. - * - * Operates directly with primitive specified by its index, reused by higher - * level splitting functions. - */ - void split_triangle_primitive(const Mesh *mesh, - const Transform *tfm, - int prim_index, - int dim, - float pos, - BoundBox &left_bounds, - BoundBox &right_bounds); - void split_curve_primitive(const Hair *hair, - const Transform *tfm, - int prim_index, - int segment_index, - int dim, - float pos, - BoundBox &left_bounds, - BoundBox &right_bounds); - - /* Lower-level functions which calculates boundaries of left and right nodes - * needed for spatial split. - * - * Operates with BVHReference, internally uses lower level API functions. 
- */ - void split_triangle_reference(const BVHReference &ref, - const Mesh *mesh, - int dim, - float pos, - BoundBox &left_bounds, - BoundBox &right_bounds); - void split_curve_reference(const BVHReference &ref, - const Hair *hair, - int dim, - float pos, - BoundBox &left_bounds, - BoundBox &right_bounds); - void split_object_reference( - const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds); - - __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const - { - if (aligned_space_ == NULL) { - return prim.bounds(); - } - else { - return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_); - } - } - - __forceinline float3 get_unaligned_point(const float3 &point) const - { - if (aligned_space_ == NULL) { - return point; - } - else { - return transform_point(aligned_space_, point); - } - } -}; - -/* Mixed Object-Spatial Split */ - -class BVHMixedSplit { - public: - BVHObjectSplit object; - BVHSpatialSplit spatial; - - float leafSAH; - float nodeSAH; - float minSAH; - - bool no_split; - - BoundBox bounds; - - BVHMixedSplit() - { - } - - __forceinline BVHMixedSplit(BVHBuild *builder, - BVHSpatialStorage *storage, - const BVHRange &range, - vector &references, - int level, - const BVHUnaligned *unaligned_heuristic = NULL, - const Transform *aligned_space = NULL) - { - if (aligned_space == NULL) { - bounds = range.bounds(); - } - else { - bounds = unaligned_heuristic->compute_aligned_boundbox( - range, &references.at(0), *aligned_space); - } - /* find split candidates. */ - float area = bounds.safe_area(); - - leafSAH = area * builder->params.primitive_cost(range.size()); - nodeSAH = area * builder->params.node_cost(2); - - object = BVHObjectSplit( - builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space); - - if (builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) { - BoundBox overlap = object.left_bounds; - overlap.intersect(object.right_bounds); - - if (overlap.safe_area() >= builder->spatial_min_overlap) { - spatial = BVHSpatialSplit( - *builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space); - } - } - - /* leaf SAH is the lowest => create leaf. */ - minSAH = min(min(leafSAH, object.sah), spatial.sah); - no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, references)); - } - - __forceinline void split(BVHBuild *builder, - BVHRange &left, - BVHRange &right, - const BVHRange &range) - { - if (builder->params.use_spatial_split && minSAH == spatial.sah) - spatial.split(builder, left, right, range); - if (!left.size() || !right.size()) - object.split(left, right, range); - } -}; - -CCL_NAMESPACE_END - -#endif /* __BVH_SPLIT_H__ */ diff --git a/intern/cycles/bvh/bvh_unaligned.cpp b/intern/cycles/bvh/bvh_unaligned.cpp deleted file mode 100644 index ce95aa7aa74..00000000000 --- a/intern/cycles/bvh/bvh_unaligned.cpp +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "bvh/bvh_unaligned.h" - -#include "scene/hair.h" -#include "scene/object.h" - -#include "bvh/bvh_binning.h" -#include "bvh_params.h" - -#include "util/util_boundbox.h" -#include "util/util_transform.h" - -CCL_NAMESPACE_BEGIN - -BVHUnaligned::BVHUnaligned(const vector &objects) : objects_(objects) -{ -} - -Transform BVHUnaligned::compute_aligned_space(const BVHObjectBinning &range, - const BVHReference *references) const -{ - for (int i = range.start(); i < range.end(); ++i) { - const BVHReference &ref = references[i]; - Transform aligned_space; - /* Use first primitive which defines correct direction to define - * the orientation space. - */ - if (compute_aligned_space(ref, &aligned_space)) { - return aligned_space; - } - } - return transform_identity(); -} - -Transform BVHUnaligned::compute_aligned_space(const BVHRange &range, - const BVHReference *references) const -{ - for (int i = range.start(); i < range.end(); ++i) { - const BVHReference &ref = references[i]; - Transform aligned_space; - /* Use first primitive which defines correct direction to define - * the orientation space. - */ - if (compute_aligned_space(ref, &aligned_space)) { - return aligned_space; - } - } - return transform_identity(); -} - -bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const -{ - const Object *object = objects_[ref.prim_object()]; - const int packed_type = ref.prim_type(); - const int type = (packed_type & PRIMITIVE_ALL); - /* No motion blur curves here, we can't fit them to aligned boxes well. */ - if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) { - const int curve_index = ref.prim_index(); - const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type); - const Hair *hair = static_cast(object->get_geometry()); - const Hair::Curve &curve = hair->get_curve(curve_index); - const int key = curve.first_key + segment; - const float3 v1 = hair->get_curve_keys()[key], v2 = hair->get_curve_keys()[key + 1]; - float length; - const float3 axis = normalize_len(v2 - v1, &length); - if (length > 1e-6f) { - *aligned_space = make_transform_frame(axis); - return true; - } - } - *aligned_space = transform_identity(); - return false; -} - -BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim, - const Transform &aligned_space) const -{ - BoundBox bounds = BoundBox::empty; - const Object *object = objects_[prim.prim_object()]; - const int packed_type = prim.prim_type(); - const int type = (packed_type & PRIMITIVE_ALL); - /* No motion blur curves here, we can't fit them to aligned boxes well. 
*/ - if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) { - const int curve_index = prim.prim_index(); - const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type); - const Hair *hair = static_cast(object->get_geometry()); - const Hair::Curve &curve = hair->get_curve(curve_index); - curve.bounds_grow( - segment, &hair->get_curve_keys()[0], &hair->get_curve_radius()[0], aligned_space, bounds); - } - else { - bounds = prim.bounds().transformed(&aligned_space); - } - return bounds; -} - -BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHObjectBinning &range, - const BVHReference *references, - const Transform &aligned_space, - BoundBox *cent_bounds) const -{ - BoundBox bounds = BoundBox::empty; - if (cent_bounds != NULL) { - *cent_bounds = BoundBox::empty; - } - for (int i = range.start(); i < range.end(); ++i) { - const BVHReference &ref = references[i]; - BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space); - bounds.grow(ref_bounds); - if (cent_bounds != NULL) { - cent_bounds->grow(ref_bounds.center2()); - } - } - return bounds; -} - -BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHRange &range, - const BVHReference *references, - const Transform &aligned_space, - BoundBox *cent_bounds) const -{ - BoundBox bounds = BoundBox::empty; - if (cent_bounds != NULL) { - *cent_bounds = BoundBox::empty; - } - for (int i = range.start(); i < range.end(); ++i) { - const BVHReference &ref = references[i]; - BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space); - bounds.grow(ref_bounds); - if (cent_bounds != NULL) { - cent_bounds->grow(ref_bounds.center2()); - } - } - return bounds; -} - -Transform BVHUnaligned::compute_node_transform(const BoundBox &bounds, - const Transform &aligned_space) -{ - Transform space = aligned_space; - space.x.w -= bounds.min.x; - space.y.w -= bounds.min.y; - space.z.w -= bounds.min.z; - float3 dim = bounds.max - bounds.min; - return transform_scale( - 1.0f / max(1e-18f, dim.x), 1.0f / max(1e-18f, dim.y), 1.0f / max(1e-18f, dim.z)) * - space; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_unaligned.h b/intern/cycles/bvh/bvh_unaligned.h deleted file mode 100644 index e8a9a25daa8..00000000000 --- a/intern/cycles/bvh/bvh_unaligned.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __BVH_UNALIGNED_H__ -#define __BVH_UNALIGNED_H__ - -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -class BoundBox; -class BVHObjectBinning; -class BVHRange; -class BVHReference; -struct Transform; -class Object; - -/* Helper class to perform calculations needed for unaligned nodes. */ -class BVHUnaligned { - public: - BVHUnaligned(const vector &objects); - - /* Calculate alignment for the oriented node for a given range. 
*/ - Transform compute_aligned_space(const BVHObjectBinning &range, - const BVHReference *references) const; - Transform compute_aligned_space(const BVHRange &range, const BVHReference *references) const; - - /* Calculate alignment for the oriented node for a given reference. - * - * Return true when space was calculated successfully. - */ - bool compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const; - - /* Calculate primitive's bounding box in given space. */ - BoundBox compute_aligned_prim_boundbox(const BVHReference &prim, - const Transform &aligned_space) const; - - /* Calculate bounding box in given space. */ - BoundBox compute_aligned_boundbox(const BVHObjectBinning &range, - const BVHReference *references, - const Transform &aligned_space, - BoundBox *cent_bounds = NULL) const; - BoundBox compute_aligned_boundbox(const BVHRange &range, - const BVHReference *references, - const Transform &aligned_space, - BoundBox *cent_bounds = NULL) const; - - /* Calculate affine transform for node packing. - * Bounds will be in the range of 0..1. - */ - static Transform compute_node_transform(const BoundBox &bounds, const Transform &aligned_space); - - protected: - /* List of objects BVH is being created for. */ - const vector &objects_; -}; - -CCL_NAMESPACE_END - -#endif /* __BVH_UNALIGNED_H__ */ diff --git a/intern/cycles/bvh/embree.cpp b/intern/cycles/bvh/embree.cpp new file mode 100644 index 00000000000..944a84ce0da --- /dev/null +++ b/intern/cycles/bvh/embree.cpp @@ -0,0 +1,728 @@ +/* + * Copyright 2018, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This class implements a ray accelerator for Cycles using Intel's Embree library. + * It supports triangles, curves, object and deformation blur and instancing. + * + * Since Embree allows object to be either curves or triangles but not both, Cycles object IDs are + * mapped to Embree IDs by multiplying by two and adding one for curves. + * + * This implementation shares RTCDevices between Cycles instances. Eventually each instance should + * get a separate RTCDevice to correctly keep track of memory usage. + * + * Vertex and index buffers are duplicated between Cycles device arrays and Embree. These could be + * merged, which would require changes to intersection refinement, shader setup, mesh light + * sampling and a few other places in Cycles where direct access to vertex data is required. + */ + +#ifdef WITH_EMBREE + +# include + +# include "bvh/embree.h" + +/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH. 
+ */ +# include "kernel/bvh/embree.h" +# include "kernel/bvh/util.h" +# include "kernel/device/cpu/compat.h" +# include "kernel/device/cpu/globals.h" +# include "kernel/sample/lcg.h" + +# include "scene/hair.h" +# include "scene/mesh.h" +# include "scene/object.h" + +# include "util/foreach.h" +# include "util/log.h" +# include "util/progress.h" +# include "util/stats.h" + +CCL_NAMESPACE_BEGIN + +static_assert(Object::MAX_MOTION_STEPS <= RTC_MAX_TIME_STEP_COUNT, + "Object and Embree max motion steps inconsistent"); +static_assert(Object::MAX_MOTION_STEPS == Geometry::MAX_MOTION_STEPS, + "Object and Geometry max motion steps inconsistent"); + +# define IS_HAIR(x) (x & 1) + +/* This gets called by Embree at every valid ray/object intersection. + * Things like recording subsurface or shadow hits for later evaluation + * as well as filtering for volume objects happen here. + * Cycles' own BVH does that directly inside the traversal calls. + */ +static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args) +{ + /* Current implementation in Cycles assumes only single-ray intersection queries. */ + assert(args->N == 1); + + const RTCRay *ray = (RTCRay *)args->ray; + RTCHit *hit = (RTCHit *)args->hit; + CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt; + const KernelGlobalsCPU *kg = ctx->kg; + + switch (ctx->type) { + case CCLIntersectContext::RAY_SHADOW_ALL: { + Intersection current_isect; + kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); + + /* If no transparent shadows or max number of hits exceeded, all light is blocked. */ + const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type); + if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) { + ctx->opaque_hit = true; + return; + } + + ++ctx->num_hits; + + /* Always use baked shadow transparency for curves. */ + if (current_isect.type & PRIMITIVE_ALL_CURVE) { + ctx->throughput *= intersection_curve_shadow_transparency( + kg, current_isect.object, current_isect.prim, current_isect.u); + + if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { + ctx->opaque_hit = true; + return; + } + else { + *args->valid = 0; + return; + } + } + + /* Test if we need to record this transparent intersection. */ + const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); + if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) { + /* If maximum number of hits was reached, replace the intersection with the + * highest distance. We want to find the N closest intersections. */ + const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits); + uint isect_index = num_recorded_hits; + if (num_recorded_hits + 1 >= max_record_hits) { + float max_t = ctx->isect_s[0].t; + uint max_recorded_hit = 0; + + for (uint i = 1; i < num_recorded_hits; ++i) { + if (ctx->isect_s[i].t > max_t) { + max_recorded_hit = i; + max_t = ctx->isect_s[i].t; + } + } + + if (num_recorded_hits >= max_record_hits) { + isect_index = max_recorded_hit; + } + + /* Limit the ray distance and stop counting hits beyond this. + * TODO: is there some way we can tell Embree to stop intersecting beyond + * this distance when max number of hits is reached?. Or maybe it will + * become irrelevant if we make max_hits a very high number on the CPU. 
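The shadow filter above keeps only the N closest transparent hits by overwriting the farthest recorded entry once the hit array is full. A minimal standalone sketch of that bookkeeping policy (names and types here are invented for illustration, not taken from the Cycles sources):

    #include <cstddef>

    struct Hit {
      float t; /* Hit distance along the ray. */
    };

    /* Record `hit` into hits[0..max_hits), keeping the closest entries.
     * `num_recorded` counts every attempt and may exceed max_hits, which the
     * caller can use to detect that more hits exist than were stored. */
    static void record_n_closest(Hit *hits, size_t &num_recorded, size_t max_hits, const Hit &hit)
    {
      if (num_recorded < max_hits) {
        hits[num_recorded++] = hit; /* Still room: append. */
        return;
      }
      /* Full: find the farthest stored hit and replace it if the new one is closer. */
      size_t farthest = 0;
      for (size_t i = 1; i < max_hits; ++i) {
        if (hits[i].t > hits[farthest].t) {
          farthest = i;
        }
      }
      if (hit.t < hits[farthest].t) {
        hits[farthest] = hit;
      }
      ++num_recorded;
    }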
*/ + ctx->max_t = max(current_isect.t, max_t); + } + + ctx->isect_s[isect_index] = current_isect; + } + + /* Always increase the number of recorded hits, even beyond the maximum, + * so that we can detect this and trace another ray if needed. */ + ++ctx->num_recorded_hits; + + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + case CCLIntersectContext::RAY_LOCAL: + case CCLIntersectContext::RAY_SSS: { + /* Check if it's hitting the correct object. */ + Intersection current_isect; + if (ctx->type == CCLIntersectContext::RAY_SSS) { + kernel_embree_convert_sss_hit(kg, ray, hit, ¤t_isect, ctx->local_object_id); + } + else { + kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); + if (ctx->local_object_id != current_isect.object) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + } + + /* No intersection information requested, just return a hit. */ + if (ctx->max_hits == 0) { + break; + } + + /* Ignore curves. */ + if (IS_HAIR(hit->geomID)) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + + LocalIntersection *local_isect = ctx->local_isect; + int hit_idx = 0; + + if (ctx->lcg_state) { + /* See triangle_intersect_subsurface() for the native equivalent. */ + for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (local_isect->hits[i].t == ray->tfar) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + return; + } + } + + local_isect->num_hits++; + + if (local_isect->num_hits <= ctx->max_hits) { + hit_idx = local_isect->num_hits - 1; + } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits; + + if (hit_idx >= ctx->max_hits) { + /* This tells Embree to continue tracing. */ + *args->valid = 0; + return; + } + } + } + else { + /* Record closest intersection only. */ + if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) { + *args->valid = 0; + return; + } + + local_isect->num_hits = 1; + } + + /* record intersection */ + local_isect->hits[hit_idx] = current_isect; + local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)); + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + case CCLIntersectContext::RAY_VOLUME_ALL: { + /* Append the intersection to the end of the array. */ + if (ctx->num_hits < ctx->max_hits) { + Intersection current_isect; + kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); + Intersection *isect = &ctx->isect_s[ctx->num_hits]; + ++ctx->num_hits; + *isect = current_isect; + /* Only primitives from volume object. */ + uint tri_object = isect->object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + --ctx->num_hits; + } + /* This tells Embree to continue tracing. */ + *args->valid = 0; + break; + } + } + case CCLIntersectContext::RAY_REGULAR: + default: + /* Nothing to do here. */ + break; + } +} + +static void rtc_filter_func_thick_curve(const RTCFilterFunctionNArguments *args) +{ + const RTCRay *ray = (RTCRay *)args->ray; + RTCHit *hit = (RTCHit *)args->hit; + + /* Always ignore back-facing intersections. 
*/ + if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), + make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) { + *args->valid = 0; + return; + } +} + +static void rtc_filter_occluded_func_thick_curve(const RTCFilterFunctionNArguments *args) +{ + const RTCRay *ray = (RTCRay *)args->ray; + RTCHit *hit = (RTCHit *)args->hit; + + /* Always ignore back-facing intersections. */ + if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z), + make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) { + *args->valid = 0; + return; + } + + rtc_filter_occluded_func(args); +} + +static size_t unaccounted_mem = 0; + +static bool rtc_memory_monitor_func(void *userPtr, const ssize_t bytes, const bool) +{ + Stats *stats = (Stats *)userPtr; + if (stats) { + if (bytes > 0) { + stats->mem_alloc(bytes); + } + else { + stats->mem_free(-bytes); + } + } + else { + /* A stats pointer may not yet be available. Keep track of the memory usage for later. */ + if (bytes >= 0) { + atomic_add_and_fetch_z(&unaccounted_mem, bytes); + } + else { + atomic_sub_and_fetch_z(&unaccounted_mem, -bytes); + } + } + return true; +} + +static void rtc_error_func(void *, enum RTCError, const char *str) +{ + VLOG(1) << str; +} + +static double progress_start_time = 0.0f; + +static bool rtc_progress_func(void *user_ptr, const double n) +{ + Progress *progress = (Progress *)user_ptr; + + if (time_dt() - progress_start_time < 0.25) { + return true; + } + + string msg = string_printf("Building BVH %.0f%%", n * 100.0); + progress->set_substatus(msg); + progress_start_time = time_dt(); + + return !progress->get_cancel(); +} + +BVHEmbree::BVHEmbree(const BVHParams ¶ms_, + const vector &geometry_, + const vector &objects_) + : BVH(params_, geometry_, objects_), + scene(NULL), + rtc_device(NULL), + build_quality(RTC_BUILD_QUALITY_REFIT) +{ + SIMD_SET_FLUSH_TO_ZERO; +} + +BVHEmbree::~BVHEmbree() +{ + if (scene) { + rtcReleaseScene(scene); + } +} + +void BVHEmbree::build(Progress &progress, Stats *stats, RTCDevice rtc_device_) +{ + rtc_device = rtc_device_; + assert(rtc_device); + + rtcSetDeviceErrorFunction(rtc_device, rtc_error_func, NULL); + rtcSetDeviceMemoryMonitorFunction(rtc_device, rtc_memory_monitor_func, stats); + + progress.set_substatus("Building BVH"); + + if (scene) { + rtcReleaseScene(scene); + scene = NULL; + } + + const bool dynamic = params.bvh_type == BVH_TYPE_DYNAMIC; + + scene = rtcNewScene(rtc_device); + const RTCSceneFlags scene_flags = (dynamic ? RTC_SCENE_FLAG_DYNAMIC : RTC_SCENE_FLAG_NONE) | + RTC_SCENE_FLAG_COMPACT | RTC_SCENE_FLAG_ROBUST; + rtcSetSceneFlags(scene, scene_flags); + build_quality = dynamic ? RTC_BUILD_QUALITY_LOW : + (params.use_spatial_split ? 
RTC_BUILD_QUALITY_HIGH : + RTC_BUILD_QUALITY_MEDIUM); + rtcSetSceneBuildQuality(scene, build_quality); + + int i = 0; + foreach (Object *ob, objects) { + if (params.top_level) { + if (!ob->is_traceable()) { + ++i; + continue; + } + if (!ob->get_geometry()->is_instanced()) { + add_object(ob, i); + } + else { + add_instance(ob, i); + } + } + else { + add_object(ob, i); + } + ++i; + if (progress.get_cancel()) + return; + } + + if (progress.get_cancel()) { + return; + } + + rtcSetSceneProgressMonitorFunction(scene, rtc_progress_func, &progress); + rtcCommitScene(scene); +} + +void BVHEmbree::add_object(Object *ob, int i) +{ + Geometry *geom = ob->get_geometry(); + + if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { + Mesh *mesh = static_cast(geom); + if (mesh->num_triangles() > 0) { + add_triangles(ob, mesh, i); + } + } + else if (geom->geometry_type == Geometry::HAIR) { + Hair *hair = static_cast(geom); + if (hair->num_curves() > 0) { + add_curves(ob, hair, i); + } + } +} + +void BVHEmbree::add_instance(Object *ob, int i) +{ + BVHEmbree *instance_bvh = (BVHEmbree *)(ob->get_geometry()->bvh); + assert(instance_bvh != NULL); + + const size_t num_object_motion_steps = ob->use_motion() ? ob->get_motion().size() : 1; + const size_t num_motion_steps = min(num_object_motion_steps, RTC_MAX_TIME_STEP_COUNT); + assert(num_object_motion_steps <= RTC_MAX_TIME_STEP_COUNT); + + RTCGeometry geom_id = rtcNewGeometry(rtc_device, RTC_GEOMETRY_TYPE_INSTANCE); + rtcSetGeometryInstancedScene(geom_id, instance_bvh->scene); + rtcSetGeometryTimeStepCount(geom_id, num_motion_steps); + + if (ob->use_motion()) { + array decomp(ob->get_motion().size()); + transform_motion_decompose(decomp.data(), ob->get_motion().data(), ob->get_motion().size()); + for (size_t step = 0; step < num_motion_steps; ++step) { + RTCQuaternionDecomposition rtc_decomp; + rtcInitQuaternionDecomposition(&rtc_decomp); + rtcQuaternionDecompositionSetQuaternion( + &rtc_decomp, decomp[step].x.w, decomp[step].x.x, decomp[step].x.y, decomp[step].x.z); + rtcQuaternionDecompositionSetScale( + &rtc_decomp, decomp[step].y.w, decomp[step].z.w, decomp[step].w.w); + rtcQuaternionDecompositionSetTranslation( + &rtc_decomp, decomp[step].y.x, decomp[step].y.y, decomp[step].y.z); + rtcQuaternionDecompositionSetSkew( + &rtc_decomp, decomp[step].z.x, decomp[step].z.y, decomp[step].w.x); + rtcSetGeometryTransformQuaternion(geom_id, step, &rtc_decomp); + } + } + else { + rtcSetGeometryTransform( + geom_id, 0, RTC_FORMAT_FLOAT3X4_ROW_MAJOR, (const float *)&ob->get_tfm()); + } + + rtcSetGeometryUserData(geom_id, (void *)instance_bvh->scene); + rtcSetGeometryMask(geom_id, ob->visibility_for_tracing()); + + rtcCommitGeometry(geom_id); + rtcAttachGeometryByID(scene, geom_id, i * 2); + rtcReleaseGeometry(geom_id); +} + +void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i) +{ + size_t prim_offset = mesh->prim_offset; + + const Attribute *attr_mP = NULL; + size_t num_motion_steps = 1; + if (mesh->has_motion_blur()) { + attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (attr_mP) { + num_motion_steps = mesh->get_motion_steps(); + } + } + + assert(num_motion_steps <= RTC_MAX_TIME_STEP_COUNT); + num_motion_steps = min(num_motion_steps, RTC_MAX_TIME_STEP_COUNT); + + const size_t num_triangles = mesh->num_triangles(); + + RTCGeometry geom_id = rtcNewGeometry(rtc_device, RTC_GEOMETRY_TYPE_TRIANGLE); + rtcSetGeometryBuildQuality(geom_id, build_quality); + rtcSetGeometryTimeStepCount(geom_id, 
num_motion_steps); + + unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer( + geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, sizeof(int) * 3, num_triangles); + assert(rtc_indices); + if (!rtc_indices) { + VLOG(1) << "Embree could not create new geometry buffer for mesh " << mesh->name.c_str() + << ".\n"; + return; + } + for (size_t j = 0; j < num_triangles; ++j) { + Mesh::Triangle t = mesh->get_triangle(j); + rtc_indices[j * 3] = t.v[0]; + rtc_indices[j * 3 + 1] = t.v[1]; + rtc_indices[j * 3 + 2] = t.v[2]; + } + + set_tri_vertex_buffer(geom_id, mesh, false); + + rtcSetGeometryUserData(geom_id, (void *)prim_offset); + rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func); + rtcSetGeometryMask(geom_id, ob->visibility_for_tracing()); + + rtcCommitGeometry(geom_id); + rtcAttachGeometryByID(scene, geom_id, i * 2); + rtcReleaseGeometry(geom_id); +} + +void BVHEmbree::set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, const bool update) +{ + const Attribute *attr_mP = NULL; + size_t num_motion_steps = 1; + int t_mid = 0; + if (mesh->has_motion_blur()) { + attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (attr_mP) { + num_motion_steps = mesh->get_motion_steps(); + t_mid = (num_motion_steps - 1) / 2; + if (num_motion_steps > RTC_MAX_TIME_STEP_COUNT) { + assert(0); + num_motion_steps = RTC_MAX_TIME_STEP_COUNT; + } + } + } + const size_t num_verts = mesh->get_verts().size(); + + for (int t = 0; t < num_motion_steps; ++t) { + const float3 *verts; + if (t == t_mid) { + verts = mesh->get_verts().data(); + } + else { + int t_ = (t > t_mid) ? (t - 1) : t; + verts = &attr_mP->data_float3()[t_ * num_verts]; + } + + float *rtc_verts = (update) ? + (float *)rtcGetGeometryBufferData(geom_id, RTC_BUFFER_TYPE_VERTEX, t) : + (float *)rtcSetNewGeometryBuffer(geom_id, + RTC_BUFFER_TYPE_VERTEX, + t, + RTC_FORMAT_FLOAT3, + sizeof(float) * 3, + num_verts + 1); + + assert(rtc_verts); + if (rtc_verts) { + for (size_t j = 0; j < num_verts; ++j) { + rtc_verts[0] = verts[j].x; + rtc_verts[1] = verts[j].y; + rtc_verts[2] = verts[j].z; + rtc_verts += 3; + } + } + + if (update) { + rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t); + } + } +} + +void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, const bool update) +{ + const Attribute *attr_mP = NULL; + size_t num_motion_steps = 1; + if (hair->has_motion_blur()) { + attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (attr_mP) { + num_motion_steps = hair->get_motion_steps(); + } + } + + const size_t num_curves = hair->num_curves(); + size_t num_keys = 0; + for (size_t j = 0; j < num_curves; ++j) { + const Hair::Curve c = hair->get_curve(j); + num_keys += c.num_keys; + } + + /* Catmull-Rom splines need extra CVs at the beginning and end of each curve. */ + size_t num_keys_embree = num_keys; + num_keys_embree += num_curves * 2; + + /* Copy the CV data to Embree */ + const int t_mid = (num_motion_steps - 1) / 2; + const float *curve_radius = &hair->get_curve_radius()[0]; + for (int t = 0; t < num_motion_steps; ++t) { + const float3 *verts; + if (t == t_mid || attr_mP == NULL) { + verts = &hair->get_curve_keys()[0]; + } + else { + int t_ = (t > t_mid) ? (t - 1) : t; + verts = &attr_mP->data_float3()[t_ * num_keys]; + } + + float4 *rtc_verts = (update) ? 
(float4 *)rtcGetGeometryBufferData( + geom_id, RTC_BUFFER_TYPE_VERTEX, t) : + (float4 *)rtcSetNewGeometryBuffer(geom_id, + RTC_BUFFER_TYPE_VERTEX, + t, + RTC_FORMAT_FLOAT4, + sizeof(float) * 4, + num_keys_embree); + + assert(rtc_verts); + if (rtc_verts) { + const size_t num_curves = hair->num_curves(); + for (size_t j = 0; j < num_curves; ++j) { + Hair::Curve c = hair->get_curve(j); + int fk = c.first_key; + int k = 1; + for (; k < c.num_keys + 1; ++k, ++fk) { + rtc_verts[k] = float3_to_float4(verts[fk]); + rtc_verts[k].w = curve_radius[fk]; + } + /* Duplicate Embree's Catmull-Rom spline CVs at the start and end of each curve. */ + rtc_verts[0] = rtc_verts[1]; + rtc_verts[k] = rtc_verts[k - 1]; + rtc_verts += c.num_keys + 2; + } + } + + if (update) { + rtcUpdateGeometryBuffer(geom_id, RTC_BUFFER_TYPE_VERTEX, t); + } + } +} + +void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i) +{ + size_t prim_offset = hair->curve_segment_offset; + + const Attribute *attr_mP = NULL; + size_t num_motion_steps = 1; + if (hair->has_motion_blur()) { + attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (attr_mP) { + num_motion_steps = hair->get_motion_steps(); + } + } + + assert(num_motion_steps <= RTC_MAX_TIME_STEP_COUNT); + num_motion_steps = min(num_motion_steps, RTC_MAX_TIME_STEP_COUNT); + + const size_t num_curves = hair->num_curves(); + size_t num_segments = 0; + for (size_t j = 0; j < num_curves; ++j) { + Hair::Curve c = hair->get_curve(j); + assert(c.num_segments() > 0); + num_segments += c.num_segments(); + } + + enum RTCGeometryType type = (hair->curve_shape == CURVE_RIBBON ? + RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE : + RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE); + + RTCGeometry geom_id = rtcNewGeometry(rtc_device, type); + rtcSetGeometryTessellationRate(geom_id, params.curve_subdivisions + 1); + unsigned *rtc_indices = (unsigned *)rtcSetNewGeometryBuffer( + geom_id, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT, sizeof(int), num_segments); + size_t rtc_index = 0; + for (size_t j = 0; j < num_curves; ++j) { + Hair::Curve c = hair->get_curve(j); + for (size_t k = 0; k < c.num_segments(); ++k) { + rtc_indices[rtc_index] = c.first_key + k; + /* Room for extra CVs at Catmull-Rom splines. */ + rtc_indices[rtc_index] += j * 2; + + ++rtc_index; + } + } + + rtcSetGeometryBuildQuality(geom_id, build_quality); + rtcSetGeometryTimeStepCount(geom_id, num_motion_steps); + + set_curve_vertex_buffer(geom_id, hair, false); + + rtcSetGeometryUserData(geom_id, (void *)prim_offset); + if (hair->curve_shape == CURVE_RIBBON) { + rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func); + } + else { + rtcSetGeometryIntersectFilterFunction(geom_id, rtc_filter_func_thick_curve); + rtcSetGeometryOccludedFilterFunction(geom_id, rtc_filter_occluded_func_thick_curve); + } + rtcSetGeometryMask(geom_id, ob->visibility_for_tracing()); + + rtcCommitGeometry(geom_id); + rtcAttachGeometryByID(scene, geom_id, i * 2 + 1); + rtcReleaseGeometry(geom_id); +} + +void BVHEmbree::refit(Progress &progress) +{ + progress.set_substatus("Refitting BVH nodes"); + + /* Update all vertex buffers, then tell Embree to rebuild/-fit the BVHs. 
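The attach calls in the functions above rely on the ID convention described at the top of this file: a Cycles object index i becomes Embree geometry ID 2*i for triangle and volume geometry and 2*i + 1 for curves, and the low bit is what the IS_HAIR() macro tests when converting hits back. A small sketch of the round trip, with illustrative helper names that are not part of the Cycles sources:

    /* Cycles object index -> Embree geometry ID. */
    static inline unsigned embree_geometry_id(int object_index, bool is_curve)
    {
      return (unsigned)object_index * 2 + (is_curve ? 1 : 0);
    }

    /* Embree geometry ID -> Cycles object index, plus the curve flag. */
    static inline int cycles_object_index(unsigned geom_id, bool *r_is_curve)
    {
      *r_is_curve = (geom_id & 1) != 0; /* Same test as IS_HAIR(). */
      return (int)(geom_id / 2);
    }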
*/ + unsigned geom_id = 0; + foreach (Object *ob, objects) { + if (!params.top_level || (ob->is_traceable() && !ob->get_geometry()->is_instanced())) { + Geometry *geom = ob->get_geometry(); + + if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { + Mesh *mesh = static_cast<Mesh *>(geom); + if (mesh->num_triangles() > 0) { + RTCGeometry geom = rtcGetGeometry(scene, geom_id); + set_tri_vertex_buffer(geom, mesh, true); + rtcSetGeometryUserData(geom, (void *)mesh->prim_offset); + rtcCommitGeometry(geom); + } + } + else if (geom->geometry_type == Geometry::HAIR) { + Hair *hair = static_cast<Hair *>(geom); + if (hair->num_curves() > 0) { + RTCGeometry geom = rtcGetGeometry(scene, geom_id + 1); + set_curve_vertex_buffer(geom, hair, true); + rtcSetGeometryUserData(geom, (void *)hair->curve_segment_offset); + rtcCommitGeometry(geom); + } + } + } + geom_id += 2; + } + + rtcCommitScene(scene); +} + +CCL_NAMESPACE_END + +#endif /* WITH_EMBREE */ diff --git a/intern/cycles/bvh/embree.h b/intern/cycles/bvh/embree.h new file mode 100644 index 00000000000..746ca97b504 --- /dev/null +++ b/intern/cycles/bvh/embree.h @@ -0,0 +1,68 @@ +/* + * Copyright 2018, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BVH_EMBREE_H__ +#define __BVH_EMBREE_H__ + +#ifdef WITH_EMBREE + +# include <embree3/rtcore.h> +# include <embree3/rtcore_scene.h> + +# include "bvh/bvh.h" +# include "bvh/params.h" + +# include "util/thread.h" +# include "util/types.h" +# include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +class Hair; +class Mesh; + +class BVHEmbree : public BVH { + public: + void build(Progress &progress, Stats *stats, RTCDevice rtc_device); + void refit(Progress &progress); + + RTCScene scene; + + protected: + friend class BVH; + BVHEmbree(const BVHParams &params, + const vector<Geometry *> &geometry, + const vector<Object *> &objects); + virtual ~BVHEmbree(); + + void add_object(Object *ob, int i); + void add_instance(Object *ob, int i); + void add_curves(const Object *ob, const Hair *hair, int i); + void add_triangles(const Object *ob, const Mesh *mesh, int i); + + private: + void set_tri_vertex_buffer(RTCGeometry geom_id, const Mesh *mesh, const bool update); + void set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, const bool update); + + RTCDevice rtc_device; + enum RTCBuildQuality build_quality; +}; + +CCL_NAMESPACE_END + +#endif /* WITH_EMBREE */ + +#endif /* __BVH_EMBREE_H__ */ diff --git a/intern/cycles/bvh/multi.cpp b/intern/cycles/bvh/multi.cpp new file mode 100644 index 00000000000..db0ff5c7847 --- /dev/null +++ b/intern/cycles/bvh/multi.cpp @@ -0,0 +1,37 @@ +/* + * Copyright 2020, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bvh/multi.h" + +#include "util/foreach.h" + +CCL_NAMESPACE_BEGIN + +BVHMulti::BVHMulti(const BVHParams &params_, + const vector<Geometry *> &geometry_, + const vector<Object *> &objects_) + : BVH(params_, geometry_, objects_) +{ +} + +BVHMulti::~BVHMulti() +{ + foreach (BVH *bvh, sub_bvhs) { + delete bvh; + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/multi.h b/intern/cycles/bvh/multi.h new file mode 100644 index 00000000000..88a459605c3 --- /dev/null +++ b/intern/cycles/bvh/multi.h @@ -0,0 +1,39 @@ +/* + * Copyright 2020, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BVH_MULTI_H__ +#define __BVH_MULTI_H__ + +#include "bvh/bvh.h" +#include "bvh/params.h" + +CCL_NAMESPACE_BEGIN + +class BVHMulti : public BVH { + public: + vector<BVH *> sub_bvhs; + + protected: + friend class BVH; + BVHMulti(const BVHParams &params, + const vector<Geometry *> &geometry, + const vector<Object *> &objects); + virtual ~BVHMulti(); +}; + +CCL_NAMESPACE_END + +#endif /* __BVH_MULTI_H__ */ diff --git a/intern/cycles/bvh/node.cpp b/intern/cycles/bvh/node.cpp new file mode 100644 index 00000000000..d3a665adfe7 --- /dev/null +++ b/intern/cycles/bvh/node.cpp @@ -0,0 +1,224 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bvh/node.h" + +#include "bvh/build.h" +#include "bvh/bvh.h" + +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +/* BVH Node */ + +int BVHNode::getSubtreeSize(BVH_STAT stat) const +{ + int cnt = 0; + + switch (stat) { + case BVH_STAT_NODE_COUNT: + cnt = 1; + break; + case BVH_STAT_LEAF_COUNT: + cnt = is_leaf() ? 1 : 0; + break; + case BVH_STAT_INNER_COUNT: + cnt = is_leaf() ? 0 : 1; + break; + case BVH_STAT_TRIANGLE_COUNT: + cnt = is_leaf() ?
reinterpret_cast<const LeafNode *>(this)->num_triangles() : 0; + break; + case BVH_STAT_CHILDNODE_COUNT: + cnt = num_children(); + break; + case BVH_STAT_ALIGNED_COUNT: + if (!is_unaligned) { + cnt = 1; + } + break; + case BVH_STAT_UNALIGNED_COUNT: + if (is_unaligned) { + cnt = 1; + } + break; + case BVH_STAT_ALIGNED_INNER_COUNT: + if (!is_leaf()) { + bool has_unaligned = false; + for (int j = 0; j < num_children(); j++) { + has_unaligned |= get_child(j)->is_unaligned; + } + cnt += has_unaligned ? 0 : 1; + } + break; + case BVH_STAT_UNALIGNED_INNER_COUNT: + if (!is_leaf()) { + bool has_unaligned = false; + for (int j = 0; j < num_children(); j++) { + has_unaligned |= get_child(j)->is_unaligned; + } + cnt += has_unaligned ? 1 : 0; + } + break; + case BVH_STAT_ALIGNED_LEAF_COUNT: + cnt = (is_leaf() && !is_unaligned) ? 1 : 0; + break; + case BVH_STAT_UNALIGNED_LEAF_COUNT: + cnt = (is_leaf() && is_unaligned) ? 1 : 0; + break; + case BVH_STAT_DEPTH: + if (is_leaf()) { + cnt = 1; + } + else { + for (int i = 0; i < num_children(); i++) { + cnt = max(cnt, get_child(i)->getSubtreeSize(stat)); + } + cnt += 1; + } + return cnt; + default: + assert(0); /* unknown mode */ + } + + if (!is_leaf()) + for (int i = 0; i < num_children(); i++) + cnt += get_child(i)->getSubtreeSize(stat); + + return cnt; +} + +void BVHNode::deleteSubtree() +{ + for (int i = 0; i < num_children(); i++) + if (get_child(i)) + get_child(i)->deleteSubtree(); + + delete this; +} + +float BVHNode::computeSubtreeSAHCost(const BVHParams &p, float probability) const +{ + float SAH = probability * p.cost(num_children(), num_triangles()); + + for (int i = 0; i < num_children(); i++) { + BVHNode *child = get_child(i); + SAH += child->computeSubtreeSAHCost( + p, probability * child->bounds.safe_area() / bounds.safe_area()); + } + + return SAH; +} + +uint BVHNode::update_visibility() +{ + if (!is_leaf() && visibility == 0) { + InnerNode *inner = (InnerNode *)this; + BVHNode *child0 = inner->children[0]; + BVHNode *child1 = inner->children[1]; + + visibility = child0->update_visibility() | child1->update_visibility(); + } + + return visibility; +} + +void BVHNode::update_time() +{ + if (!is_leaf()) { + InnerNode *inner = (InnerNode *)this; + BVHNode *child0 = inner->children[0]; + BVHNode *child1 = inner->children[1]; + child0->update_time(); + child1->update_time(); + time_from = min(child0->time_from, child1->time_from); + time_to = max(child0->time_to, child1->time_to); + } +} + +namespace { + +struct DumpTraversalContext { + /* Descriptor of file where writing is happening. */ + FILE *stream; + /* Unique identifier of the current node.
*/ + int id; +}; + +void dump_subtree(DumpTraversalContext *context, const BVHNode *node, const BVHNode *parent = NULL) +{ + if (node->is_leaf()) { + fprintf(context->stream, + " node_%p [label=\"%d\",fillcolor=\"#ccccee\",style=filled]\n", + node, + context->id); + } + else { + fprintf(context->stream, + " node_%p [label=\"%d\",fillcolor=\"#cceecc\",style=filled]\n", + node, + context->id); + } + if (parent != NULL) { + fprintf(context->stream, " node_%p -> node_%p;\n", parent, node); + } + context->id += 1; + for (int i = 0; i < node->num_children(); ++i) { + dump_subtree(context, node->get_child(i), node); + } +} + +} // namespace + +void BVHNode::dump_graph(const char *filename) +{ + DumpTraversalContext context; + context.stream = fopen(filename, "w"); + if (context.stream == NULL) { + return; + } + context.id = 0; + fprintf(context.stream, "digraph BVH {\n"); + dump_subtree(&context, this); + fprintf(context.stream, "}\n"); + fclose(context.stream); +} + +/* Inner Node */ + +void InnerNode::print(int depth) const +{ + for (int i = 0; i < depth; i++) + printf(" "); + + printf("inner node %p\n", (void *)this); + + if (children[0]) + children[0]->print(depth + 1); + if (children[1]) + children[1]->print(depth + 1); +} + +void LeafNode::print(int depth) const +{ + for (int i = 0; i < depth; i++) + printf(" "); + + printf("leaf node %d to %d\n", lo, hi); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/node.h b/intern/cycles/bvh/node.h new file mode 100644 index 00000000000..d5de9e062fc --- /dev/null +++ b/intern/cycles/bvh/node.h @@ -0,0 +1,255 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __BVH_NODE_H__ +#define __BVH_NODE_H__ + +#include "util/boundbox.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +enum BVH_STAT { + BVH_STAT_NODE_COUNT, + BVH_STAT_INNER_COUNT, + BVH_STAT_LEAF_COUNT, + BVH_STAT_TRIANGLE_COUNT, + BVH_STAT_CHILDNODE_COUNT, + BVH_STAT_ALIGNED_COUNT, + BVH_STAT_UNALIGNED_COUNT, + BVH_STAT_ALIGNED_INNER_COUNT, + BVH_STAT_UNALIGNED_INNER_COUNT, + BVH_STAT_ALIGNED_LEAF_COUNT, + BVH_STAT_UNALIGNED_LEAF_COUNT, + BVH_STAT_DEPTH, +}; + +class BVHParams; + +class BVHNode { + public: + virtual ~BVHNode() + { + delete aligned_space; + } + + virtual bool is_leaf() const = 0; + virtual int num_children() const = 0; + virtual BVHNode *get_child(int i) const = 0; + virtual int num_triangles() const + { + return 0; + } + virtual void print(int depth = 0) const = 0; + + inline void set_aligned_space(const Transform &aligned_space) + { + is_unaligned = true; + if (this->aligned_space == NULL) { + this->aligned_space = new Transform(aligned_space); + } + else { + *this->aligned_space = aligned_space; + } + } + + inline Transform get_aligned_space() const + { + if (aligned_space == NULL) { + return transform_identity(); + } + return *aligned_space; + } + + inline bool has_unaligned() const + { + if (is_leaf()) { + return false; + } + for (int i = 0; i < num_children(); ++i) { + if (get_child(i)->is_unaligned) { + return true; + } + } + return false; + } + + // Subtree functions + int getSubtreeSize(BVH_STAT stat = BVH_STAT_NODE_COUNT) const; + float computeSubtreeSAHCost(const BVHParams &p, float probability = 1.0f) const; + void deleteSubtree(); + + uint update_visibility(); + void update_time(); + + /* Dump the content of the tree as a graphviz file. */ + void dump_graph(const char *filename); + + // Properties. + BoundBox bounds; + uint visibility; + + bool is_unaligned; + + /* TODO(sergey): Can be stored as 3x3 matrix, but better to have some + * utilities and type defines in util_transform first. + */ + Transform *aligned_space; + + float time_from, time_to; + + protected: + explicit BVHNode(const BoundBox &bounds) + : bounds(bounds), + visibility(0), + is_unaligned(false), + aligned_space(NULL), + time_from(0.0f), + time_to(1.0f) + { + } + + explicit BVHNode(const BVHNode &other) + : bounds(other.bounds), + visibility(other.visibility), + is_unaligned(other.is_unaligned), + aligned_space(NULL), + time_from(other.time_from), + time_to(other.time_to) + { + if (other.aligned_space != NULL) { + assert(other.is_unaligned); + aligned_space = new Transform(); + *aligned_space = *other.aligned_space; + } + else { + assert(!other.is_unaligned); + } + } +}; + +class InnerNode : public BVHNode { + public: + static constexpr int kNumMaxChildren = 8; + + InnerNode(const BoundBox &bounds, BVHNode *child0, BVHNode *child1) + : BVHNode(bounds), num_children_(2) + { + children[0] = child0; + children[1] = child1; + reset_unused_children(); + + if (child0 && child1) { + visibility = child0->visibility | child1->visibility; + } + else { + /* Happens on build cancel. 
*/ + visibility = 0; + } + } + + InnerNode(const BoundBox &bounds, BVHNode **children, const int num_children) + : BVHNode(bounds), num_children_(num_children) + { + visibility = 0; + time_from = FLT_MAX; + time_to = -FLT_MAX; + for (int i = 0; i < num_children; ++i) { + assert(children[i] != NULL); + visibility |= children[i]->visibility; + this->children[i] = children[i]; + time_from = min(time_from, children[i]->time_from); + time_to = max(time_to, children[i]->time_to); + } + reset_unused_children(); + } + + /* NOTE: This function is only used during binary BVH builder, and it + * supposed to be configured to have 2 children which will be filled-in in a + * bit. But this is important to have children reset to NULL. */ + explicit InnerNode(const BoundBox &bounds) : BVHNode(bounds), num_children_(0) + { + reset_unused_children(); + visibility = 0; + num_children_ = 2; + } + + bool is_leaf() const + { + return false; + } + int num_children() const + { + return num_children_; + } + BVHNode *get_child(int i) const + { + assert(i >= 0 && i < num_children_); + return children[i]; + } + void print(int depth) const; + + int num_children_; + BVHNode *children[kNumMaxChildren]; + + protected: + void reset_unused_children() + { + for (int i = num_children_; i < kNumMaxChildren; ++i) { + children[i] = NULL; + } + } +}; + +class LeafNode : public BVHNode { + public: + LeafNode(const BoundBox &bounds, uint visibility, int lo, int hi) + : BVHNode(bounds), lo(lo), hi(hi) + { + this->bounds = bounds; + this->visibility = visibility; + } + + LeafNode(const LeafNode &other) : BVHNode(other), lo(other.lo), hi(other.hi) + { + } + + bool is_leaf() const + { + return true; + } + int num_children() const + { + return 0; + } + BVHNode *get_child(int) const + { + return NULL; + } + int num_triangles() const + { + return hi - lo; + } + void print(int depth) const; + + int lo; + int hi; +}; + +CCL_NAMESPACE_END + +#endif /* __BVH_NODE_H__ */ diff --git a/intern/cycles/bvh/optix.cpp b/intern/cycles/bvh/optix.cpp new file mode 100644 index 00000000000..ebc3fa68b97 --- /dev/null +++ b/intern/cycles/bvh/optix.cpp @@ -0,0 +1,47 @@ +/* + * Copyright 2019, NVIDIA Corporation. + * Copyright 2019, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef WITH_OPTIX + +# include "device/device.h" + +# include "bvh/optix.h" + +CCL_NAMESPACE_BEGIN + +BVHOptiX::BVHOptiX(const BVHParams ¶ms_, + const vector &geometry_, + const vector &objects_, + Device *device) + : BVH(params_, geometry_, objects_), + device(device), + traversable_handle(0), + as_data(device, params_.top_level ? "optix tlas" : "optix blas", false), + motion_transform_data(device, "optix motion transform", false) +{ +} + +BVHOptiX::~BVHOptiX() +{ + // Acceleration structure memory is delayed freed on device, since deleting the + // BVH may happen while still being used for rendering. 
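As a usage illustration of the node classes declared in node.h above, a hypothetical snippet that builds a two-leaf tree and queries it (it assumes the Cycles bvh/ and util/ headers; the bounds and primitive ranges are arbitrary):

    static void example_tiny_tree()
    {
      BoundBox box = BoundBox::empty;
      box.grow(make_float3(0.0f, 0.0f, 0.0f));
      box.grow(make_float3(1.0f, 1.0f, 1.0f));

      BVHNode *left = new LeafNode(box, ~0u, 0, 4);  /* primitives [0, 4), full visibility mask */
      BVHNode *right = new LeafNode(box, ~0u, 4, 8); /* primitives [4, 8) */
      BVHNode *root = new InnerNode(box, left, right);

      assert(root->getSubtreeSize(BVH_STAT_LEAF_COUNT) == 2);
      assert(root->getSubtreeSize(BVH_STAT_TRIANGLE_COUNT) == 8);

      root->dump_graph("/tmp/bvh.dot"); /* Writes the tree as Graphviz DOT. */
      root->deleteSubtree();            /* Recursively frees every node. */
    }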
+ device->release_optix_bvh(this); +} + +CCL_NAMESPACE_END + +#endif /* WITH_OPTIX */ diff --git a/intern/cycles/bvh/optix.h b/intern/cycles/bvh/optix.h new file mode 100644 index 00000000000..037e54980bd --- /dev/null +++ b/intern/cycles/bvh/optix.h @@ -0,0 +1,50 @@ +/* + * Copyright 2019, NVIDIA Corporation. + * Copyright 2019, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BVH_OPTIX_H__ +#define __BVH_OPTIX_H__ + +#ifdef WITH_OPTIX + +# include "bvh/bvh.h" +# include "bvh/params.h" + +# include "device/memory.h" + +CCL_NAMESPACE_BEGIN + +class BVHOptiX : public BVH { + public: + Device *device; + uint64_t traversable_handle; + device_only_memory<char> as_data; + device_only_memory<char> motion_transform_data; + + protected: + friend class BVH; + BVHOptiX(const BVHParams &params, + const vector<Geometry *> &geometry, + const vector<Object *> &objects, + Device *device); + virtual ~BVHOptiX(); +}; + +CCL_NAMESPACE_END + +#endif /* WITH_OPTIX */ + +#endif /* __BVH_OPTIX_H__ */ diff --git a/intern/cycles/bvh/params.h b/intern/cycles/bvh/params.h new file mode 100644 index 00000000000..8f185a2640f --- /dev/null +++ b/intern/cycles/bvh/params.h @@ -0,0 +1,335 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BVH_PARAMS_H__ +#define __BVH_PARAMS_H__ + +#include "util/boundbox.h" + +#include "kernel/types.h" + +CCL_NAMESPACE_BEGIN + +/* Layout of BVH tree. + * + * For example, how wide BVH tree is, in terms of number of children + * per node. + */ +typedef KernelBVHLayout BVHLayout; + +/* Type of BVH, in terms of whether it supports dynamic updates of meshes + * or whether modifying geometry requires a full BVH rebuild. + */ +enum BVHType { + /* BVH supports dynamic updates of geometry. + * + * Faster for updating BVH tree when doing modifications in viewport, + * but slower for rendering. + */ + BVH_TYPE_DYNAMIC = 0, + /* BVH tree is calculated for specific scene, updates in geometry + * require a full tree rebuild. + * + * Slower to update BVH tree when modifying objects in viewport, also + * slower to build final BVH tree but gives best possible render speed. + */ + BVH_TYPE_STATIC = 1, + + BVH_NUM_TYPES, +}; + +/* Bitflag type to denote which BVH layouts are supported by a + * particular area. + * + * Bitflags are the BVH_LAYOUT_* values. + */ +typedef int BVHLayoutMask; + +/* Get human-readable name of BVH layout.
*/ +const char *bvh_layout_name(BVHLayout layout); + +/* BVH Parameters */ + +class BVHParams { + public: + /* spatial split area threshold */ + bool use_spatial_split; + float spatial_split_alpha; + + /* Unaligned nodes creation threshold */ + float unaligned_split_threshold; + + /* SAH costs */ + float sah_node_cost; + float sah_primitive_cost; + + /* number of primitives in leaf */ + int min_leaf_size; + int max_triangle_leaf_size; + int max_motion_triangle_leaf_size; + int max_curve_leaf_size; + int max_motion_curve_leaf_size; + + /* object or mesh level bvh */ + bool top_level; + + /* BVH layout to be built. */ + BVHLayout bvh_layout; + + /* Use unaligned bounding boxes. + * Only used for curves BVH. + */ + bool use_unaligned_nodes; + + /* Split time range to this number of steps and create leaf node for each + * of this time steps. + * + * Speeds up rendering of motion curve primitives in the cost of higher + * memory usage. + */ + int num_motion_curve_steps; + + /* Same as above, but for triangle primitives. */ + int num_motion_triangle_steps; + + /* Same as in SceneParams. */ + int bvh_type; + + /* These are needed for Embree. */ + int curve_subdivisions; + + /* fixed parameters */ + enum { MAX_DEPTH = 64, MAX_SPATIAL_DEPTH = 48, NUM_SPATIAL_BINS = 32 }; + + BVHParams() + { + use_spatial_split = true; + spatial_split_alpha = 1e-5f; + + unaligned_split_threshold = 0.7f; + + /* todo: see if splitting up primitive cost to be separate for triangles + * and curves can help. so far in tests it doesn't help, but why? */ + sah_node_cost = 1.0f; + sah_primitive_cost = 1.0f; + + min_leaf_size = 1; + max_triangle_leaf_size = 8; + max_motion_triangle_leaf_size = 8; + max_curve_leaf_size = 1; + max_motion_curve_leaf_size = 4; + + top_level = false; + bvh_layout = BVH_LAYOUT_BVH2; + use_unaligned_nodes = false; + + num_motion_curve_steps = 0; + num_motion_triangle_steps = 0; + + bvh_type = 0; + + curve_subdivisions = 4; + } + + /* SAH costs */ + __forceinline float cost(int num_nodes, int num_primitives) const + { + return node_cost(num_nodes) + primitive_cost(num_primitives); + } + + __forceinline float primitive_cost(int n) const + { + return n * sah_primitive_cost; + } + + __forceinline float node_cost(int n) const + { + return n * sah_node_cost; + } + + __forceinline bool small_enough_for_leaf(int size, int level) + { + return (size <= min_leaf_size || level >= MAX_DEPTH); + } + + /* Gets best matching BVH. + * + * If the requested layout is supported by the device, it will be used. + * Otherwise, widest supported layout below that will be used. + */ + static BVHLayout best_bvh_layout(BVHLayout requested_layout, BVHLayoutMask supported_layouts); +}; + +/* BVH Reference + * + * Reference to a primitive. 
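To make the SAH helpers in BVHParams above concrete, a hypothetical fragment using the default costs (sah_node_cost = sah_primitive_cost = 1.0), deliberately ignoring the bounding-box area weighting the real builder applies:

    static void example_sah_costs()
    {
      BVHParams params; /* Defaults: sah_node_cost = sah_primitive_cost = 1.0f. */

      const float leaf_sah = params.primitive_cost(8);         /* 8.0: keep all 8 prims in one leaf. */
      const float split_sah = params.node_cost(2) +            /* 2.0: two child nodes. */
                              2.0f * params.primitive_cost(4); /* 8.0: 4 prims per child. */

      /* 10.0 > 8.0: without the area terms the split loses, which is why the
       * builder weights every term by surface area (see BVHMixedSplit earlier
       * in this patch) before comparing. */
      assert(split_sah > leaf_sah);
    }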
Primitive index and object are sneakily packed + * into BoundBox to reduce memory usage and align nicely */ + +class BVHReference { + public: + __forceinline BVHReference() + { + } + + __forceinline BVHReference(const BoundBox &bounds_, + int prim_index_, + int prim_object_, + int prim_type, + float time_from = 0.0f, + float time_to = 1.0f) + : rbounds(bounds_), time_from_(time_from), time_to_(time_to) + { + rbounds.min.w = __int_as_float(prim_index_); + rbounds.max.w = __int_as_float(prim_object_); + type = prim_type; + } + + __forceinline const BoundBox &bounds() const + { + return rbounds; + } + __forceinline int prim_index() const + { + return __float_as_int(rbounds.min.w); + } + __forceinline int prim_object() const + { + return __float_as_int(rbounds.max.w); + } + __forceinline int prim_type() const + { + return type; + } + __forceinline float time_from() const + { + return time_from_; + } + __forceinline float time_to() const + { + return time_to_; + } + + BVHReference &operator=(const BVHReference &arg) + { + if (&arg != this) { + /* TODO(sergey): Check if it is still faster to memcpy() with + * modern compilers. + */ + memcpy((void *)this, &arg, sizeof(BVHReference)); + } + return *this; + } + + protected: + BoundBox rbounds; + uint type; + float time_from_, time_to_; +}; + +/* BVH Range + * + * Build range used during construction, to indicate the bounds and place in + * the reference array of a subset of primitives Again uses trickery to pack + * integers into BoundBox for alignment purposes. */ + +class BVHRange { + public: + __forceinline BVHRange() + { + rbounds.min.w = __int_as_float(0); + rbounds.max.w = __int_as_float(0); + } + + __forceinline BVHRange(const BoundBox &bounds_, int start_, int size_) : rbounds(bounds_) + { + rbounds.min.w = __int_as_float(start_); + rbounds.max.w = __int_as_float(size_); + } + + __forceinline BVHRange(const BoundBox &bounds_, const BoundBox &cbounds_, int start_, int size_) + : rbounds(bounds_), cbounds(cbounds_) + { + rbounds.min.w = __int_as_float(start_); + rbounds.max.w = __int_as_float(size_); + } + + __forceinline void set_start(int start_) + { + rbounds.min.w = __int_as_float(start_); + } + + __forceinline const BoundBox &bounds() const + { + return rbounds; + } + __forceinline const BoundBox ¢_bounds() const + { + return cbounds; + } + __forceinline int start() const + { + return __float_as_int(rbounds.min.w); + } + __forceinline int size() const + { + return __float_as_int(rbounds.max.w); + } + __forceinline int end() const + { + return start() + size(); + } + + protected: + BoundBox rbounds; + BoundBox cbounds; +}; + +/* BVH Spatial Bin */ + +struct BVHSpatialBin { + BoundBox bounds; + int enter; + int exit; + + __forceinline BVHSpatialBin() + { + } +}; + +/* BVH Spatial Storage + * + * The idea of this storage is have thread-specific storage for the spatial + * splitters. We can pre-allocate this storage in advance and avoid heavy memory + * operations during split process. + */ + +struct BVHSpatialStorage { + /* Accumulated bounds when sweeping from right to left. */ + vector right_bounds; + + /* Bins used for histogram when selecting best split plane. */ + BVHSpatialBin bins[3][BVHParams::NUM_SPATIAL_BINS]; + + /* Temporary storage for the new references. 
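The "sneaky packing" used by BVHReference and BVHRange above stores integer indices in the otherwise unused w components of the BoundBox min/max vectors by reinterpreting the integer bits as float bits (the __int_as_float() / __float_as_int() utilities), not by numeric conversion, so the values round-trip exactly. A standalone illustration of the trick, independent of the Cycles headers:

    #include <cassert>
    #include <cstring>

    static inline float int_as_float(int i)
    {
      float f;
      std::memcpy(&f, &i, sizeof(f)); /* Bit reinterpretation, no conversion. */
      return f;
    }

    static inline int float_as_int(float f)
    {
      int i;
      std::memcpy(&i, &f, sizeof(i));
      return i;
    }

    int main()
    {
      const int prim_index = 1234567;
      const float packed = int_as_float(prim_index); /* Stored in e.g. bounds.min.w. */
      assert(float_as_int(packed) == prim_index);    /* Recovered exactly. */
      return 0;
    }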
Used by spatial split to store + * new references in before they're getting inserted into actual array, + */ + vector new_references; +}; + +CCL_NAMESPACE_END + +#endif /* __BVH_PARAMS_H__ */ diff --git a/intern/cycles/bvh/sort.cpp b/intern/cycles/bvh/sort.cpp new file mode 100644 index 00000000000..a9975ce6bb2 --- /dev/null +++ b/intern/cycles/bvh/sort.cpp @@ -0,0 +1,187 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bvh/sort.h" + +#include "bvh/build.h" + +#include "util/algorithm.h" +#include "util/task.h" + +CCL_NAMESPACE_BEGIN + +static const int BVH_SORT_THRESHOLD = 4096; + +struct BVHReferenceCompare { + public: + int dim; + const BVHUnaligned *unaligned_heuristic; + const Transform *aligned_space; + + BVHReferenceCompare(int dim, + const BVHUnaligned *unaligned_heuristic, + const Transform *aligned_space) + : dim(dim), unaligned_heuristic(unaligned_heuristic), aligned_space(aligned_space) + { + } + + __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const + { + return (aligned_space != NULL) ? + unaligned_heuristic->compute_aligned_prim_boundbox(prim, *aligned_space) : + prim.bounds(); + } + + /* Compare two references. + * + * Returns value is similar to return value of strcmp(). + */ + __forceinline int compare(const BVHReference &ra, const BVHReference &rb) const + { + BoundBox ra_bounds = get_prim_bounds(ra), rb_bounds = get_prim_bounds(rb); + float ca = ra_bounds.min[dim] + ra_bounds.max[dim]; + float cb = rb_bounds.min[dim] + rb_bounds.max[dim]; + + if (ca < cb) + return -1; + else if (ca > cb) + return 1; + else if (ra.prim_object() < rb.prim_object()) + return -1; + else if (ra.prim_object() > rb.prim_object()) + return 1; + else if (ra.prim_index() < rb.prim_index()) + return -1; + else if (ra.prim_index() > rb.prim_index()) + return 1; + else if (ra.prim_type() < rb.prim_type()) + return -1; + else if (ra.prim_type() > rb.prim_type()) + return 1; + + return 0; + } + + bool operator()(const BVHReference &ra, const BVHReference &rb) + { + return (compare(ra, rb) < 0); + } +}; + +static void bvh_reference_sort_threaded(TaskPool *task_pool, + BVHReference *data, + const int job_start, + const int job_end, + const BVHReferenceCompare &compare); + +/* Multi-threaded reference sort. */ +static void bvh_reference_sort_threaded(TaskPool *task_pool, + BVHReference *data, + const int job_start, + const int job_end, + const BVHReferenceCompare &compare) +{ + int start = job_start, end = job_end; + bool have_work = (start < end); + while (have_work) { + const int count = job_end - job_start; + if (count < BVH_SORT_THRESHOLD) { + /* Number of reference low enough, faster to finish the job + * in one thread rather than to spawn more threads. + */ + sort(data + job_start, data + job_end + 1, compare); + break; + } + /* Single QSort step. + * Use median-of-three method for the pivot point. 
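+ * (Illustrative aside, numbers are hypothetical: for centroid keys 9, 1, 5 at
+ *  data[left], data[center], data[right], the three compare/swap steps below
+ *  reorder them to 1, 5, 9; the median 5 is then parked at data[right - 1]
+ *  and used as the pivot for the partition loop that follows.)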
+ */ + int left = start, right = end; + int center = (left + right) >> 1; + if (compare.compare(data[left], data[center]) > 0) { + swap(data[left], data[center]); + } + if (compare.compare(data[left], data[right]) > 0) { + swap(data[left], data[right]); + } + if (compare.compare(data[center], data[right]) > 0) { + swap(data[center], data[right]); + } + swap(data[center], data[right - 1]); + BVHReference median = data[right - 1]; + do { + while (compare.compare(data[left], median) < 0) { + ++left; + } + while (compare.compare(data[right], median) > 0) { + --right; + } + if (left <= right) { + swap(data[left], data[right]); + ++left; + --right; + } + } while (left <= right); + /* We only create one new task here to reduce downside effects of + * latency in TaskScheduler. + * So generally current thread keeps working on the left part of the + * array, and we create new task for the right side. + * However, if there's nothing to be done in the left side of the array + * we don't create any tasks and make it so current thread works on the + * right side. + */ + have_work = false; + if (left < end) { + if (start < right) { + task_pool->push( + function_bind(bvh_reference_sort_threaded, task_pool, data, left, end, compare)); + } + else { + start = left; + have_work = true; + } + } + if (start < right) { + end = right; + have_work = true; + } + } +} + +void bvh_reference_sort(int start, + int end, + BVHReference *data, + int dim, + const BVHUnaligned *unaligned_heuristic, + const Transform *aligned_space) +{ + const int count = end - start; + BVHReferenceCompare compare(dim, unaligned_heuristic, aligned_space); + if (count < BVH_SORT_THRESHOLD) { + /* It is important to not use any mutex if array is small enough, + * otherwise we end up in situation when we're going to sleep far + * too often. + */ + sort(data + start, data + end, compare); + } + else { + TaskPool task_pool; + bvh_reference_sort_threaded(&task_pool, data, start, end - 1, compare); + task_pool.wait_work(); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/sort.h b/intern/cycles/bvh/sort.h new file mode 100644 index 00000000000..936401d8607 --- /dev/null +++ b/intern/cycles/bvh/sort.h @@ -0,0 +1,38 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __BVH_SORT_H__ +#define __BVH_SORT_H__ + +#include + +CCL_NAMESPACE_BEGIN + +class BVHReference; +class BVHUnaligned; +struct Transform; + +void bvh_reference_sort(int start, + int end, + BVHReference *data, + int dim, + const BVHUnaligned *unaligned_heuristic = NULL, + const Transform *aligned_space = NULL); + +CCL_NAMESPACE_END + +#endif /* __BVH_SORT_H__ */ diff --git a/intern/cycles/bvh/split.cpp b/intern/cycles/bvh/split.cpp new file mode 100644 index 00000000000..102c50e2979 --- /dev/null +++ b/intern/cycles/bvh/split.cpp @@ -0,0 +1,518 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bvh/split.h" + +#include "bvh/build.h" +#include "bvh/sort.h" + +#include "scene/hair.h" +#include "scene/mesh.h" +#include "scene/object.h" + +#include "util/algorithm.h" + +CCL_NAMESPACE_BEGIN + +/* Object Split */ + +BVHObjectSplit::BVHObjectSplit(BVHBuild *builder, + BVHSpatialStorage *storage, + const BVHRange &range, + vector &references, + float nodeSAH, + const BVHUnaligned *unaligned_heuristic, + const Transform *aligned_space) + : sah(FLT_MAX), + dim(0), + num_left(0), + left_bounds(BoundBox::empty), + right_bounds(BoundBox::empty), + storage_(storage), + references_(&references), + unaligned_heuristic_(unaligned_heuristic), + aligned_space_(aligned_space) +{ + const BVHReference *ref_ptr = &references_->at(range.start()); + float min_sah = FLT_MAX; + + storage_->right_bounds.resize(range.size()); + + for (int dim = 0; dim < 3; dim++) { + /* Sort references. */ + bvh_reference_sort(range.start(), + range.end(), + &references_->at(0), + dim, + unaligned_heuristic_, + aligned_space_); + + /* sweep right to left and determine bounds. */ + BoundBox right_bounds = BoundBox::empty; + for (int i = range.size() - 1; i > 0; i--) { + BoundBox prim_bounds = get_prim_bounds(ref_ptr[i]); + right_bounds.grow(prim_bounds); + storage_->right_bounds[i - 1] = right_bounds; + } + + /* sweep left to right and select lowest SAH. 
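+ * (Illustrative aside, using the names defined in BVHParams and in this
+ *  constructor: for a candidate split after the i-th sorted reference, the
+ *  quantity evaluated below is
+ *      sah(i) = nodeSAH
+ *             + safe_area(left_bounds(i))  * sah_primitive_cost * i
+ *             + safe_area(right_bounds(i)) * sah_primitive_cost * (range.size() - i)
+ *  and the axis/index pair with the smallest sah(i) over all three dimensions
+ *  becomes this->dim and this->num_left.)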
*/ + BoundBox left_bounds = BoundBox::empty; + + for (int i = 1; i < range.size(); i++) { + BoundBox prim_bounds = get_prim_bounds(ref_ptr[i - 1]); + left_bounds.grow(prim_bounds); + right_bounds = storage_->right_bounds[i - 1]; + + float sah = nodeSAH + left_bounds.safe_area() * builder->params.primitive_cost(i) + + right_bounds.safe_area() * builder->params.primitive_cost(range.size() - i); + + if (sah < min_sah) { + min_sah = sah; + + this->sah = sah; + this->dim = dim; + this->num_left = i; + this->left_bounds = left_bounds; + this->right_bounds = right_bounds; + } + } + } +} + +void BVHObjectSplit::split(BVHRange &left, BVHRange &right, const BVHRange &range) +{ + assert(references_->size() > 0); + /* sort references according to split */ + bvh_reference_sort(range.start(), + range.end(), + &references_->at(0), + this->dim, + unaligned_heuristic_, + aligned_space_); + + BoundBox effective_left_bounds, effective_right_bounds; + const int num_right = range.size() - this->num_left; + if (aligned_space_ == NULL) { + effective_left_bounds = left_bounds; + effective_right_bounds = right_bounds; + } + else { + effective_left_bounds = BoundBox::empty; + effective_right_bounds = BoundBox::empty; + for (int i = 0; i < this->num_left; ++i) { + BoundBox prim_boundbox = references_->at(range.start() + i).bounds(); + effective_left_bounds.grow(prim_boundbox); + } + for (int i = 0; i < num_right; ++i) { + BoundBox prim_boundbox = references_->at(range.start() + this->num_left + i).bounds(); + effective_right_bounds.grow(prim_boundbox); + } + } + + /* split node ranges */ + left = BVHRange(effective_left_bounds, range.start(), this->num_left); + right = BVHRange(effective_right_bounds, left.end(), num_right); +} + +/* Spatial Split */ + +BVHSpatialSplit::BVHSpatialSplit(const BVHBuild &builder, + BVHSpatialStorage *storage, + const BVHRange &range, + vector &references, + float nodeSAH, + const BVHUnaligned *unaligned_heuristic, + const Transform *aligned_space) + : sah(FLT_MAX), + dim(0), + pos(0.0f), + storage_(storage), + references_(&references), + unaligned_heuristic_(unaligned_heuristic), + aligned_space_(aligned_space) +{ + /* initialize bins. */ + BoundBox range_bounds; + if (aligned_space == NULL) { + range_bounds = range.bounds(); + } + else { + range_bounds = unaligned_heuristic->compute_aligned_boundbox( + range, &references_->at(0), *aligned_space); + } + + float3 origin = range_bounds.min; + float3 binSize = (range_bounds.max - origin) * (1.0f / (float)BVHParams::NUM_SPATIAL_BINS); + float3 invBinSize = 1.0f / binSize; + + for (int dim = 0; dim < 3; dim++) { + for (int i = 0; i < BVHParams::NUM_SPATIAL_BINS; i++) { + BVHSpatialBin &bin = storage_->bins[dim][i]; + + bin.bounds = BoundBox::empty; + bin.enter = 0; + bin.exit = 0; + } + } + + /* chop references into bins. 
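+ * (Illustrative aside: per axis, each reference is mapped to a bin index as
+ *      firstBin = clamp((int)((bounds.min - origin) * invBinSize), 0, NUM_SPATIAL_BINS - 1)
+ *  and analogously from bounds.max (clamped to at least firstBin) for lastBin,
+ *  which is what the loop below computes; a reference spanning several bins is
+ *  then clipped against each intermediate bin boundary via split_reference().)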
*/ + for (unsigned int refIdx = range.start(); refIdx < range.end(); refIdx++) { + const BVHReference &ref = references_->at(refIdx); + BoundBox prim_bounds = get_prim_bounds(ref); + float3 firstBinf = (prim_bounds.min - origin) * invBinSize; + float3 lastBinf = (prim_bounds.max - origin) * invBinSize; + int3 firstBin = make_int3((int)firstBinf.x, (int)firstBinf.y, (int)firstBinf.z); + int3 lastBin = make_int3((int)lastBinf.x, (int)lastBinf.y, (int)lastBinf.z); + + firstBin = clamp(firstBin, 0, BVHParams::NUM_SPATIAL_BINS - 1); + lastBin = clamp(lastBin, firstBin, BVHParams::NUM_SPATIAL_BINS - 1); + + for (int dim = 0; dim < 3; dim++) { + BVHReference currRef( + get_prim_bounds(ref), ref.prim_index(), ref.prim_object(), ref.prim_type()); + + for (int i = firstBin[dim]; i < lastBin[dim]; i++) { + BVHReference leftRef, rightRef; + + split_reference( + builder, leftRef, rightRef, currRef, dim, origin[dim] + binSize[dim] * (float)(i + 1)); + storage_->bins[dim][i].bounds.grow(leftRef.bounds()); + currRef = rightRef; + } + + storage_->bins[dim][lastBin[dim]].bounds.grow(currRef.bounds()); + storage_->bins[dim][firstBin[dim]].enter++; + storage_->bins[dim][lastBin[dim]].exit++; + } + } + + /* select best split plane. */ + storage_->right_bounds.resize(BVHParams::NUM_SPATIAL_BINS); + for (int dim = 0; dim < 3; dim++) { + /* sweep right to left and determine bounds. */ + BoundBox right_bounds = BoundBox::empty; + for (int i = BVHParams::NUM_SPATIAL_BINS - 1; i > 0; i--) { + right_bounds.grow(storage_->bins[dim][i].bounds); + storage_->right_bounds[i - 1] = right_bounds; + } + + /* sweep left to right and select lowest SAH. */ + BoundBox left_bounds = BoundBox::empty; + int leftNum = 0; + int rightNum = range.size(); + + for (int i = 1; i < BVHParams::NUM_SPATIAL_BINS; i++) { + left_bounds.grow(storage_->bins[dim][i - 1].bounds); + leftNum += storage_->bins[dim][i - 1].enter; + rightNum -= storage_->bins[dim][i - 1].exit; + + float sah = nodeSAH + left_bounds.safe_area() * builder.params.primitive_cost(leftNum) + + storage_->right_bounds[i - 1].safe_area() * + builder.params.primitive_cost(rightNum); + + if (sah < this->sah) { + this->sah = sah; + this->dim = dim; + this->pos = origin[dim] + binSize[dim] * (float)i; + } + } + } +} + +void BVHSpatialSplit::split(BVHBuild *builder, + BVHRange &left, + BVHRange &right, + const BVHRange &range) +{ + /* Categorize references and compute bounds. + * + * Left-hand side: [left_start, left_end[ + * Uncategorized/split: [left_end, right_start[ + * Right-hand side: [right_start, refs.size()[ */ + + vector &refs = *references_; + int left_start = range.start(); + int left_end = left_start; + int right_start = range.end(); + int right_end = range.end(); + BoundBox left_bounds = BoundBox::empty; + BoundBox right_bounds = BoundBox::empty; + + for (int i = left_end; i < right_start; i++) { + BoundBox prim_bounds = get_prim_bounds(refs[i]); + if (prim_bounds.max[this->dim] <= this->pos) { + /* entirely on the left-hand side */ + left_bounds.grow(prim_bounds); + swap(refs[i], refs[left_end++]); + } + else if (prim_bounds.min[this->dim] >= this->pos) { + /* entirely on the right-hand side */ + right_bounds.grow(prim_bounds); + swap(refs[i--], refs[--right_start]); + } + } + + /* Duplicate or unsplit references intersecting both sides. + * + * Duplication happens into a temporary pre-allocated vector in order to + * reduce number of memmove() calls happening in vector.insert(). 
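+ * (Illustrative aside on the loop below: each straddling reference is either
+ *  kept whole on the left (unsplitLeftSAH), kept whole on the right
+ *  (unsplitRightSAH), or clipped and duplicated into both children
+ *  (duplicateSAH); the candidate with the smallest sum of
+ *  safe_area(bounds) * primitive_cost(count) wins, and only the duplicate
+ *  case appends the right-hand clip to new_refs.)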
+ */ + vector &new_refs = storage_->new_references; + new_refs.clear(); + new_refs.reserve(right_start - left_end); + while (left_end < right_start) { + /* split reference. */ + BVHReference curr_ref(get_prim_bounds(refs[left_end]), + refs[left_end].prim_index(), + refs[left_end].prim_object(), + refs[left_end].prim_type()); + BVHReference lref, rref; + split_reference(*builder, lref, rref, curr_ref, this->dim, this->pos); + + /* compute SAH for duplicate/unsplit candidates. */ + BoundBox lub = left_bounds; // Unsplit to left: new left-hand bounds. + BoundBox rub = right_bounds; // Unsplit to right: new right-hand bounds. + BoundBox ldb = left_bounds; // Duplicate: new left-hand bounds. + BoundBox rdb = right_bounds; // Duplicate: new right-hand bounds. + + lub.grow(curr_ref.bounds()); + rub.grow(curr_ref.bounds()); + ldb.grow(lref.bounds()); + rdb.grow(rref.bounds()); + + float lac = builder->params.primitive_cost(left_end - left_start); + float rac = builder->params.primitive_cost(right_end - right_start); + float lbc = builder->params.primitive_cost(left_end - left_start + 1); + float rbc = builder->params.primitive_cost(right_end - right_start + 1); + + float unsplitLeftSAH = lub.safe_area() * lbc + right_bounds.safe_area() * rac; + float unsplitRightSAH = left_bounds.safe_area() * lac + rub.safe_area() * rbc; + float duplicateSAH = ldb.safe_area() * lbc + rdb.safe_area() * rbc; + float minSAH = min(min(unsplitLeftSAH, unsplitRightSAH), duplicateSAH); + + if (minSAH == unsplitLeftSAH) { + /* unsplit to left */ + left_bounds = lub; + left_end++; + } + else if (minSAH == unsplitRightSAH) { + /* unsplit to right */ + right_bounds = rub; + swap(refs[left_end], refs[--right_start]); + } + else { + /* duplicate */ + left_bounds = ldb; + right_bounds = rdb; + refs[left_end++] = lref; + new_refs.push_back(rref); + right_end++; + } + } + /* Insert duplicated references into actual array in one go. */ + if (new_refs.size() != 0) { + refs.insert(refs.begin() + (right_end - new_refs.size()), new_refs.begin(), new_refs.end()); + } + if (aligned_space_ != NULL) { + left_bounds = right_bounds = BoundBox::empty; + for (int i = left_start; i < left_end - left_start; ++i) { + BoundBox prim_boundbox = references_->at(i).bounds(); + left_bounds.grow(prim_boundbox); + } + for (int i = right_start; i < right_end - right_start; ++i) { + BoundBox prim_boundbox = references_->at(i).bounds(); + right_bounds.grow(prim_boundbox); + } + } + left = BVHRange(left_bounds, left_start, left_end - left_start); + right = BVHRange(right_bounds, right_start, right_end - right_start); +} + +void BVHSpatialSplit::split_triangle_primitive(const Mesh *mesh, + const Transform *tfm, + int prim_index, + int dim, + float pos, + BoundBox &left_bounds, + BoundBox &right_bounds) +{ + Mesh::Triangle t = mesh->get_triangle(prim_index); + const float3 *verts = &mesh->verts[0]; + float3 v1 = tfm ? transform_point(tfm, verts[t.v[2]]) : verts[t.v[2]]; + v1 = get_unaligned_point(v1); + + for (int i = 0; i < 3; i++) { + float3 v0 = v1; + int vindex = t.v[i]; + v1 = tfm ? transform_point(tfm, verts[vindex]) : verts[vindex]; + v1 = get_unaligned_point(v1); + float v0p = v0[dim]; + float v1p = v1[dim]; + + /* insert vertex to the boxes it belongs to. */ + if (v0p <= pos) + left_bounds.grow(v0); + + if (v0p >= pos) + right_bounds.grow(v0); + + /* edge intersects the plane => insert intersection to both boxes. 
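+ * (Illustrative aside: the intersection point is obtained as
+ *      lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0, 1)),
+ *  i.e. the point on the edge whose coordinate along `dim` equals the split
+ *  position `pos`; it is grown into both left_bounds and right_bounds.)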
*/ + if ((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) { + float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); + left_bounds.grow(t); + right_bounds.grow(t); + } + } +} + +void BVHSpatialSplit::split_curve_primitive(const Hair *hair, + const Transform *tfm, + int prim_index, + int segment_index, + int dim, + float pos, + BoundBox &left_bounds, + BoundBox &right_bounds) +{ + /* curve split: NOTE - Currently ignores curve width and needs to be fixed. */ + Hair::Curve curve = hair->get_curve(prim_index); + const int k0 = curve.first_key + segment_index; + const int k1 = k0 + 1; + float3 v0 = hair->get_curve_keys()[k0]; + float3 v1 = hair->get_curve_keys()[k1]; + + if (tfm != NULL) { + v0 = transform_point(tfm, v0); + v1 = transform_point(tfm, v1); + } + v0 = get_unaligned_point(v0); + v1 = get_unaligned_point(v1); + + float v0p = v0[dim]; + float v1p = v1[dim]; + + /* insert vertex to the boxes it belongs to. */ + if (v0p <= pos) + left_bounds.grow(v0); + + if (v0p >= pos) + right_bounds.grow(v0); + + if (v1p <= pos) + left_bounds.grow(v1); + + if (v1p >= pos) + right_bounds.grow(v1); + + /* edge intersects the plane => insert intersection to both boxes. */ + if ((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) { + float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); + left_bounds.grow(t); + right_bounds.grow(t); + } +} + +void BVHSpatialSplit::split_triangle_reference(const BVHReference &ref, + const Mesh *mesh, + int dim, + float pos, + BoundBox &left_bounds, + BoundBox &right_bounds) +{ + split_triangle_primitive(mesh, NULL, ref.prim_index(), dim, pos, left_bounds, right_bounds); +} + +void BVHSpatialSplit::split_curve_reference(const BVHReference &ref, + const Hair *hair, + int dim, + float pos, + BoundBox &left_bounds, + BoundBox &right_bounds) +{ + split_curve_primitive(hair, + NULL, + ref.prim_index(), + PRIMITIVE_UNPACK_SEGMENT(ref.prim_type()), + dim, + pos, + left_bounds, + right_bounds); +} + +void BVHSpatialSplit::split_object_reference( + const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds) +{ + Geometry *geom = object->get_geometry(); + + if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { + Mesh *mesh = static_cast(geom); + for (int tri_idx = 0; tri_idx < mesh->num_triangles(); ++tri_idx) { + split_triangle_primitive( + mesh, &object->get_tfm(), tri_idx, dim, pos, left_bounds, right_bounds); + } + } + else if (geom->geometry_type == Geometry::HAIR) { + Hair *hair = static_cast(geom); + for (int curve_idx = 0; curve_idx < hair->num_curves(); ++curve_idx) { + Hair::Curve curve = hair->get_curve(curve_idx); + for (int segment_idx = 0; segment_idx < curve.num_keys - 1; ++segment_idx) { + split_curve_primitive( + hair, &object->get_tfm(), curve_idx, segment_idx, dim, pos, left_bounds, right_bounds); + } + } + } +} + +void BVHSpatialSplit::split_reference(const BVHBuild &builder, + BVHReference &left, + BVHReference &right, + const BVHReference &ref, + int dim, + float pos) +{ + /* initialize boundboxes */ + BoundBox left_bounds = BoundBox::empty; + BoundBox right_bounds = BoundBox::empty; + + /* loop over vertices/edges. 
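+ * (Illustrative aside: after the per-type dispatch below grows
+ *  left_bounds/right_bounds from the primitive's vertices and edges, both
+ *  boxes are clamped to the split plane (left_bounds.max[dim] = pos,
+ *  right_bounds.min[dim] = pos) and intersected with ref.bounds(), so neither
+ *  child reference can extend beyond the original one.)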
*/ + const Object *ob = builder.objects[ref.prim_object()]; + + if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) { + Mesh *mesh = static_cast(ob->get_geometry()); + split_triangle_reference(ref, mesh, dim, pos, left_bounds, right_bounds); + } + else if (ref.prim_type() & PRIMITIVE_ALL_CURVE) { + Hair *hair = static_cast(ob->get_geometry()); + split_curve_reference(ref, hair, dim, pos, left_bounds, right_bounds); + } + else { + split_object_reference(ob, dim, pos, left_bounds, right_bounds); + } + + /* intersect with original bounds. */ + left_bounds.max[dim] = pos; + right_bounds.min[dim] = pos; + + left_bounds.intersect(ref.bounds()); + right_bounds.intersect(ref.bounds()); + + /* set references */ + left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type()); + right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type()); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/split.h b/intern/cycles/bvh/split.h new file mode 100644 index 00000000000..2650a500ea9 --- /dev/null +++ b/intern/cycles/bvh/split.h @@ -0,0 +1,240 @@ +/* + * Adapted from code copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __BVH_SPLIT_H__ +#define __BVH_SPLIT_H__ + +#include "bvh/build.h" +#include "bvh/params.h" + +CCL_NAMESPACE_BEGIN + +class BVHBuild; +class Hair; +class Mesh; +struct Transform; + +/* Object Split */ + +class BVHObjectSplit { + public: + float sah; + int dim; + int num_left; + BoundBox left_bounds; + BoundBox right_bounds; + + BVHObjectSplit() + { + } + BVHObjectSplit(BVHBuild *builder, + BVHSpatialStorage *storage, + const BVHRange &range, + vector &references, + float nodeSAH, + const BVHUnaligned *unaligned_heuristic = NULL, + const Transform *aligned_space = NULL); + + void split(BVHRange &left, BVHRange &right, const BVHRange &range); + + protected: + BVHSpatialStorage *storage_; + vector *references_; + const BVHUnaligned *unaligned_heuristic_; + const Transform *aligned_space_; + + __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const + { + if (aligned_space_ == NULL) { + return prim.bounds(); + } + else { + return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_); + } + } +}; + +/* Spatial Split */ + +class BVHSpatialSplit { + public: + float sah; + int dim; + float pos; + + BVHSpatialSplit() : sah(FLT_MAX), dim(0), pos(0.0f), storage_(NULL), references_(NULL) + { + } + BVHSpatialSplit(const BVHBuild &builder, + BVHSpatialStorage *storage, + const BVHRange &range, + vector &references, + float nodeSAH, + const BVHUnaligned *unaligned_heuristic = NULL, + const Transform *aligned_space = NULL); + + void split(BVHBuild *builder, BVHRange &left, BVHRange &right, const BVHRange &range); + + void split_reference(const BVHBuild &builder, + BVHReference &left, + BVHReference &right, + const BVHReference &ref, + int dim, + float pos); + + protected: + BVHSpatialStorage *storage_; + vector *references_; + const BVHUnaligned *unaligned_heuristic_; + const Transform *aligned_space_; + + /* Lower-level functions which calculates boundaries of left and right nodes + * needed for spatial split. + * + * Operates directly with primitive specified by its index, reused by higher + * level splitting functions. + */ + void split_triangle_primitive(const Mesh *mesh, + const Transform *tfm, + int prim_index, + int dim, + float pos, + BoundBox &left_bounds, + BoundBox &right_bounds); + void split_curve_primitive(const Hair *hair, + const Transform *tfm, + int prim_index, + int segment_index, + int dim, + float pos, + BoundBox &left_bounds, + BoundBox &right_bounds); + + /* Lower-level functions which calculates boundaries of left and right nodes + * needed for spatial split. + * + * Operates with BVHReference, internally uses lower level API functions. 
+ */ + void split_triangle_reference(const BVHReference &ref, + const Mesh *mesh, + int dim, + float pos, + BoundBox &left_bounds, + BoundBox &right_bounds); + void split_curve_reference(const BVHReference &ref, + const Hair *hair, + int dim, + float pos, + BoundBox &left_bounds, + BoundBox &right_bounds); + void split_object_reference( + const Object *object, int dim, float pos, BoundBox &left_bounds, BoundBox &right_bounds); + + __forceinline BoundBox get_prim_bounds(const BVHReference &prim) const + { + if (aligned_space_ == NULL) { + return prim.bounds(); + } + else { + return unaligned_heuristic_->compute_aligned_prim_boundbox(prim, *aligned_space_); + } + } + + __forceinline float3 get_unaligned_point(const float3 &point) const + { + if (aligned_space_ == NULL) { + return point; + } + else { + return transform_point(aligned_space_, point); + } + } +}; + +/* Mixed Object-Spatial Split */ + +class BVHMixedSplit { + public: + BVHObjectSplit object; + BVHSpatialSplit spatial; + + float leafSAH; + float nodeSAH; + float minSAH; + + bool no_split; + + BoundBox bounds; + + BVHMixedSplit() + { + } + + __forceinline BVHMixedSplit(BVHBuild *builder, + BVHSpatialStorage *storage, + const BVHRange &range, + vector &references, + int level, + const BVHUnaligned *unaligned_heuristic = NULL, + const Transform *aligned_space = NULL) + { + if (aligned_space == NULL) { + bounds = range.bounds(); + } + else { + bounds = unaligned_heuristic->compute_aligned_boundbox( + range, &references.at(0), *aligned_space); + } + /* find split candidates. */ + float area = bounds.safe_area(); + + leafSAH = area * builder->params.primitive_cost(range.size()); + nodeSAH = area * builder->params.node_cost(2); + + object = BVHObjectSplit( + builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space); + + if (builder->params.use_spatial_split && level < BVHParams::MAX_SPATIAL_DEPTH) { + BoundBox overlap = object.left_bounds; + overlap.intersect(object.right_bounds); + + if (overlap.safe_area() >= builder->spatial_min_overlap) { + spatial = BVHSpatialSplit( + *builder, storage, range, references, nodeSAH, unaligned_heuristic, aligned_space); + } + } + + /* leaf SAH is the lowest => create leaf. */ + minSAH = min(min(leafSAH, object.sah), spatial.sah); + no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range, references)); + } + + __forceinline void split(BVHBuild *builder, + BVHRange &left, + BVHRange &right, + const BVHRange &range) + { + if (builder->params.use_spatial_split && minSAH == spatial.sah) + spatial.split(builder, left, right, range); + if (!left.size() || !right.size()) + object.split(left, right, range); + } +}; + +CCL_NAMESPACE_END + +#endif /* __BVH_SPLIT_H__ */ diff --git a/intern/cycles/bvh/unaligned.cpp b/intern/cycles/bvh/unaligned.cpp new file mode 100644 index 00000000000..3c4a600fe58 --- /dev/null +++ b/intern/cycles/bvh/unaligned.cpp @@ -0,0 +1,165 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "bvh/unaligned.h" + +#include "scene/hair.h" +#include "scene/object.h" + +#include "bvh/binning.h" +#include "bvh/params.h" + +#include "util/boundbox.h" +#include "util/transform.h" + +CCL_NAMESPACE_BEGIN + +BVHUnaligned::BVHUnaligned(const vector &objects) : objects_(objects) +{ +} + +Transform BVHUnaligned::compute_aligned_space(const BVHObjectBinning &range, + const BVHReference *references) const +{ + for (int i = range.start(); i < range.end(); ++i) { + const BVHReference &ref = references[i]; + Transform aligned_space; + /* Use first primitive which defines correct direction to define + * the orientation space. + */ + if (compute_aligned_space(ref, &aligned_space)) { + return aligned_space; + } + } + return transform_identity(); +} + +Transform BVHUnaligned::compute_aligned_space(const BVHRange &range, + const BVHReference *references) const +{ + for (int i = range.start(); i < range.end(); ++i) { + const BVHReference &ref = references[i]; + Transform aligned_space; + /* Use first primitive which defines correct direction to define + * the orientation space. + */ + if (compute_aligned_space(ref, &aligned_space)) { + return aligned_space; + } + } + return transform_identity(); +} + +bool BVHUnaligned::compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const +{ + const Object *object = objects_[ref.prim_object()]; + const int packed_type = ref.prim_type(); + const int type = (packed_type & PRIMITIVE_ALL); + /* No motion blur curves here, we can't fit them to aligned boxes well. */ + if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) { + const int curve_index = ref.prim_index(); + const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type); + const Hair *hair = static_cast(object->get_geometry()); + const Hair::Curve &curve = hair->get_curve(curve_index); + const int key = curve.first_key + segment; + const float3 v1 = hair->get_curve_keys()[key], v2 = hair->get_curve_keys()[key + 1]; + float length; + const float3 axis = normalize_len(v2 - v1, &length); + if (length > 1e-6f) { + *aligned_space = make_transform_frame(axis); + return true; + } + } + *aligned_space = transform_identity(); + return false; +} + +BoundBox BVHUnaligned::compute_aligned_prim_boundbox(const BVHReference &prim, + const Transform &aligned_space) const +{ + BoundBox bounds = BoundBox::empty; + const Object *object = objects_[prim.prim_object()]; + const int packed_type = prim.prim_type(); + const int type = (packed_type & PRIMITIVE_ALL); + /* No motion blur curves here, we can't fit them to aligned boxes well. 
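+ * (Illustrative aside: for a ribbon/thick curve segment the bounds below are
+ *  grown from its keys and radii in the given aligned_space via
+ *  Curve::bounds_grow(); any other primitive simply has its BoundBox
+ *  transformed into that space, so both paths return bounds expressed in the
+ *  oriented frame.)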
*/ + if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_CURVE_THICK)) { + const int curve_index = prim.prim_index(); + const int segment = PRIMITIVE_UNPACK_SEGMENT(packed_type); + const Hair *hair = static_cast(object->get_geometry()); + const Hair::Curve &curve = hair->get_curve(curve_index); + curve.bounds_grow( + segment, &hair->get_curve_keys()[0], &hair->get_curve_radius()[0], aligned_space, bounds); + } + else { + bounds = prim.bounds().transformed(&aligned_space); + } + return bounds; +} + +BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHObjectBinning &range, + const BVHReference *references, + const Transform &aligned_space, + BoundBox *cent_bounds) const +{ + BoundBox bounds = BoundBox::empty; + if (cent_bounds != NULL) { + *cent_bounds = BoundBox::empty; + } + for (int i = range.start(); i < range.end(); ++i) { + const BVHReference &ref = references[i]; + BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space); + bounds.grow(ref_bounds); + if (cent_bounds != NULL) { + cent_bounds->grow(ref_bounds.center2()); + } + } + return bounds; +} + +BoundBox BVHUnaligned::compute_aligned_boundbox(const BVHRange &range, + const BVHReference *references, + const Transform &aligned_space, + BoundBox *cent_bounds) const +{ + BoundBox bounds = BoundBox::empty; + if (cent_bounds != NULL) { + *cent_bounds = BoundBox::empty; + } + for (int i = range.start(); i < range.end(); ++i) { + const BVHReference &ref = references[i]; + BoundBox ref_bounds = compute_aligned_prim_boundbox(ref, aligned_space); + bounds.grow(ref_bounds); + if (cent_bounds != NULL) { + cent_bounds->grow(ref_bounds.center2()); + } + } + return bounds; +} + +Transform BVHUnaligned::compute_node_transform(const BoundBox &bounds, + const Transform &aligned_space) +{ + Transform space = aligned_space; + space.x.w -= bounds.min.x; + space.y.w -= bounds.min.y; + space.z.w -= bounds.min.z; + float3 dim = bounds.max - bounds.min; + return transform_scale( + 1.0f / max(1e-18f, dim.x), 1.0f / max(1e-18f, dim.y), 1.0f / max(1e-18f, dim.z)) * + space; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/unaligned.h b/intern/cycles/bvh/unaligned.h new file mode 100644 index 00000000000..33e584ea8ed --- /dev/null +++ b/intern/cycles/bvh/unaligned.h @@ -0,0 +1,73 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BVH_UNALIGNED_H__ +#define __BVH_UNALIGNED_H__ + +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +class BoundBox; +class BVHObjectBinning; +class BVHRange; +class BVHReference; +struct Transform; +class Object; + +/* Helper class to perform calculations needed for unaligned nodes. */ +class BVHUnaligned { + public: + BVHUnaligned(const vector &objects); + + /* Calculate alignment for the oriented node for a given range. 
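+ * (Usage sketch, an assumption about the typical call pattern based on how
+ *  the splitters above use this class: derive the frame with
+ *  compute_aligned_space(), rebuild range/primitive bounds in that frame with
+ *  compute_aligned_boundbox()/compute_aligned_prim_boundbox(), then pack the
+ *  oriented node with compute_node_transform(), which remaps the bounds to
+ *  the 0..1 range.)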
*/ + Transform compute_aligned_space(const BVHObjectBinning &range, + const BVHReference *references) const; + Transform compute_aligned_space(const BVHRange &range, const BVHReference *references) const; + + /* Calculate alignment for the oriented node for a given reference. + * + * Return true when space was calculated successfully. + */ + bool compute_aligned_space(const BVHReference &ref, Transform *aligned_space) const; + + /* Calculate primitive's bounding box in given space. */ + BoundBox compute_aligned_prim_boundbox(const BVHReference &prim, + const Transform &aligned_space) const; + + /* Calculate bounding box in given space. */ + BoundBox compute_aligned_boundbox(const BVHObjectBinning &range, + const BVHReference *references, + const Transform &aligned_space, + BoundBox *cent_bounds = NULL) const; + BoundBox compute_aligned_boundbox(const BVHRange &range, + const BVHReference *references, + const Transform &aligned_space, + BoundBox *cent_bounds = NULL) const; + + /* Calculate affine transform for node packing. + * Bounds will be in the range of 0..1. + */ + static Transform compute_node_transform(const BoundBox &bounds, const Transform &aligned_space); + + protected: + /* List of objects BVH is being created for. */ + const vector &objects_; +}; + +CCL_NAMESPACE_END + +#endif /* __BVH_UNALIGNED_H__ */ diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index 6d33a6f107f..39de4bec799 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -45,11 +45,11 @@ endif() set(SRC device.cpp - device_denoise.cpp - device_graphics_interop.cpp - device_kernel.cpp - device_memory.cpp - device_queue.cpp + denoise.cpp + graphics_interop.cpp + kernel.cpp + memory.cpp + queue.cpp ) set(SRC_CPU @@ -116,11 +116,11 @@ set(SRC_OPTIX set(SRC_HEADERS device.h - device_denoise.h - device_graphics_interop.h - device_memory.h - device_kernel.h - device_queue.h + denoise.h + graphics_interop.h + memory.h + kernel.h + queue.h ) set(LIB diff --git a/intern/cycles/device/cpu/device.cpp b/intern/cycles/device/cpu/device.cpp index 68ca8e8bb22..f11b49ef65f 100644 --- a/intern/cycles/device/cpu/device.cpp +++ b/intern/cycles/device/cpu/device.cpp @@ -20,7 +20,7 @@ /* Used for `info.denoisers`. */ /* TODO(sergey): The denoisers are probably to be moved completely out of the device into their * own class. But until then keep API consistent with how it used to work before. */ -#include "util/util_openimagedenoise.h" +#include "util/openimagedenoise.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/cpu/device.h b/intern/cycles/device/cpu/device.h index 9cb2e80068d..c53bc338127 100644 --- a/intern/cycles/device/cpu/device.h +++ b/intern/cycles/device/cpu/device.h @@ -16,8 +16,8 @@ #pragma once -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/string.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp index 98d637b5f8a..dbad332f896 100644 --- a/intern/cycles/device/cpu/device_impl.cpp +++ b/intern/cycles/device/cpu/device_impl.cpp @@ -22,7 +22,7 @@ /* So ImathMath is included before our kernel_cpu_compat. 
*/ #ifdef WITH_OSL /* So no context pollution happens from indirectly included windows.h */ -# include "util/util_windows.h" +# include "util/windows.h" # include #endif @@ -39,27 +39,27 @@ #include "kernel/device/cpu/compat.h" #include "kernel/device/cpu/globals.h" #include "kernel/device/cpu/kernel.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "kernel/osl/osl_shader.h" -#include "kernel/osl/osl_globals.h" +#include "kernel/osl/shader.h" +#include "kernel/osl/globals.h" // clang-format on -#include "bvh/bvh_embree.h" +#include "bvh/embree.h" #include "session/buffers.h" -#include "util/util_debug.h" -#include "util/util_foreach.h" -#include "util/util_function.h" -#include "util/util_logging.h" -#include "util/util_map.h" -#include "util/util_openimagedenoise.h" -#include "util/util_optimization.h" -#include "util/util_progress.h" -#include "util/util_system.h" -#include "util/util_task.h" -#include "util/util_thread.h" +#include "util/debug.h" +#include "util/foreach.h" +#include "util/function.h" +#include "util/log.h" +#include "util/map.h" +#include "util/openimagedenoise.h" +#include "util/optimization.h" +#include "util/progress.h" +#include "util/system.h" +#include "util/task.h" +#include "util/thread.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/cpu/device_impl.h b/intern/cycles/device/cpu/device_impl.h index 944c61e29f7..553728ccc3b 100644 --- a/intern/cycles/device/cpu/device_impl.h +++ b/intern/cycles/device/cpu/device_impl.h @@ -19,7 +19,7 @@ /* So ImathMath is included before our kernel_cpu_compat. */ #ifdef WITH_OSL /* So no context pollution happens from indirectly included windows.h */ -# include "util/util_windows.h" +# include "util/windows.h" # include #endif @@ -29,15 +29,15 @@ #include "device/cpu/kernel.h" #include "device/device.h" -#include "device/device_memory.h" +#include "device/memory.h" // clang-format off #include "kernel/device/cpu/compat.h" #include "kernel/device/cpu/kernel.h" #include "kernel/device/cpu/globals.h" -#include "kernel/osl/osl_shader.h" -#include "kernel/osl/osl_globals.h" +#include "kernel/osl/shader.h" +#include "kernel/osl/globals.h" // clang-format on CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h index 5f9cb85389f..5beeaf148a1 100644 --- a/intern/cycles/device/cpu/kernel.h +++ b/intern/cycles/device/cpu/kernel.h @@ -17,7 +17,7 @@ #pragma once #include "device/cpu/kernel_function.h" -#include "util/util_types.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/cpu/kernel_function.h b/intern/cycles/device/cpu/kernel_function.h index aa18720cc24..5ff55499d48 100644 --- a/intern/cycles/device/cpu/kernel_function.h +++ b/intern/cycles/device/cpu/kernel_function.h @@ -16,8 +16,8 @@ #pragma once -#include "util/util_debug.h" -#include "util/util_system.h" +#include "util/debug.h" +#include "util/system.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/cpu/kernel_thread_globals.cpp b/intern/cycles/device/cpu/kernel_thread_globals.cpp index 44735beb88d..739b6460318 100644 --- a/intern/cycles/device/cpu/kernel_thread_globals.cpp +++ b/intern/cycles/device/cpu/kernel_thread_globals.cpp @@ -17,11 +17,11 @@ #include "device/cpu/kernel_thread_globals.h" // clang-format off -#include "kernel/osl/osl_shader.h" -#include "kernel/osl/osl_globals.h" +#include "kernel/osl/shader.h" +#include "kernel/osl/globals.h" // clang-format on -#include "util/util_profiling.h" +#include "util/profiling.h" CCL_NAMESPACE_BEGIN diff --git 
a/intern/cycles/device/cuda/device.cpp b/intern/cycles/device/cuda/device.cpp index 84becd6d081..af2bdc6e29c 100644 --- a/intern/cycles/device/cuda/device.cpp +++ b/intern/cycles/device/cuda/device.cpp @@ -16,14 +16,14 @@ #include "device/cuda/device.h" -#include "util/util_logging.h" +#include "util/log.h" #ifdef WITH_CUDA # include "device/cuda/device_impl.h" # include "device/device.h" -# include "util/util_string.h" -# include "util/util_windows.h" +# include "util/string.h" +# include "util/windows.h" #endif /* WITH_CUDA */ CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/cuda/device.h b/intern/cycles/device/cuda/device.h index b0484904d1a..7142ad19857 100644 --- a/intern/cycles/device/cuda/device.h +++ b/intern/cycles/device/cuda/device.h @@ -16,8 +16,8 @@ #pragma once -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/string.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index 40f407b4fb3..2f9a1394ad8 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -24,17 +24,17 @@ # include "device/cuda/device_impl.h" -# include "util/util_debug.h" -# include "util/util_foreach.h" -# include "util/util_logging.h" -# include "util/util_map.h" -# include "util/util_md5.h" -# include "util/util_path.h" -# include "util/util_string.h" -# include "util/util_system.h" -# include "util/util_time.h" -# include "util/util_types.h" -# include "util/util_windows.h" +# include "util/debug.h" +# include "util/foreach.h" +# include "util/log.h" +# include "util/map.h" +# include "util/md5.h" +# include "util/path.h" +# include "util/string.h" +# include "util/system.h" +# include "util/time.h" +# include "util/types.h" +# include "util/windows.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/cuda/device_impl.h b/intern/cycles/device/cuda/device_impl.h index c0316d18ba0..72d4108d1bf 100644 --- a/intern/cycles/device/cuda/device_impl.h +++ b/intern/cycles/device/cuda/device_impl.h @@ -21,7 +21,7 @@ # include "device/cuda/util.h" # include "device/device.h" -# include "util/util_map.h" +# include "util/map.h" # ifdef WITH_CUDA_DYNLOAD # include "cuew.h" diff --git a/intern/cycles/device/cuda/graphics_interop.h b/intern/cycles/device/cuda/graphics_interop.h index ec480f20c86..a00a837efea 100644 --- a/intern/cycles/device/cuda/graphics_interop.h +++ b/intern/cycles/device/cuda/graphics_interop.h @@ -16,7 +16,7 @@ #ifdef WITH_CUDA -# include "device/device_graphics_interop.h" +# include "device/graphics_interop.h" # ifdef WITH_CUDA_DYNLOAD # include "cuew.h" diff --git a/intern/cycles/device/cuda/kernel.h b/intern/cycles/device/cuda/kernel.h index b489547a350..e1650ea5b5b 100644 --- a/intern/cycles/device/cuda/kernel.h +++ b/intern/cycles/device/cuda/kernel.h @@ -18,7 +18,7 @@ #ifdef WITH_CUDA -# include "device/device_kernel.h" +# include "device/kernel.h" # ifdef WITH_CUDA_DYNLOAD # include "cuew.h" diff --git a/intern/cycles/device/cuda/queue.h b/intern/cycles/device/cuda/queue.h index 4d1995ed69e..28613cda071 100644 --- a/intern/cycles/device/cuda/queue.h +++ b/intern/cycles/device/cuda/queue.h @@ -18,9 +18,9 @@ #ifdef WITH_CUDA -# include "device/device_kernel.h" -# include "device/device_memory.h" -# include "device/device_queue.h" +# include "device/kernel.h" +# include "device/memory.h" +# include "device/queue.h" # include "device/cuda/util.h" diff --git a/intern/cycles/device/denoise.cpp 
b/intern/cycles/device/denoise.cpp new file mode 100644 index 00000000000..c291a7a0adb --- /dev/null +++ b/intern/cycles/device/denoise.cpp @@ -0,0 +1,88 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/denoise.h" + +CCL_NAMESPACE_BEGIN + +const char *denoiserTypeToHumanReadable(DenoiserType type) +{ + switch (type) { + case DENOISER_OPTIX: + return "OptiX"; + case DENOISER_OPENIMAGEDENOISE: + return "OpenImageDenoise"; + + case DENOISER_NUM: + case DENOISER_NONE: + case DENOISER_ALL: + return "UNKNOWN"; + } + + return "UNKNOWN"; +} + +const NodeEnum *DenoiseParams::get_type_enum() +{ + static NodeEnum type_enum; + + if (type_enum.empty()) { + type_enum.insert("optix", DENOISER_OPTIX); + type_enum.insert("openimageio", DENOISER_OPENIMAGEDENOISE); + } + + return &type_enum; +} + +const NodeEnum *DenoiseParams::get_prefilter_enum() +{ + static NodeEnum prefilter_enum; + + if (prefilter_enum.empty()) { + prefilter_enum.insert("none", DENOISER_PREFILTER_NONE); + prefilter_enum.insert("fast", DENOISER_PREFILTER_FAST); + prefilter_enum.insert("accurate", DENOISER_PREFILTER_ACCURATE); + } + + return &prefilter_enum; +} + +NODE_DEFINE(DenoiseParams) +{ + NodeType *type = NodeType::add("denoise_params", create); + + const NodeEnum *type_enum = get_type_enum(); + const NodeEnum *prefilter_enum = get_prefilter_enum(); + + SOCKET_BOOLEAN(use, "Use", false); + + SOCKET_ENUM(type, "Type", *type_enum, DENOISER_OPENIMAGEDENOISE); + + SOCKET_INT(start_sample, "Start Sample", 0); + + SOCKET_BOOLEAN(use_pass_albedo, "Use Pass Albedo", true); + SOCKET_BOOLEAN(use_pass_normal, "Use Pass Normal", false); + + SOCKET_ENUM(prefilter, "Prefilter", *prefilter_enum, DENOISER_PREFILTER_FAST); + + return type; +} + +DenoiseParams::DenoiseParams() : Node(get_node_type()) +{ +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/denoise.h b/intern/cycles/device/denoise.h new file mode 100644 index 00000000000..3f30506ae06 --- /dev/null +++ b/intern/cycles/device/denoise.h @@ -0,0 +1,110 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "device/memory.h" +#include "graph/node.h" +#include "session/buffers.h" + +CCL_NAMESPACE_BEGIN + +enum DenoiserType { + DENOISER_OPTIX = 2, + DENOISER_OPENIMAGEDENOISE = 4, + DENOISER_NUM, + + DENOISER_NONE = 0, + DENOISER_ALL = ~0, +}; + +/* COnstruct human-readable string which denotes the denoiser type. 
*/ +const char *denoiserTypeToHumanReadable(DenoiserType type); + +typedef int DenoiserTypeMask; + +enum DenoiserPrefilter { + /* Best quality of the result without extra processing time, but requires guiding passes to be + * noise-free. */ + DENOISER_PREFILTER_NONE = 1, + + /* Denoise color and guiding passes together. + * Improves quality when guiding passes are noisy using least amount of extra processing time. */ + DENOISER_PREFILTER_FAST = 2, + + /* Prefilter noisy guiding passes before denoising color. + * Improves quality when guiding passes are noisy using extra processing time. */ + DENOISER_PREFILTER_ACCURATE = 3, + + DENOISER_PREFILTER_NUM, +}; + +/* NOTE: Is not a real scene node. Using Node API for ease of (de)serialization. + * The default values here do not really matter as they are always initialized from the + * Integrator node. */ +class DenoiseParams : public Node { + public: + NODE_DECLARE + + /* Apply denoiser to image. */ + bool use = false; + + /* Denoiser type. */ + DenoiserType type = DENOISER_OPENIMAGEDENOISE; + + /* Viewport start sample. */ + int start_sample = 0; + + /* Auxiliary passes. */ + bool use_pass_albedo = true; + bool use_pass_normal = true; + + DenoiserPrefilter prefilter = DENOISER_PREFILTER_FAST; + + static const NodeEnum *get_type_enum(); + static const NodeEnum *get_prefilter_enum(); + + DenoiseParams(); + + bool modified(const DenoiseParams &other) const + { + return !(use == other.use && type == other.type && start_sample == other.start_sample && + use_pass_albedo == other.use_pass_albedo && + use_pass_normal == other.use_pass_normal && prefilter == other.prefilter); + } +}; + +/* All the parameters needed to perform buffer denoising on a device. + * Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is + * more like a wrapper for all the arguments and parameters needed to perform denoising. Is a + * single place where they are all listed, so that it's not required to modify all device methods + * when these parameters do change. */ +class DeviceDenoiseTask { + public: + DenoiseParams params; + + int num_samples; + + RenderBuffers *render_buffers; + BufferParams buffer_params; + + /* Allow to do in-place modification of the input passes (scaling them down i.e.). This will + * lower the memory footprint of the denoiser but will make input passes "invalid" (from path + * tracer) point of view. 
*/ + bool allow_inplace_modification; +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 81574e8b184..5179f3bacdb 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -20,7 +20,7 @@ #include "bvh/bvh2.h" #include "device/device.h" -#include "device/device_queue.h" +#include "device/queue.h" #include "device/cpu/device.h" #include "device/cuda/device.h" @@ -29,15 +29,15 @@ #include "device/multi/device.h" #include "device/optix/device.h" -#include "util/util_foreach.h" -#include "util/util_half.h" -#include "util/util_logging.h" -#include "util/util_math.h" -#include "util/util_string.h" -#include "util/util_system.h" -#include "util/util_time.h" -#include "util/util_types.h" -#include "util/util_vector.h" +#include "util/foreach.h" +#include "util/half.h" +#include "util/log.h" +#include "util/math.h" +#include "util/string.h" +#include "util/system.h" +#include "util/time.h" +#include "util/types.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index c73d74cdccc..3cb177adde7 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -19,21 +19,21 @@ #include -#include "bvh/bvh_params.h" - -#include "device/device_denoise.h" -#include "device/device_memory.h" - -#include "util/util_function.h" -#include "util/util_list.h" -#include "util/util_logging.h" -#include "util/util_stats.h" -#include "util/util_string.h" -#include "util/util_texture.h" -#include "util/util_thread.h" -#include "util/util_types.h" -#include "util/util_unique_ptr.h" -#include "util/util_vector.h" +#include "bvh/params.h" + +#include "device/denoise.h" +#include "device/memory.h" + +#include "util/function.h" +#include "util/list.h" +#include "util/log.h" +#include "util/stats.h" +#include "util/string.h" +#include "util/texture.h" +#include "util/thread.h" +#include "util/types.h" +#include "util/unique_ptr.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/device_denoise.cpp b/intern/cycles/device/device_denoise.cpp deleted file mode 100644 index aea7868f65d..00000000000 --- a/intern/cycles/device/device_denoise.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "device/device_denoise.h" - -CCL_NAMESPACE_BEGIN - -const char *denoiserTypeToHumanReadable(DenoiserType type) -{ - switch (type) { - case DENOISER_OPTIX: - return "OptiX"; - case DENOISER_OPENIMAGEDENOISE: - return "OpenImageDenoise"; - - case DENOISER_NUM: - case DENOISER_NONE: - case DENOISER_ALL: - return "UNKNOWN"; - } - - return "UNKNOWN"; -} - -const NodeEnum *DenoiseParams::get_type_enum() -{ - static NodeEnum type_enum; - - if (type_enum.empty()) { - type_enum.insert("optix", DENOISER_OPTIX); - type_enum.insert("openimageio", DENOISER_OPENIMAGEDENOISE); - } - - return &type_enum; -} - -const NodeEnum *DenoiseParams::get_prefilter_enum() -{ - static NodeEnum prefilter_enum; - - if (prefilter_enum.empty()) { - prefilter_enum.insert("none", DENOISER_PREFILTER_NONE); - prefilter_enum.insert("fast", DENOISER_PREFILTER_FAST); - prefilter_enum.insert("accurate", DENOISER_PREFILTER_ACCURATE); - } - - return &prefilter_enum; -} - -NODE_DEFINE(DenoiseParams) -{ - NodeType *type = NodeType::add("denoise_params", create); - - const NodeEnum *type_enum = get_type_enum(); - const NodeEnum *prefilter_enum = get_prefilter_enum(); - - SOCKET_BOOLEAN(use, "Use", false); - - SOCKET_ENUM(type, "Type", *type_enum, DENOISER_OPENIMAGEDENOISE); - - SOCKET_INT(start_sample, "Start Sample", 0); - - SOCKET_BOOLEAN(use_pass_albedo, "Use Pass Albedo", true); - SOCKET_BOOLEAN(use_pass_normal, "Use Pass Normal", false); - - SOCKET_ENUM(prefilter, "Prefilter", *prefilter_enum, DENOISER_PREFILTER_FAST); - - return type; -} - -DenoiseParams::DenoiseParams() : Node(get_node_type()) -{ -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_denoise.h b/intern/cycles/device/device_denoise.h deleted file mode 100644 index 4e09f1a1ba3..00000000000 --- a/intern/cycles/device/device_denoise.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "device/device_memory.h" -#include "graph/node.h" -#include "session/buffers.h" - -CCL_NAMESPACE_BEGIN - -enum DenoiserType { - DENOISER_OPTIX = 2, - DENOISER_OPENIMAGEDENOISE = 4, - DENOISER_NUM, - - DENOISER_NONE = 0, - DENOISER_ALL = ~0, -}; - -/* COnstruct human-readable string which denotes the denoiser type. */ -const char *denoiserTypeToHumanReadable(DenoiserType type); - -typedef int DenoiserTypeMask; - -enum DenoiserPrefilter { - /* Best quality of the result without extra processing time, but requires guiding passes to be - * noise-free. */ - DENOISER_PREFILTER_NONE = 1, - - /* Denoise color and guiding passes together. - * Improves quality when guiding passes are noisy using least amount of extra processing time. */ - DENOISER_PREFILTER_FAST = 2, - - /* Prefilter noisy guiding passes before denoising color. - * Improves quality when guiding passes are noisy using extra processing time. */ - DENOISER_PREFILTER_ACCURATE = 3, - - DENOISER_PREFILTER_NUM, -}; - -/* NOTE: Is not a real scene node. 
Using Node API for ease of (de)serialization. - * The default values here do not really matter as they are always initialized from the - * Integrator node. */ -class DenoiseParams : public Node { - public: - NODE_DECLARE - - /* Apply denoiser to image. */ - bool use = false; - - /* Denoiser type. */ - DenoiserType type = DENOISER_OPENIMAGEDENOISE; - - /* Viewport start sample. */ - int start_sample = 0; - - /* Auxiliary passes. */ - bool use_pass_albedo = true; - bool use_pass_normal = true; - - DenoiserPrefilter prefilter = DENOISER_PREFILTER_FAST; - - static const NodeEnum *get_type_enum(); - static const NodeEnum *get_prefilter_enum(); - - DenoiseParams(); - - bool modified(const DenoiseParams &other) const - { - return !(use == other.use && type == other.type && start_sample == other.start_sample && - use_pass_albedo == other.use_pass_albedo && - use_pass_normal == other.use_pass_normal && prefilter == other.prefilter); - } -}; - -/* All the parameters needed to perform buffer denoising on a device. - * Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is - * more like a wrapper for all the arguments and parameters needed to perform denoising. Is a - * single place where they are all listed, so that it's not required to modify all device methods - * when these parameters do change. */ -class DeviceDenoiseTask { - public: - DenoiseParams params; - - int num_samples; - - RenderBuffers *render_buffers; - BufferParams buffer_params; - - /* Allow to do in-place modification of the input passes (scaling them down i.e.). This will - * lower the memory footprint of the denoiser but will make input passes "invalid" (from path - * tracer) point of view. */ - bool allow_inplace_modification; -}; - -CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_graphics_interop.cpp b/intern/cycles/device/device_graphics_interop.cpp deleted file mode 100644 index a80a236759f..00000000000 --- a/intern/cycles/device/device_graphics_interop.cpp +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "device/device_graphics_interop.h" - -CCL_NAMESPACE_BEGIN - -CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_graphics_interop.h b/intern/cycles/device/device_graphics_interop.h deleted file mode 100644 index e5c97fe5a1e..00000000000 --- a/intern/cycles/device/device_graphics_interop.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "session/display_driver.h" - -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* Device-side graphics interoperability support. - * - * Takes care of holding all the handlers needed by the device to implement interoperability with - * the graphics library. */ -class DeviceGraphicsInterop { - public: - DeviceGraphicsInterop() = default; - virtual ~DeviceGraphicsInterop() = default; - - /* Update this device-side graphics interoperability object with the given destination resource - * information. */ - virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0; - - virtual device_ptr map() = 0; - virtual void unmap() = 0; -}; - -CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_kernel.cpp b/intern/cycles/device/device_kernel.cpp deleted file mode 100644 index 1e282aac57e..00000000000 --- a/intern/cycles/device/device_kernel.cpp +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "device/device_kernel.h" - -#include "util/util_logging.h" - -CCL_NAMESPACE_BEGIN - -const char *device_kernel_as_string(DeviceKernel kernel) -{ - switch (kernel) { - /* Integrator. 
*/ - case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA: - return "integrator_init_from_camera"; - case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE: - return "integrator_init_from_bake"; - case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: - return "integrator_intersect_closest"; - case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: - return "integrator_intersect_shadow"; - case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: - return "integrator_intersect_subsurface"; - case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: - return "integrator_intersect_volume_stack"; - case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND: - return "integrator_shade_background"; - case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: - return "integrator_shade_light"; - case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: - return "integrator_shade_shadow"; - case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: - return "integrator_shade_surface"; - case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: - return "integrator_shade_surface_raytrace"; - case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: - return "integrator_shade_volume"; - case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL: - return "integrator_megakernel"; - case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY: - return "integrator_queued_paths_array"; - case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY: - return "integrator_queued_shadow_paths_array"; - case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY: - return "integrator_active_paths_array"; - case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY: - return "integrator_terminated_paths_array"; - case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY: - return "integrator_sorted_paths_array"; - case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY: - return "integrator_compact_paths_array"; - case DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES: - return "integrator_compact_states"; - case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY: - return "integrator_terminated_shadow_paths_array"; - case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY: - return "integrator_compact_shadow_paths_array"; - case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES: - return "integrator_compact_shadow_states"; - case DEVICE_KERNEL_INTEGRATOR_RESET: - return "integrator_reset"; - case DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS: - return "integrator_shadow_catcher_count_possible_splits"; - - /* Shader evaluation. */ - case DEVICE_KERNEL_SHADER_EVAL_DISPLACE: - return "shader_eval_displace"; - case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND: - return "shader_eval_background"; - case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY: - return "shader_eval_curve_shadow_transparency"; - - /* Film. 
*/ - -#define FILM_CONVERT_KERNEL_AS_STRING(variant, variant_lowercase) \ - case DEVICE_KERNEL_FILM_CONVERT_##variant: \ - return "film_convert_" #variant_lowercase; \ - case DEVICE_KERNEL_FILM_CONVERT_##variant##_HALF_RGBA: \ - return "film_convert_" #variant_lowercase "_half_rgba"; - - FILM_CONVERT_KERNEL_AS_STRING(DEPTH, depth) - FILM_CONVERT_KERNEL_AS_STRING(MIST, mist) - FILM_CONVERT_KERNEL_AS_STRING(SAMPLE_COUNT, sample_count) - FILM_CONVERT_KERNEL_AS_STRING(FLOAT, float) - FILM_CONVERT_KERNEL_AS_STRING(LIGHT_PATH, light_path) - FILM_CONVERT_KERNEL_AS_STRING(FLOAT3, float3) - FILM_CONVERT_KERNEL_AS_STRING(MOTION, motion) - FILM_CONVERT_KERNEL_AS_STRING(CRYPTOMATTE, cryptomatte) - FILM_CONVERT_KERNEL_AS_STRING(SHADOW_CATCHER, shadow_catcher) - FILM_CONVERT_KERNEL_AS_STRING(SHADOW_CATCHER_MATTE_WITH_SHADOW, - shadow_catcher_matte_with_shadow) - FILM_CONVERT_KERNEL_AS_STRING(COMBINED, combined) - FILM_CONVERT_KERNEL_AS_STRING(FLOAT4, float4) - -#undef FILM_CONVERT_KERNEL_AS_STRING - - /* Adaptive sampling. */ - case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK: - return "adaptive_sampling_convergence_check"; - case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X: - return "adaptive_sampling_filter_x"; - case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y: - return "adaptive_sampling_filter_y"; - - /* Denoising. */ - case DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS: - return "filter_guiding_preprocess"; - case DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO: - return "filter_guiding_set_fake_albedo"; - case DEVICE_KERNEL_FILTER_COLOR_PREPROCESS: - return "filter_color_preprocess"; - case DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS: - return "filter_color_postprocess"; - - /* Cryptomatte. */ - case DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS: - return "cryptomatte_postprocess"; - - /* Generic */ - case DEVICE_KERNEL_PREFIX_SUM: - return "prefix_sum"; - - case DEVICE_KERNEL_NUM: - break; - }; - LOG(FATAL) << "Unhandled kernel " << static_cast(kernel) << ", should never happen."; - return "UNKNOWN"; -} - -std::ostream &operator<<(std::ostream &os, DeviceKernel kernel) -{ - os << device_kernel_as_string(kernel); - return os; -} - -string device_kernel_mask_as_string(DeviceKernelMask mask) -{ - string str; - - for (uint64_t i = 0; i < sizeof(DeviceKernelMask) * 8; i++) { - if (mask & (uint64_t(1) << i)) { - if (!str.empty()) { - str += " "; - } - str += device_kernel_as_string((DeviceKernel)i); - } - } - - return str; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_kernel.h b/intern/cycles/device/device_kernel.h deleted file mode 100644 index 83d959ca87b..00000000000 --- a/intern/cycles/device/device_kernel.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "kernel/kernel_types.h" - -#include "util/util_string.h" - -#include // NOLINT - -CCL_NAMESPACE_BEGIN - -const char *device_kernel_as_string(DeviceKernel kernel); -std::ostream &operator<<(std::ostream &os, DeviceKernel kernel); - -typedef uint64_t DeviceKernelMask; -string device_kernel_mask_as_string(DeviceKernelMask mask); - -CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp deleted file mode 100644 index c0ab2e17cae..00000000000 --- a/intern/cycles/device/device_memory.cpp +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "device/device_memory.h" -#include "device/device.h" - -CCL_NAMESPACE_BEGIN - -/* Device Memory */ - -device_memory::device_memory(Device *device, const char *name, MemoryType type) - : data_type(device_type_traits::data_type), - data_elements(device_type_traits::num_elements_cpu), - data_size(0), - device_size(0), - data_width(0), - data_height(0), - data_depth(0), - type(type), - name(name), - device(device), - device_pointer(0), - host_pointer(0), - shared_pointer(0), - shared_counter(0), - original_device_ptr(0), - original_device_size(0), - original_device(0), - need_realloc_(false), - modified(false) -{ -} - -device_memory::device_memory(device_memory &&other) noexcept - : data_type(other.data_type), - data_elements(other.data_elements), - data_size(other.data_size), - device_size(other.device_size), - data_width(other.data_width), - data_height(other.data_height), - data_depth(other.data_depth), - type(other.type), - name(other.name), - device(other.device), - device_pointer(other.device_pointer), - host_pointer(other.host_pointer), - shared_pointer(other.shared_pointer), - shared_counter(other.shared_counter), - original_device_ptr(other.original_device_ptr), - original_device_size(other.original_device_size), - original_device(other.original_device), - need_realloc_(other.need_realloc_), - modified(other.modified) -{ - other.data_elements = 0; - other.data_size = 0; - other.device_size = 0; - other.data_width = 0; - other.data_height = 0; - other.data_depth = 0; - other.device = 0; - other.device_pointer = 0; - other.host_pointer = 0; - other.shared_pointer = 0; - other.shared_counter = 0; - other.original_device_ptr = 0; - other.original_device_size = 0; - other.original_device = 0; - other.need_realloc_ = false; - other.modified = false; -} - -device_memory::~device_memory() -{ - assert(shared_pointer == 0); - assert(shared_counter == 0); -} - -void *device_memory::host_alloc(size_t size) -{ - if (!size) { - return 0; - } - - void *ptr = util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES); - - if (ptr) { - util_guarded_mem_alloc(size); - } - else { - throw std::bad_alloc(); - } - - return ptr; -} - -void device_memory::host_free() -{ - if (host_pointer) { - util_guarded_mem_free(memory_size()); - util_aligned_free((void *)host_pointer); - host_pointer = 0; - } 
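  /* Note on the pairing above: host_alloc() returns an aligned buffer from util_aligned_malloc()
   * and registers its size with util_guarded_mem_alloc() (or throws std::bad_alloc on failure),
   * so host_free() releases the same memory_size() from the statistics before freeing the aligned
   * pointer and resets host_pointer, which makes a repeated call a harmless no-op. */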
-} - -void device_memory::device_alloc() -{ - assert(!device_pointer && type != MEM_TEXTURE && type != MEM_GLOBAL); - device->mem_alloc(*this); -} - -void device_memory::device_free() -{ - if (device_pointer) { - device->mem_free(*this); - } -} - -void device_memory::device_copy_to() -{ - if (host_pointer) { - device->mem_copy_to(*this); - } -} - -void device_memory::device_copy_from(size_t y, size_t w, size_t h, size_t elem) -{ - assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL); - device->mem_copy_from(*this, y, w, h, elem); -} - -void device_memory::device_zero() -{ - if (data_size) { - device->mem_zero(*this); - } -} - -bool device_memory::device_is_cpu() -{ - return (device->info.type == DEVICE_CPU); -} - -void device_memory::swap_device(Device *new_device, - size_t new_device_size, - device_ptr new_device_ptr) -{ - original_device = device; - original_device_size = device_size; - original_device_ptr = device_pointer; - - device = new_device; - device_size = new_device_size; - device_pointer = new_device_ptr; -} - -void device_memory::restore_device() -{ - device = original_device; - device_size = original_device_size; - device_pointer = original_device_ptr; -} - -bool device_memory::is_resident(Device *sub_device) const -{ - return device->is_resident(device_pointer, sub_device); -} - -/* Device Sub Ptr */ - -device_sub_ptr::device_sub_ptr(device_memory &mem, size_t offset, size_t size) : device(mem.device) -{ - ptr = device->mem_alloc_sub_ptr(mem, offset, size); -} - -device_sub_ptr::~device_sub_ptr() -{ - device->mem_free_sub_ptr(ptr); -} - -/* Device Texture */ - -device_texture::device_texture(Device *device, - const char *name, - const uint slot, - ImageDataType image_data_type, - InterpolationType interpolation, - ExtensionType extension) - : device_memory(device, name, MEM_TEXTURE), slot(slot) -{ - switch (image_data_type) { - case IMAGE_DATA_TYPE_FLOAT4: - data_type = TYPE_FLOAT; - data_elements = 4; - break; - case IMAGE_DATA_TYPE_FLOAT: - data_type = TYPE_FLOAT; - data_elements = 1; - break; - case IMAGE_DATA_TYPE_BYTE4: - data_type = TYPE_UCHAR; - data_elements = 4; - break; - case IMAGE_DATA_TYPE_BYTE: - case IMAGE_DATA_TYPE_NANOVDB_FLOAT: - case IMAGE_DATA_TYPE_NANOVDB_FLOAT3: - data_type = TYPE_UCHAR; - data_elements = 1; - break; - case IMAGE_DATA_TYPE_HALF4: - data_type = TYPE_HALF; - data_elements = 4; - break; - case IMAGE_DATA_TYPE_HALF: - data_type = TYPE_HALF; - data_elements = 1; - break; - case IMAGE_DATA_TYPE_USHORT4: - data_type = TYPE_UINT16; - data_elements = 4; - break; - case IMAGE_DATA_TYPE_USHORT: - data_type = TYPE_UINT16; - data_elements = 1; - break; - case IMAGE_DATA_NUM_TYPES: - assert(0); - return; - } - - memset(&info, 0, sizeof(info)); - info.data_type = image_data_type; - info.interpolation = interpolation; - info.extension = extension; -} - -device_texture::~device_texture() -{ - device_free(); - host_free(); -} - -/* Host memory allocation. 
*/ -void *device_texture::alloc(const size_t width, const size_t height, const size_t depth) -{ - const size_t new_size = size(width, height, depth); - - if (new_size != data_size) { - device_free(); - host_free(); - host_pointer = host_alloc(data_elements * datatype_size(data_type) * new_size); - assert(device_pointer == 0); - } - - data_size = new_size; - data_width = width; - data_height = height; - data_depth = depth; - - info.width = width; - info.height = height; - info.depth = depth; - - return host_pointer; -} - -void device_texture::copy_to_device() -{ - device_copy_to(); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h deleted file mode 100644 index be6123e09b2..00000000000 --- a/intern/cycles/device/device_memory.h +++ /dev/null @@ -1,650 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __DEVICE_MEMORY_H__ -#define __DEVICE_MEMORY_H__ - -/* Device Memory - * - * Data types for allocating, copying and freeing device memory. */ - -#include "util/util_array.h" -#include "util/util_half.h" -#include "util/util_string.h" -#include "util/util_texture.h" -#include "util/util_types.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -class Device; - -enum MemoryType { - MEM_READ_ONLY, - MEM_READ_WRITE, - MEM_DEVICE_ONLY, - MEM_GLOBAL, - MEM_TEXTURE, -}; - -/* Supported Data Types */ - -enum DataType { - TYPE_UNKNOWN, - TYPE_UCHAR, - TYPE_UINT16, - TYPE_UINT, - TYPE_INT, - TYPE_FLOAT, - TYPE_HALF, - TYPE_UINT64, -}; - -static constexpr size_t datatype_size(DataType datatype) -{ - switch (datatype) { - case TYPE_UNKNOWN: - return 1; - case TYPE_UCHAR: - return sizeof(uchar); - case TYPE_FLOAT: - return sizeof(float); - case TYPE_UINT: - return sizeof(uint); - case TYPE_UINT16: - return sizeof(uint16_t); - case TYPE_INT: - return sizeof(int); - case TYPE_HALF: - return sizeof(half); - case TYPE_UINT64: - return sizeof(uint64_t); - default: - return 0; - } -} - -/* Traits for data types */ - -template struct device_type_traits { - static const DataType data_type = TYPE_UNKNOWN; - static const size_t num_elements_cpu = sizeof(T); - static const size_t num_elements_gpu = sizeof(T); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 3; - static const size_t num_elements_gpu = 3; - static_assert(sizeof(uchar3) == num_elements_cpu * 
datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_UCHAR; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_UINT; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_UINT; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_UINT; - static const size_t num_elements_cpu = 3; - static const size_t num_elements_gpu = 3; - static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_UINT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 3; - static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_INT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 2; - static const size_t num_elements_gpu = 2; - static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 3; - static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_FLOAT; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_HALF; - static const size_t num_elements_cpu = 1; - static 
const size_t num_elements_gpu = 1; - static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_UINT16; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_UINT16; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_HALF; - static const size_t num_elements_cpu = 4; - static const size_t num_elements_gpu = 4; - static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type)); -}; - -template<> struct device_type_traits { - static const DataType data_type = TYPE_UINT64; - static const size_t num_elements_cpu = 1; - static const size_t num_elements_gpu = 1; - static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type)); -}; - -/* Device Memory - * - * Base class for all device memory. This should not be allocated directly, - * instead the appropriate subclass can be used. */ - -class device_memory { - public: - size_t memory_size() - { - return data_size * data_elements * datatype_size(data_type); - } - size_t memory_elements_size(int elements) - { - return elements * data_elements * datatype_size(data_type); - } - - /* Data information. */ - DataType data_type; - int data_elements; - size_t data_size; - size_t device_size; - size_t data_width; - size_t data_height; - size_t data_depth; - MemoryType type; - const char *name; - - /* Pointers. */ - Device *device; - device_ptr device_pointer; - void *host_pointer; - void *shared_pointer; - /* reference counter for shared_pointer */ - int shared_counter; - - virtual ~device_memory(); - - void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr); - void restore_device(); - - bool is_resident(Device *sub_device) const; - - protected: - friend class CUDADevice; - friend class OptiXDevice; - friend class HIPDevice; - - /* Only create through subclasses. */ - device_memory(Device *device, const char *name, MemoryType type); - device_memory(device_memory &&other) noexcept; - - /* No copying allowed. */ - device_memory(const device_memory &) = delete; - device_memory &operator=(const device_memory &) = delete; - - /* Host allocation on the device. All host_pointer memory should be - * allocated with these functions, for devices that support using - * the same pointer for host and device. */ - void *host_alloc(size_t size); - void host_free(); - - /* Device memory allocation and copying. */ - void device_alloc(); - void device_free(); - void device_copy_to(); - void device_copy_from(size_t y, size_t w, size_t h, size_t elem); - void device_zero(); - - bool device_is_cpu(); - - device_ptr original_device_ptr; - size_t original_device_size; - Device *original_device; - bool need_realloc_; - bool modified; -}; - -/* Device Only Memory - * - * Working memory only needed by the device, with no corresponding allocation - * on the host. Only used internally in the device implementations. 
*/ - -template class device_only_memory : public device_memory { - public: - device_only_memory(Device *device, const char *name, bool allow_host_memory_fallback = false) - : device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY) - { - data_type = device_type_traits::data_type; - data_elements = max(device_is_cpu() ? device_type_traits::num_elements_cpu : - device_type_traits::num_elements_gpu, - 1); - } - - device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other)) - { - } - - virtual ~device_only_memory() - { - free(); - } - - void alloc_to_device(size_t num, bool shrink_to_fit = true) - { - size_t new_size = num; - bool reallocate; - - if (shrink_to_fit) { - reallocate = (data_size != new_size); - } - else { - reallocate = (data_size < new_size); - } - - if (reallocate) { - device_free(); - data_size = new_size; - device_alloc(); - } - } - - void free() - { - device_free(); - data_size = 0; - } - - void zero_to_device() - { - device_zero(); - } -}; - -/* Device Vector - * - * Data vector to exchange data between host and device. Memory will be - * allocated on the host first with alloc() and resize, and then filled - * in and copied to the device with copy_to_device(). Or alternatively - * allocated and set to zero on the device with zero_to_device(). - * - * When using memory type MEM_GLOBAL, a pointer to this memory will be - * automatically attached to kernel globals, using the provided name - * matching an entry in kernel_textures.h. */ - -template class device_vector : public device_memory { - public: - /* Can only use this for types that have the same size on CPU and GPU. */ - static_assert(device_type_traits::num_elements_cpu == - device_type_traits::num_elements_gpu); - - device_vector(Device *device, const char *name, MemoryType type) - : device_memory(device, name, type) - { - data_type = device_type_traits::data_type; - data_elements = device_type_traits::num_elements_cpu; - modified = true; - need_realloc_ = true; - - assert(data_elements > 0); - } - - virtual ~device_vector() - { - free(); - } - - /* Host memory allocation. */ - T *alloc(size_t width, size_t height = 0, size_t depth = 0) - { - size_t new_size = size(width, height, depth); - - if (new_size != data_size) { - device_free(); - host_free(); - host_pointer = host_alloc(sizeof(T) * new_size); - modified = true; - assert(device_pointer == 0); - } - - data_size = new_size; - data_width = width; - data_height = height; - data_depth = depth; - - return data(); - } - - /* Host memory resize. Only use this if the original data needs to be - * preserved, it is faster to call alloc() if it can be discarded. */ - T *resize(size_t width, size_t height = 0, size_t depth = 0) - { - size_t new_size = size(width, height, depth); - - if (new_size != data_size) { - void *new_ptr = host_alloc(sizeof(T) * new_size); - - if (new_size && data_size) { - size_t min_size = ((new_size < data_size) ? new_size : data_size); - memcpy((T *)new_ptr, (T *)host_pointer, sizeof(T) * min_size); - } - - device_free(); - host_free(); - host_pointer = new_ptr; - assert(device_pointer == 0); - } - - data_size = new_size; - data_width = width; - data_height = height; - data_depth = depth; - - return data(); - } - - /* Take over data from an existing array. 
*/ - void steal_data(array &from) - { - device_free(); - host_free(); - - data_size = from.size(); - data_width = 0; - data_height = 0; - data_depth = 0; - host_pointer = from.steal_pointer(); - assert(device_pointer == 0); - } - - void give_data(array &to) - { - device_free(); - - to.set_data((T *)host_pointer, data_size); - data_size = 0; - data_width = 0; - data_height = 0; - data_depth = 0; - host_pointer = 0; - assert(device_pointer == 0); - } - - /* Free device and host memory. */ - void free() - { - device_free(); - host_free(); - - data_size = 0; - data_width = 0; - data_height = 0; - data_depth = 0; - host_pointer = 0; - modified = true; - need_realloc_ = true; - assert(device_pointer == 0); - } - - void free_if_need_realloc(bool force_free) - { - if (need_realloc_ || force_free) { - free(); - } - } - - bool is_modified() const - { - return modified; - } - - bool need_realloc() - { - return need_realloc_; - } - - void tag_modified() - { - modified = true; - } - - void tag_realloc() - { - need_realloc_ = true; - tag_modified(); - } - - size_t size() const - { - return data_size; - } - - T *data() - { - return (T *)host_pointer; - } - - const T *data() const - { - return (T *)host_pointer; - } - - T &operator[](size_t i) - { - assert(i < data_size); - return data()[i]; - } - - void copy_to_device() - { - if (data_size != 0) { - device_copy_to(); - } - } - - void copy_to_device_if_modified() - { - if (!modified) { - return; - } - - copy_to_device(); - } - - void clear_modified() - { - modified = false; - need_realloc_ = false; - } - - void copy_from_device() - { - device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T)); - } - - void copy_from_device(size_t y, size_t w, size_t h) - { - device_copy_from(y, w, h, sizeof(T)); - } - - void zero_to_device() - { - device_zero(); - } - - void move_device(Device *new_device) - { - copy_from_device(); - device_free(); - device = new_device; - copy_to_device(); - } - - protected: - size_t size(size_t width, size_t height, size_t depth) - { - return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth); - } -}; - -/* Device Sub Memory - * - * Pointer into existing memory. It is not allocated separately, but created - * from an already allocated base memory. It is freed automatically when it - * goes out of scope, which should happen before base memory is freed. - * - * Note: some devices require offset and size of the sub_ptr to be properly - * aligned to device->mem_address_alingment(). */ - -class device_sub_ptr { - public: - device_sub_ptr(device_memory &mem, size_t offset, size_t size); - ~device_sub_ptr(); - - device_ptr operator*() const - { - return ptr; - } - - protected: - /* No copying. */ - device_sub_ptr &operator=(const device_sub_ptr &); - - Device *device; - device_ptr ptr; -}; - -/* Device Texture - * - * 2D or 3D image texture memory. */ - -class device_texture : public device_memory { - public: - device_texture(Device *device, - const char *name, - const uint slot, - ImageDataType image_data_type, - InterpolationType interpolation, - ExtensionType extension); - ~device_texture(); - - void *alloc(const size_t width, const size_t height, const size_t depth = 0); - void copy_to_device(); - - uint slot; - TextureInfo info; - - protected: - size_t size(const size_t width, const size_t height, const size_t depth) - { - return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 
1 : depth); - } -}; - -CCL_NAMESPACE_END - -#endif /* __DEVICE_MEMORY_H__ */ diff --git a/intern/cycles/device/device_queue.cpp b/intern/cycles/device/device_queue.cpp deleted file mode 100644 index f2b2f3496e0..00000000000 --- a/intern/cycles/device/device_queue.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "device/device_queue.h" - -#include "util/util_algorithm.h" -#include "util/util_logging.h" -#include "util/util_time.h" - -#include - -CCL_NAMESPACE_BEGIN - -DeviceQueue::DeviceQueue(Device *device) - : device(device), last_kernels_enqueued_(0), last_sync_time_(0.0) -{ - DCHECK_NE(device, nullptr); -} - -DeviceQueue::~DeviceQueue() -{ - if (VLOG_IS_ON(3)) { - /* Print kernel execution times sorted by time. */ - vector> stats_sorted; - for (const auto &stat : stats_kernel_time_) { - stats_sorted.push_back(stat); - } - - sort(stats_sorted.begin(), - stats_sorted.end(), - [](const pair &a, const pair &b) { - return a.second > b.second; - }); - - VLOG(3) << "GPU queue stats:"; - for (const auto &[mask, time] : stats_sorted) { - VLOG(3) << " " << std::setfill(' ') << std::setw(10) << std::fixed << std::setprecision(5) - << std::right << time << "s: " << device_kernel_mask_as_string(mask); - } - } -} - -void DeviceQueue::debug_init_execution() -{ - if (VLOG_IS_ON(3)) { - last_sync_time_ = time_dt(); - } - - last_kernels_enqueued_ = 0; -} - -void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size) -{ - if (VLOG_IS_ON(3)) { - VLOG(4) << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size " - << work_size; - } - - last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel); -} - -void DeviceQueue::debug_synchronize() -{ - if (VLOG_IS_ON(3)) { - const double new_time = time_dt(); - const double elapsed_time = new_time - last_sync_time_; - VLOG(4) << "GPU queue synchronize, elapsed " << std::setw(10) << elapsed_time << "s"; - - stats_kernel_time_[last_kernels_enqueued_] += elapsed_time; - - last_sync_time_ = new_time; - } - - last_kernels_enqueued_ = 0; -} - -string DeviceQueue::debug_active_kernels() -{ - return device_kernel_mask_as_string(last_kernels_enqueued_); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/device/device_queue.h b/intern/cycles/device/device_queue.h deleted file mode 100644 index e6835b787cf..00000000000 --- a/intern/cycles/device/device_queue.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "device/device_kernel.h"
-
-#include "device/device_graphics_interop.h"
-#include "util/util_logging.h"
-#include "util/util_map.h"
-#include "util/util_string.h"
-#include "util/util_unique_ptr.h"
-
-CCL_NAMESPACE_BEGIN
-
-class Device;
-class device_memory;
-
-struct KernelWorkTile;
-
-/* Abstraction of a command queue for a device.
- * Provides API to schedule kernel execution in a specific queue with minimal possible overhead
- * from the driver side.
- *
- * This class encapsulates all properties needed for command execution. */
-class DeviceQueue {
- public:
-  virtual ~DeviceQueue();
-
-  /* Number of concurrent states to process for integrator,
-   * based on number of cores and/or available memory. */
-  virtual int num_concurrent_states(const size_t state_size) const = 0;
-
-  /* Number of states which keeps the device occupied with work without losing performance.
-   * The renderer will add more work (when available) when number of active paths falls below this
-   * value. */
-  virtual int num_concurrent_busy_states() const = 0;
-
-  /* Initialize execution of kernels on this queue.
-   *
-   * Will, for example, load all data required by the kernels from Device to global or path state.
-   *
-   * Use this method after device synchronization has finished before enqueueing any kernels. */
-  virtual void init_execution() = 0;
-
-  /* Test if an optional device kernel is available. */
-  virtual bool kernel_available(DeviceKernel kernel) const = 0;
-
-  /* Enqueue kernel execution.
-   *
-   * Execute the kernel work_size times on the device.
-   * Supported argument types:
-   * - int: pass pointer to the int
-   * - device memory: pass pointer to device_memory.device_pointer
-   * Return false if there was an error executing this or a previous kernel. */
-  virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) = 0;
-
-  /* Wait until all enqueued kernels have finished execution.
-   * Return false if there was an error executing any of the enqueued kernels. */
-  virtual bool synchronize() = 0;
-
-  /* Copy memory to/from device as part of the command queue, to ensure
-   * operations are done in order without having to synchronize. */
-  virtual void zero_to_device(device_memory &mem) = 0;
-  virtual void copy_to_device(device_memory &mem) = 0;
-  virtual void copy_from_device(device_memory &mem) = 0;
-
-  /* Graphics resources interoperability.
-   *
-   * Interoperability here means that the device is capable of computing the result
-   * directly into an OpenGL (or other graphics library) buffer. */
-
-  /* Create graphics interoperability context which will take care of mapping a graphics
-   * resource as a buffer writable by kernels of this device. */
-  virtual unique_ptr<DeviceGraphicsInterop> graphics_interop_create()
-  {
-    LOG(FATAL) << "Request of GPU interop of a device which does not support it.";
-    return nullptr;
-  }
-
-  /* Device this queue has been created for. */
-  Device *device;
-
- protected:
-  /* Hide construction so that allocation via `Device` API is enforced. */
-  explicit DeviceQueue(Device *device);
-
-  /* Implementations call these from the corresponding methods to generate debugging logs. */
-  void debug_init_execution();
-  void debug_enqueue(DeviceKernel kernel, const int work_size);
-  void debug_synchronize();
-  string debug_active_kernels();
-
-  /* Combination of kernels enqueued together since the last synchronize.
*/ - DeviceKernelMask last_kernels_enqueued_; - /* Time of synchronize call. */ - double last_sync_time_; - /* Accumulated execution time for combinations of kernels launched together. */ - map stats_kernel_time_; -}; - -CCL_NAMESPACE_END diff --git a/intern/cycles/device/dummy/device.cpp b/intern/cycles/device/dummy/device.cpp index e3cea272300..64f6b0eb58c 100644 --- a/intern/cycles/device/dummy/device.cpp +++ b/intern/cycles/device/dummy/device.cpp @@ -17,7 +17,7 @@ #include "device/dummy/device.h" #include "device/device.h" -#include "device/device_queue.h" +#include "device/queue.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/dummy/device.h b/intern/cycles/device/dummy/device.h index 832a9568129..c45eb036ca5 100644 --- a/intern/cycles/device/dummy/device.h +++ b/intern/cycles/device/dummy/device.h @@ -16,8 +16,8 @@ #pragma once -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/string.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/graphics_interop.cpp b/intern/cycles/device/graphics_interop.cpp new file mode 100644 index 00000000000..0b092711b61 --- /dev/null +++ b/intern/cycles/device/graphics_interop.cpp @@ -0,0 +1,21 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/graphics_interop.h" + +CCL_NAMESPACE_BEGIN + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/graphics_interop.h b/intern/cycles/device/graphics_interop.h new file mode 100644 index 00000000000..f1661146ddd --- /dev/null +++ b/intern/cycles/device/graphics_interop.h @@ -0,0 +1,42 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "session/display_driver.h" + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* Device-side graphics interoperability support. + * + * Takes care of holding all the handlers needed by the device to implement interoperability with + * the graphics library. */ +class DeviceGraphicsInterop { + public: + DeviceGraphicsInterop() = default; + virtual ~DeviceGraphicsInterop() = default; + + /* Update this device-side graphics interoperability object with the given destination resource + * information. 
*/ + virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0; + + virtual device_ptr map() = 0; + virtual void unmap() = 0; +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/hip/device.cpp b/intern/cycles/device/hip/device.cpp index 90028ac7f10..f71732d14bb 100644 --- a/intern/cycles/device/hip/device.cpp +++ b/intern/cycles/device/hip/device.cpp @@ -16,14 +16,14 @@ #include "device/hip/device.h" -#include "util/util_logging.h" +#include "util/log.h" #ifdef WITH_HIP # include "device/device.h" # include "device/hip/device_impl.h" -# include "util/util_string.h" -# include "util/util_windows.h" +# include "util/string.h" +# include "util/windows.h" #endif /* WITH_HIP */ CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/hip/device.h b/intern/cycles/device/hip/device.h index 965fd9e484b..cdbe364b2b3 100644 --- a/intern/cycles/device/hip/device.h +++ b/intern/cycles/device/hip/device.h @@ -16,8 +16,8 @@ #pragma once -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/string.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 9ffeb9724a1..31b7b07383b 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -24,18 +24,18 @@ # include "device/hip/device_impl.h" -# include "util/util_debug.h" -# include "util/util_foreach.h" -# include "util/util_logging.h" -# include "util/util_map.h" -# include "util/util_md5.h" -# include "util/util_opengl.h" -# include "util/util_path.h" -# include "util/util_string.h" -# include "util/util_system.h" -# include "util/util_time.h" -# include "util/util_types.h" -# include "util/util_windows.h" +# include "util/debug.h" +# include "util/foreach.h" +# include "util/log.h" +# include "util/map.h" +# include "util/md5.h" +# include "util/opengl.h" +# include "util/path.h" +# include "util/string.h" +# include "util/system.h" +# include "util/time.h" +# include "util/types.h" +# include "util/windows.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/hip/device_impl.h b/intern/cycles/device/hip/device_impl.h index 1d138ee9856..8d81291d15e 100644 --- a/intern/cycles/device/hip/device_impl.h +++ b/intern/cycles/device/hip/device_impl.h @@ -21,12 +21,12 @@ # include "device/hip/queue.h" # include "device/hip/util.h" -# include "util/util_map.h" +# include "util/map.h" # ifdef WITH_HIP_DYNLOAD # include "hipew.h" # else -# include "util/util_opengl.h" +# include "util/opengl.h" # endif CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/hip/graphics_interop.h b/intern/cycles/device/hip/graphics_interop.h index 2b2d287ff6c..8314405e670 100644 --- a/intern/cycles/device/hip/graphics_interop.h +++ b/intern/cycles/device/hip/graphics_interop.h @@ -16,7 +16,7 @@ #ifdef WITH_HIP -# include "device/device_graphics_interop.h" +# include "device/graphics_interop.h" # ifdef WITH_HIP_DYNLOAD # include "hipew.h" diff --git a/intern/cycles/device/hip/kernel.h b/intern/cycles/device/hip/kernel.h index 3301731f56e..f1378f8eebf 100644 --- a/intern/cycles/device/hip/kernel.h +++ b/intern/cycles/device/hip/kernel.h @@ -18,7 +18,7 @@ #ifdef WITH_HIP -# include "device/device_kernel.h" +# include "device/kernel.h" # ifdef WITH_HIP_DYNLOAD # include "hipew.h" diff --git a/intern/cycles/device/hip/queue.h b/intern/cycles/device/hip/queue.h index b92f7de7e4b..95d1afaff0f 100644 --- a/intern/cycles/device/hip/queue.h +++ b/intern/cycles/device/hip/queue.h @@ -18,9 
+18,9 @@ #ifdef WITH_HIP -# include "device/device_kernel.h" -# include "device/device_memory.h" -# include "device/device_queue.h" +# include "device/kernel.h" +# include "device/memory.h" +# include "device/queue.h" # include "device/hip/util.h" diff --git a/intern/cycles/device/kernel.cpp b/intern/cycles/device/kernel.cpp new file mode 100644 index 00000000000..1e4f0c48f18 --- /dev/null +++ b/intern/cycles/device/kernel.cpp @@ -0,0 +1,165 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/kernel.h" + +#include "util/log.h" + +CCL_NAMESPACE_BEGIN + +const char *device_kernel_as_string(DeviceKernel kernel) +{ + switch (kernel) { + /* Integrator. */ + case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA: + return "integrator_init_from_camera"; + case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE: + return "integrator_init_from_bake"; + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: + return "integrator_intersect_closest"; + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: + return "integrator_intersect_shadow"; + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: + return "integrator_intersect_subsurface"; + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: + return "integrator_intersect_volume_stack"; + case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND: + return "integrator_shade_background"; + case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: + return "integrator_shade_light"; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: + return "integrator_shade_shadow"; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: + return "integrator_shade_surface"; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: + return "integrator_shade_surface_raytrace"; + case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: + return "integrator_shade_volume"; + case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL: + return "integrator_megakernel"; + case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY: + return "integrator_queued_paths_array"; + case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY: + return "integrator_queued_shadow_paths_array"; + case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY: + return "integrator_active_paths_array"; + case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY: + return "integrator_terminated_paths_array"; + case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY: + return "integrator_sorted_paths_array"; + case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY: + return "integrator_compact_paths_array"; + case DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES: + return "integrator_compact_states"; + case DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY: + return "integrator_terminated_shadow_paths_array"; + case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY: + return "integrator_compact_shadow_paths_array"; + case DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES: + return "integrator_compact_shadow_states"; + case DEVICE_KERNEL_INTEGRATOR_RESET: + return "integrator_reset"; + case DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS: + return 
"integrator_shadow_catcher_count_possible_splits"; + + /* Shader evaluation. */ + case DEVICE_KERNEL_SHADER_EVAL_DISPLACE: + return "shader_eval_displace"; + case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND: + return "shader_eval_background"; + case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY: + return "shader_eval_curve_shadow_transparency"; + + /* Film. */ + +#define FILM_CONVERT_KERNEL_AS_STRING(variant, variant_lowercase) \ + case DEVICE_KERNEL_FILM_CONVERT_##variant: \ + return "film_convert_" #variant_lowercase; \ + case DEVICE_KERNEL_FILM_CONVERT_##variant##_HALF_RGBA: \ + return "film_convert_" #variant_lowercase "_half_rgba"; + + FILM_CONVERT_KERNEL_AS_STRING(DEPTH, depth) + FILM_CONVERT_KERNEL_AS_STRING(MIST, mist) + FILM_CONVERT_KERNEL_AS_STRING(SAMPLE_COUNT, sample_count) + FILM_CONVERT_KERNEL_AS_STRING(FLOAT, float) + FILM_CONVERT_KERNEL_AS_STRING(LIGHT_PATH, light_path) + FILM_CONVERT_KERNEL_AS_STRING(FLOAT3, float3) + FILM_CONVERT_KERNEL_AS_STRING(MOTION, motion) + FILM_CONVERT_KERNEL_AS_STRING(CRYPTOMATTE, cryptomatte) + FILM_CONVERT_KERNEL_AS_STRING(SHADOW_CATCHER, shadow_catcher) + FILM_CONVERT_KERNEL_AS_STRING(SHADOW_CATCHER_MATTE_WITH_SHADOW, + shadow_catcher_matte_with_shadow) + FILM_CONVERT_KERNEL_AS_STRING(COMBINED, combined) + FILM_CONVERT_KERNEL_AS_STRING(FLOAT4, float4) + +#undef FILM_CONVERT_KERNEL_AS_STRING + + /* Adaptive sampling. */ + case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK: + return "adaptive_sampling_convergence_check"; + case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X: + return "adaptive_sampling_filter_x"; + case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y: + return "adaptive_sampling_filter_y"; + + /* Denoising. */ + case DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS: + return "filter_guiding_preprocess"; + case DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO: + return "filter_guiding_set_fake_albedo"; + case DEVICE_KERNEL_FILTER_COLOR_PREPROCESS: + return "filter_color_preprocess"; + case DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS: + return "filter_color_postprocess"; + + /* Cryptomatte. */ + case DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS: + return "cryptomatte_postprocess"; + + /* Generic */ + case DEVICE_KERNEL_PREFIX_SUM: + return "prefix_sum"; + + case DEVICE_KERNEL_NUM: + break; + }; + LOG(FATAL) << "Unhandled kernel " << static_cast(kernel) << ", should never happen."; + return "UNKNOWN"; +} + +std::ostream &operator<<(std::ostream &os, DeviceKernel kernel) +{ + os << device_kernel_as_string(kernel); + return os; +} + +string device_kernel_mask_as_string(DeviceKernelMask mask) +{ + string str; + + for (uint64_t i = 0; i < sizeof(DeviceKernelMask) * 8; i++) { + if (mask & (uint64_t(1) << i)) { + if (!str.empty()) { + str += " "; + } + str += device_kernel_as_string((DeviceKernel)i); + } + } + + return str; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/kernel.h b/intern/cycles/device/kernel.h new file mode 100644 index 00000000000..780ead2d28a --- /dev/null +++ b/intern/cycles/device/kernel.h @@ -0,0 +1,33 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/types.h" + +#include "util/string.h" + +#include // NOLINT + +CCL_NAMESPACE_BEGIN + +const char *device_kernel_as_string(DeviceKernel kernel); +std::ostream &operator<<(std::ostream &os, DeviceKernel kernel); + +typedef uint64_t DeviceKernelMask; +string device_kernel_mask_as_string(DeviceKernelMask mask); + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/memory.cpp b/intern/cycles/device/memory.cpp new file mode 100644 index 00000000000..f162b00d9f7 --- /dev/null +++ b/intern/cycles/device/memory.cpp @@ -0,0 +1,285 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/memory.h" +#include "device/device.h" + +CCL_NAMESPACE_BEGIN + +/* Device Memory */ + +device_memory::device_memory(Device *device, const char *name, MemoryType type) + : data_type(device_type_traits::data_type), + data_elements(device_type_traits::num_elements_cpu), + data_size(0), + device_size(0), + data_width(0), + data_height(0), + data_depth(0), + type(type), + name(name), + device(device), + device_pointer(0), + host_pointer(0), + shared_pointer(0), + shared_counter(0), + original_device_ptr(0), + original_device_size(0), + original_device(0), + need_realloc_(false), + modified(false) +{ +} + +device_memory::device_memory(device_memory &&other) noexcept + : data_type(other.data_type), + data_elements(other.data_elements), + data_size(other.data_size), + device_size(other.device_size), + data_width(other.data_width), + data_height(other.data_height), + data_depth(other.data_depth), + type(other.type), + name(other.name), + device(other.device), + device_pointer(other.device_pointer), + host_pointer(other.host_pointer), + shared_pointer(other.shared_pointer), + shared_counter(other.shared_counter), + original_device_ptr(other.original_device_ptr), + original_device_size(other.original_device_size), + original_device(other.original_device), + need_realloc_(other.need_realloc_), + modified(other.modified) +{ + other.data_elements = 0; + other.data_size = 0; + other.device_size = 0; + other.data_width = 0; + other.data_height = 0; + other.data_depth = 0; + other.device = 0; + other.device_pointer = 0; + other.host_pointer = 0; + other.shared_pointer = 0; + other.shared_counter = 0; + other.original_device_ptr = 0; + other.original_device_size = 0; + other.original_device = 0; + other.need_realloc_ = false; + other.modified = false; +} + +device_memory::~device_memory() +{ + assert(shared_pointer == 0); + assert(shared_counter == 0); +} + +void *device_memory::host_alloc(size_t size) +{ + if (!size) { + return 0; + } + + void *ptr = util_aligned_malloc(size, MIN_ALIGNMENT_CPU_DATA_TYPES); + + if (ptr) { + util_guarded_mem_alloc(size); + } + else { + throw std::bad_alloc(); + } + + return ptr; +} + +void device_memory::host_free() +{ + if (host_pointer) { + util_guarded_mem_free(memory_size()); + 
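  /* memory_size() expands to data_size * data_elements * datatype_size(data_type), i.e. the same
   * byte count that was registered when the host buffer was allocated, so the guarded statistics
   * stay balanced before the aligned allocation itself is released below. */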
util_aligned_free((void *)host_pointer); + host_pointer = 0; + } +} + +void device_memory::device_alloc() +{ + assert(!device_pointer && type != MEM_TEXTURE && type != MEM_GLOBAL); + device->mem_alloc(*this); +} + +void device_memory::device_free() +{ + if (device_pointer) { + device->mem_free(*this); + } +} + +void device_memory::device_copy_to() +{ + if (host_pointer) { + device->mem_copy_to(*this); + } +} + +void device_memory::device_copy_from(size_t y, size_t w, size_t h, size_t elem) +{ + assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL); + device->mem_copy_from(*this, y, w, h, elem); +} + +void device_memory::device_zero() +{ + if (data_size) { + device->mem_zero(*this); + } +} + +bool device_memory::device_is_cpu() +{ + return (device->info.type == DEVICE_CPU); +} + +void device_memory::swap_device(Device *new_device, + size_t new_device_size, + device_ptr new_device_ptr) +{ + original_device = device; + original_device_size = device_size; + original_device_ptr = device_pointer; + + device = new_device; + device_size = new_device_size; + device_pointer = new_device_ptr; +} + +void device_memory::restore_device() +{ + device = original_device; + device_size = original_device_size; + device_pointer = original_device_ptr; +} + +bool device_memory::is_resident(Device *sub_device) const +{ + return device->is_resident(device_pointer, sub_device); +} + +/* Device Sub Ptr */ + +device_sub_ptr::device_sub_ptr(device_memory &mem, size_t offset, size_t size) : device(mem.device) +{ + ptr = device->mem_alloc_sub_ptr(mem, offset, size); +} + +device_sub_ptr::~device_sub_ptr() +{ + device->mem_free_sub_ptr(ptr); +} + +/* Device Texture */ + +device_texture::device_texture(Device *device, + const char *name, + const uint slot, + ImageDataType image_data_type, + InterpolationType interpolation, + ExtensionType extension) + : device_memory(device, name, MEM_TEXTURE), slot(slot) +{ + switch (image_data_type) { + case IMAGE_DATA_TYPE_FLOAT4: + data_type = TYPE_FLOAT; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_FLOAT: + data_type = TYPE_FLOAT; + data_elements = 1; + break; + case IMAGE_DATA_TYPE_BYTE4: + data_type = TYPE_UCHAR; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_BYTE: + case IMAGE_DATA_TYPE_NANOVDB_FLOAT: + case IMAGE_DATA_TYPE_NANOVDB_FLOAT3: + data_type = TYPE_UCHAR; + data_elements = 1; + break; + case IMAGE_DATA_TYPE_HALF4: + data_type = TYPE_HALF; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_HALF: + data_type = TYPE_HALF; + data_elements = 1; + break; + case IMAGE_DATA_TYPE_USHORT4: + data_type = TYPE_UINT16; + data_elements = 4; + break; + case IMAGE_DATA_TYPE_USHORT: + data_type = TYPE_UINT16; + data_elements = 1; + break; + case IMAGE_DATA_NUM_TYPES: + assert(0); + return; + } + + memset(&info, 0, sizeof(info)); + info.data_type = image_data_type; + info.interpolation = interpolation; + info.extension = extension; +} + +device_texture::~device_texture() +{ + device_free(); + host_free(); +} + +/* Host memory allocation. 
*/ +void *device_texture::alloc(const size_t width, const size_t height, const size_t depth) +{ + const size_t new_size = size(width, height, depth); + + if (new_size != data_size) { + device_free(); + host_free(); + host_pointer = host_alloc(data_elements * datatype_size(data_type) * new_size); + assert(device_pointer == 0); + } + + data_size = new_size; + data_width = width; + data_height = height; + data_depth = depth; + + info.width = width; + info.height = height; + info.depth = depth; + + return host_pointer; +} + +void device_texture::copy_to_device() +{ + device_copy_to(); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/memory.h b/intern/cycles/device/memory.h new file mode 100644 index 00000000000..281c54cc6a5 --- /dev/null +++ b/intern/cycles/device/memory.h @@ -0,0 +1,650 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DEVICE_MEMORY_H__ +#define __DEVICE_MEMORY_H__ + +/* Device Memory + * + * Data types for allocating, copying and freeing device memory. */ + +#include "util/array.h" +#include "util/half.h" +#include "util/string.h" +#include "util/texture.h" +#include "util/types.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +class Device; + +enum MemoryType { + MEM_READ_ONLY, + MEM_READ_WRITE, + MEM_DEVICE_ONLY, + MEM_GLOBAL, + MEM_TEXTURE, +}; + +/* Supported Data Types */ + +enum DataType { + TYPE_UNKNOWN, + TYPE_UCHAR, + TYPE_UINT16, + TYPE_UINT, + TYPE_INT, + TYPE_FLOAT, + TYPE_HALF, + TYPE_UINT64, +}; + +static constexpr size_t datatype_size(DataType datatype) +{ + switch (datatype) { + case TYPE_UNKNOWN: + return 1; + case TYPE_UCHAR: + return sizeof(uchar); + case TYPE_FLOAT: + return sizeof(float); + case TYPE_UINT: + return sizeof(uint); + case TYPE_UINT16: + return sizeof(uint16_t); + case TYPE_INT: + return sizeof(int); + case TYPE_HALF: + return sizeof(half); + case TYPE_UINT64: + return sizeof(uint64_t); + default: + return 0; + } +} + +/* Traits for data types */ + +template<typename T> struct device_type_traits { + static const DataType data_type = TYPE_UNKNOWN; + static const size_t num_elements_cpu = sizeof(T); + static const size_t num_elements_gpu = sizeof(T); +}; + +template<> struct device_type_traits<uchar> { + static const DataType data_type = TYPE_UCHAR; + static const size_t num_elements_cpu = 1; + static const size_t num_elements_gpu = 1; + static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<uchar2> { + static const DataType data_type = TYPE_UCHAR; + static const size_t num_elements_cpu = 2; + static const size_t num_elements_gpu = 2; + static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<uchar3> { + static const DataType data_type = TYPE_UCHAR; + static const size_t num_elements_cpu = 3; + static const size_t num_elements_gpu = 3; + static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<uchar4> { +
static const DataType data_type = TYPE_UCHAR; + static const size_t num_elements_cpu = 4; + static const size_t num_elements_gpu = 4; + static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<uint> { + static const DataType data_type = TYPE_UINT; + static const size_t num_elements_cpu = 1; + static const size_t num_elements_gpu = 1; + static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<uint2> { + static const DataType data_type = TYPE_UINT; + static const size_t num_elements_cpu = 2; + static const size_t num_elements_gpu = 2; + static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<uint3> { + static const DataType data_type = TYPE_UINT; + static const size_t num_elements_cpu = 3; + static const size_t num_elements_gpu = 3; + static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<uint4> { + static const DataType data_type = TYPE_UINT; + static const size_t num_elements_cpu = 4; + static const size_t num_elements_gpu = 4; + static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<int> { + static const DataType data_type = TYPE_INT; + static const size_t num_elements_cpu = 1; + static const size_t num_elements_gpu = 1; + static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<int2> { + static const DataType data_type = TYPE_INT; + static const size_t num_elements_cpu = 2; + static const size_t num_elements_gpu = 2; + static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<int3> { + static const DataType data_type = TYPE_INT; + static const size_t num_elements_cpu = 4; + static const size_t num_elements_gpu = 3; + static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<int4> { + static const DataType data_type = TYPE_INT; + static const size_t num_elements_cpu = 4; + static const size_t num_elements_gpu = 4; + static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<float> { + static const DataType data_type = TYPE_FLOAT; + static const size_t num_elements_cpu = 1; + static const size_t num_elements_gpu = 1; + static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<float2> { + static const DataType data_type = TYPE_FLOAT; + static const size_t num_elements_cpu = 2; + static const size_t num_elements_gpu = 2; + static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<float3> { + static const DataType data_type = TYPE_FLOAT; + static const size_t num_elements_cpu = 4; + static const size_t num_elements_gpu = 3; + static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<float4> { + static const DataType data_type = TYPE_FLOAT; + static const size_t num_elements_cpu = 4; + static const size_t num_elements_gpu = 4; + static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<half> { + static const DataType data_type = TYPE_HALF; + static const size_t num_elements_cpu = 1; + static const size_t num_elements_gpu = 1; + static_assert(sizeof(half) ==
num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<ushort4> { + static const DataType data_type = TYPE_UINT16; + static const size_t num_elements_cpu = 4; + static const size_t num_elements_gpu = 4; + static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<uint16_t> { + static const DataType data_type = TYPE_UINT16; + static const size_t num_elements_cpu = 1; + static const size_t num_elements_gpu = 1; + static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<half4> { + static const DataType data_type = TYPE_HALF; + static const size_t num_elements_cpu = 4; + static const size_t num_elements_gpu = 4; + static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type)); +}; + +template<> struct device_type_traits<uint64_t> { + static const DataType data_type = TYPE_UINT64; + static const size_t num_elements_cpu = 1; + static const size_t num_elements_gpu = 1; + static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type)); +}; + +/* Device Memory + * + * Base class for all device memory. This should not be allocated directly, + * instead the appropriate subclass can be used. */ + +class device_memory { + public: + size_t memory_size() + { + return data_size * data_elements * datatype_size(data_type); + } + size_t memory_elements_size(int elements) + { + return elements * data_elements * datatype_size(data_type); + } + + /* Data information. */ + DataType data_type; + int data_elements; + size_t data_size; + size_t device_size; + size_t data_width; + size_t data_height; + size_t data_depth; + MemoryType type; + const char *name; + + /* Pointers. */ + Device *device; + device_ptr device_pointer; + void *host_pointer; + void *shared_pointer; + /* reference counter for shared_pointer */ + int shared_counter; + + virtual ~device_memory(); + + void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr); + void restore_device(); + + bool is_resident(Device *sub_device) const; + + protected: + friend class CUDADevice; + friend class OptiXDevice; + friend class HIPDevice; + + /* Only create through subclasses. */ + device_memory(Device *device, const char *name, MemoryType type); + device_memory(device_memory &&other) noexcept; + + /* No copying allowed. */ + device_memory(const device_memory &) = delete; + device_memory &operator=(const device_memory &) = delete; + + /* Host allocation on the device. All host_pointer memory should be + * allocated with these functions, for devices that support using + * the same pointer for host and device. */ + void *host_alloc(size_t size); + void host_free(); + + /* Device memory allocation and copying. */ + void device_alloc(); + void device_free(); + void device_copy_to(); + void device_copy_from(size_t y, size_t w, size_t h, size_t elem); + void device_zero(); + + bool device_is_cpu(); + + device_ptr original_device_ptr; + size_t original_device_size; + Device *original_device; + bool need_realloc_; + bool modified; +}; + +/* Device Only Memory + * + * Working memory only needed by the device, with no corresponding allocation + * on the host. Only used internally in the device implementations. */ + +template<typename T> class device_only_memory : public device_memory { + public: + device_only_memory(Device *device, const char *name, bool allow_host_memory_fallback = false) + : device_memory(device, name, allow_host_memory_fallback ?
MEM_READ_WRITE : MEM_DEVICE_ONLY) + { + data_type = device_type_traits<T>::data_type; + data_elements = max(device_is_cpu() ? device_type_traits<T>::num_elements_cpu : + device_type_traits<T>::num_elements_gpu, + 1); + } + + device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other)) + { + } + + virtual ~device_only_memory() + { + free(); + } + + void alloc_to_device(size_t num, bool shrink_to_fit = true) + { + size_t new_size = num; + bool reallocate; + + if (shrink_to_fit) { + reallocate = (data_size != new_size); + } + else { + reallocate = (data_size < new_size); + } + + if (reallocate) { + device_free(); + data_size = new_size; + device_alloc(); + } + } + + void free() + { + device_free(); + data_size = 0; + } + + void zero_to_device() + { + device_zero(); + } +}; + +/* Device Vector + * + * Data vector to exchange data between host and device. Memory will be + * allocated on the host first with alloc() and resize, and then filled + * in and copied to the device with copy_to_device(). Or alternatively + * allocated and set to zero on the device with zero_to_device(). + * + * When using memory type MEM_GLOBAL, a pointer to this memory will be + * automatically attached to kernel globals, using the provided name + * matching an entry in kernel_textures.h. */ + +template<typename T> class device_vector : public device_memory { + public: + /* Can only use this for types that have the same size on CPU and GPU. */ + static_assert(device_type_traits<T>::num_elements_cpu == + device_type_traits<T>::num_elements_gpu); + + device_vector(Device *device, const char *name, MemoryType type) + : device_memory(device, name, type) + { + data_type = device_type_traits<T>::data_type; + data_elements = device_type_traits<T>::num_elements_cpu; + modified = true; + need_realloc_ = true; + + assert(data_elements > 0); + } + + virtual ~device_vector() + { + free(); + } + + /* Host memory allocation. */ + T *alloc(size_t width, size_t height = 0, size_t depth = 0) + { + size_t new_size = size(width, height, depth); + + if (new_size != data_size) { + device_free(); + host_free(); + host_pointer = host_alloc(sizeof(T) * new_size); + modified = true; + assert(device_pointer == 0); + } + + data_size = new_size; + data_width = width; + data_height = height; + data_depth = depth; + + return data(); + } + + /* Host memory resize. Only use this if the original data needs to be + * preserved, it is faster to call alloc() if it can be discarded. */ + T *resize(size_t width, size_t height = 0, size_t depth = 0) + { + size_t new_size = size(width, height, depth); + + if (new_size != data_size) { + void *new_ptr = host_alloc(sizeof(T) * new_size); + + if (new_size && data_size) { + size_t min_size = ((new_size < data_size) ? new_size : data_size); + memcpy((T *)new_ptr, (T *)host_pointer, sizeof(T) * min_size); + } + + device_free(); + host_free(); + host_pointer = new_ptr; + assert(device_pointer == 0); + } + + data_size = new_size; + data_width = width; + data_height = height; + data_depth = depth; + + return data(); + }
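/* Usage sketch (editorial example, not part of this patch): a device_vector is
 * typically filled on the host and then mirrored to the device, assuming a
 * valid Device *device is available:
 *
 *   device_vector<float> weights(device, "weights", MEM_READ_ONLY);
 *   float *data = weights.alloc(256);
 *   for (int i = 0; i < 256; i++) {
 *     data[i] = 1.0f;
 *   }
 *   weights.copy_to_device();
 *   ...
 *   weights.free();
 */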
+ + /* Take over data from an existing array. */ + void steal_data(array<T> &from) + { + device_free(); + host_free(); + + data_size = from.size(); + data_width = 0; + data_height = 0; + data_depth = 0; + host_pointer = from.steal_pointer(); + assert(device_pointer == 0); + } + + void give_data(array<T> &to) + { + device_free(); + + to.set_data((T *)host_pointer, data_size); + data_size = 0; + data_width = 0; + data_height = 0; + data_depth = 0; + host_pointer = 0; + assert(device_pointer == 0); + } + + /* Free device and host memory. */ + void free() + { + device_free(); + host_free(); + + data_size = 0; + data_width = 0; + data_height = 0; + data_depth = 0; + host_pointer = 0; + modified = true; + need_realloc_ = true; + assert(device_pointer == 0); + } + + void free_if_need_realloc(bool force_free) + { + if (need_realloc_ || force_free) { + free(); + } + } + + bool is_modified() const + { + return modified; + } + + bool need_realloc() + { + return need_realloc_; + } + + void tag_modified() + { + modified = true; + } + + void tag_realloc() + { + need_realloc_ = true; + tag_modified(); + } + + size_t size() const + { + return data_size; + } + + T *data() + { + return (T *)host_pointer; + } + + const T *data() const + { + return (T *)host_pointer; + } + + T &operator[](size_t i) + { + assert(i < data_size); + return data()[i]; + } + + void copy_to_device() + { + if (data_size != 0) { + device_copy_to(); + } + } + + void copy_to_device_if_modified() + { + if (!modified) { + return; + } + + copy_to_device(); + } + + void clear_modified() + { + modified = false; + need_realloc_ = false; + } + + void copy_from_device() + { + device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T)); + } + + void copy_from_device(size_t y, size_t w, size_t h) + { + device_copy_from(y, w, h, sizeof(T)); + } + + void zero_to_device() + { + device_zero(); + } + + void move_device(Device *new_device) + { + copy_from_device(); + device_free(); + device = new_device; + copy_to_device(); + } + + protected: + size_t size(size_t width, size_t height, size_t depth) + { + return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth); + } +}; + +/* Device Sub Memory + * + * Pointer into existing memory. It is not allocated separately, but created + * from an already allocated base memory. It is freed automatically when it + * goes out of scope, which should happen before base memory is freed. + * + * Note: some devices require offset and size of the sub_ptr to be properly + * aligned to device->mem_address_alingment(). */ + +class device_sub_ptr { + public: + device_sub_ptr(device_memory &mem, size_t offset, size_t size); + ~device_sub_ptr(); + + device_ptr operator*() const + { + return ptr; + } + + protected: + /* No copying. */ + device_sub_ptr &operator=(const device_sub_ptr &); + + Device *device; + device_ptr ptr; +}; + +/* Device Texture + * + * 2D or 3D image texture memory. */ + +class device_texture : public device_memory { + public: + device_texture(Device *device, + const char *name, + const uint slot, + ImageDataType image_data_type, + InterpolationType interpolation, + ExtensionType extension); + ~device_texture(); + + void *alloc(const size_t width, const size_t height, const size_t depth = 0); + void copy_to_device(); + + uint slot; + TextureInfo info; + + protected: + size_t size(const size_t width, const size_t height, const size_t depth) + { + return width * ((height == 0) ? 1 : height) * ((depth == 0) ?
1 : depth); + } +}; + +CCL_NAMESPACE_END + +#endif /* __DEVICE_MEMORY_H__ */ diff --git a/intern/cycles/device/multi/device.cpp b/intern/cycles/device/multi/device.cpp index 330a1ed06ef..56efec3e131 100644 --- a/intern/cycles/device/multi/device.cpp +++ b/intern/cycles/device/multi/device.cpp @@ -19,18 +19,18 @@ #include #include -#include "bvh/bvh_multi.h" +#include "bvh/multi.h" #include "device/device.h" -#include "device/device_queue.h" +#include "device/queue.h" #include "scene/geometry.h" -#include "util/util_foreach.h" -#include "util/util_list.h" -#include "util/util_logging.h" -#include "util/util_map.h" -#include "util/util_time.h" +#include "util/foreach.h" +#include "util/list.h" +#include "util/log.h" +#include "util/map.h" +#include "util/time.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/multi/device.h b/intern/cycles/device/multi/device.h index 6e121014a1f..ac77f6574ef 100644 --- a/intern/cycles/device/multi/device.h +++ b/intern/cycles/device/multi/device.h @@ -16,8 +16,8 @@ #pragma once -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/string.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/optix/device.cpp b/intern/cycles/device/optix/device.cpp index 13f23bd229a..a00169e719f 100644 --- a/intern/cycles/device/optix/device.cpp +++ b/intern/cycles/device/optix/device.cpp @@ -19,7 +19,8 @@ #include "device/cuda/device.h" #include "device/optix/device_impl.h" -#include "util/util_logging.h" + +#include "util/log.h" #ifdef WITH_OPTIX # include diff --git a/intern/cycles/device/optix/device.h b/intern/cycles/device/optix/device.h index 29fa729c2e4..dd60a7aa6e2 100644 --- a/intern/cycles/device/optix/device.h +++ b/intern/cycles/device/optix/device.h @@ -16,8 +16,8 @@ #pragma once -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/string.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 55c3fbd88f2..e9164cc0a76 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -20,7 +20,7 @@ # include "device/optix/device_impl.h" # include "bvh/bvh.h" -# include "bvh/bvh_optix.h" +# include "bvh/optix.h" # include "integrator/pass_accessor_gpu.h" @@ -30,12 +30,12 @@ # include "scene/pass.h" # include "scene/scene.h" -# include "util/util_debug.h" -# include "util/util_logging.h" -# include "util/util_md5.h" -# include "util/util_path.h" -# include "util/util_progress.h" -# include "util/util_time.h" +# include "util/debug.h" +# include "util/log.h" +# include "util/md5.h" +# include "util/path.h" +# include "util/progress.h" +# include "util/time.h" # undef __KERNEL_CPU__ # define __KERNEL_OPTIX__ @@ -1574,7 +1574,7 @@ void OptiXDevice::const_copy_to(const char *name, void *host, size_t size) return; \ } KERNEL_TEX(IntegratorStateGPU, __integrator_state) -# include "kernel/kernel_textures.h" +# include "kernel/textures.h" # undef KERNEL_TEX } diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h index b20d42f8c61..3ec98098eb7 100644 --- a/intern/cycles/device/optix/device_impl.h +++ b/intern/cycles/device/optix/device_impl.h @@ -22,7 +22,7 @@ # include "device/cuda/device_impl.h" # include "device/optix/queue.h" # include "device/optix/util.h" -# include "kernel/kernel_types.h" +# include "kernel/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/device/optix/queue.cpp 
b/intern/cycles/device/optix/queue.cpp index 458ed70baa8..f5bfd916ccf 100644 --- a/intern/cycles/device/optix/queue.cpp +++ b/intern/cycles/device/optix/queue.cpp @@ -19,7 +19,7 @@ # include "device/optix/queue.h" # include "device/optix/device_impl.h" -# include "util/util_time.h" +# include "util/time.h" # undef __KERNEL_CPU__ # define __KERNEL_OPTIX__ diff --git a/intern/cycles/device/queue.cpp b/intern/cycles/device/queue.cpp new file mode 100644 index 00000000000..556dc97f23b --- /dev/null +++ b/intern/cycles/device/queue.cpp @@ -0,0 +1,95 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/queue.h" + +#include "util/algorithm.h" +#include "util/log.h" +#include "util/time.h" + +#include <iomanip> + +CCL_NAMESPACE_BEGIN + +DeviceQueue::DeviceQueue(Device *device) + : device(device), last_kernels_enqueued_(0), last_sync_time_(0.0) +{ + DCHECK_NE(device, nullptr); +} + +DeviceQueue::~DeviceQueue() +{ + if (VLOG_IS_ON(3)) { + /* Print kernel execution times sorted by time. */ + vector<pair<DeviceKernelMask, double>> stats_sorted; + for (const auto &stat : stats_kernel_time_) { + stats_sorted.push_back(stat); + } + + sort(stats_sorted.begin(), + stats_sorted.end(), + [](const pair<DeviceKernelMask, double> &a, const pair<DeviceKernelMask, double> &b) { + return a.second > b.second; + }); + + VLOG(3) << "GPU queue stats:"; + for (const auto &[mask, time] : stats_sorted) { + VLOG(3) << " " << std::setfill(' ') << std::setw(10) << std::fixed << std::setprecision(5) + << std::right << time << "s: " << device_kernel_mask_as_string(mask); + } + } +} + +void DeviceQueue::debug_init_execution() +{ + if (VLOG_IS_ON(3)) { + last_sync_time_ = time_dt(); + } + + last_kernels_enqueued_ = 0; +} + +void DeviceQueue::debug_enqueue(DeviceKernel kernel, const int work_size) +{ + if (VLOG_IS_ON(3)) { + VLOG(4) << "GPU queue launch " << device_kernel_as_string(kernel) << ", work_size " + << work_size; + } + + last_kernels_enqueued_ |= (uint64_t(1) << (uint64_t)kernel); +} + +void DeviceQueue::debug_synchronize() +{ + if (VLOG_IS_ON(3)) { + const double new_time = time_dt(); + const double elapsed_time = new_time - last_sync_time_; + VLOG(4) << "GPU queue synchronize, elapsed " << std::setw(10) << elapsed_time << "s"; + + stats_kernel_time_[last_kernels_enqueued_] += elapsed_time; + + last_sync_time_ = new_time; + } + + last_kernels_enqueued_ = 0; +} + +string DeviceQueue::debug_active_kernels() +{ + return device_kernel_mask_as_string(last_kernels_enqueued_); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/device/queue.h b/intern/cycles/device/queue.h new file mode 100644 index 00000000000..188162f4b74 --- /dev/null +++ b/intern/cycles/device/queue.h @@ -0,0 +1,115 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "device/kernel.h" + +#include "device/graphics_interop.h" +#include "util/log.h" +#include "util/map.h" +#include "util/string.h" +#include "util/unique_ptr.h" + +CCL_NAMESPACE_BEGIN + +class Device; +class device_memory; + +struct KernelWorkTile; + +/* Abstraction of a command queue for a device. + * Provides an API to schedule kernel execution in a specific queue with minimal possible overhead + * from the driver side. + * + * This class encapsulates all properties needed for command execution. */ +class DeviceQueue { + public: + virtual ~DeviceQueue(); + + /* Number of concurrent states to process for integrator, + * based on number of cores and/or available memory. */ + virtual int num_concurrent_states(const size_t state_size) const = 0; + + /* Number of states which keeps the device occupied with work without losing performance. + * The renderer will add more work (when available) when the number of active paths falls below this + * value. */ + virtual int num_concurrent_busy_states() const = 0; + + /* Initialize execution of kernels on this queue. + * + * Will, for example, load all data required by the kernels from Device to global or path state. + * + * Use this method after device synchronization has finished and before enqueueing any kernels. */ + virtual void init_execution() = 0; + + /* Test if an optional device kernel is available. */ + virtual bool kernel_available(DeviceKernel kernel) const = 0; + + /* Enqueue kernel execution. + * + * Execute the kernel work_size times on the device. + * Supported argument types: + * - int: pass pointer to the int + * - device memory: pass pointer to device_memory.device_pointer + * Return false if there was an error executing this or a previous kernel. */ + virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) = 0; + + /* Wait until all enqueued kernels have finished execution. + * Return false if there was an error executing any of the enqueued kernels. */ + virtual bool synchronize() = 0; + + /* Copy memory to/from device as part of the command queue, to ensure + * operations are done in order without having to synchronize. */ + virtual void zero_to_device(device_memory &mem) = 0; + virtual void copy_to_device(device_memory &mem) = 0; + virtual void copy_from_device(device_memory &mem) = 0; + + /* Graphics resources interoperability. + * + * Interoperability here means that the device is capable of computing a result + * directly into an OpenGL (or other graphics library) buffer. */ + + /* Create a graphics interoperability context which takes care of mapping a graphics + * resource as a buffer writable by kernels of this device. */ + virtual unique_ptr<DeviceGraphicsInterop> graphics_interop_create() + { + LOG(FATAL) << "Request of GPU interop of a device which does not support it."; + return nullptr; + } + + /* Device this queue has been created for. */ + Device *device;
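/* Usage sketch (editorial example, not part of this patch): arguments are passed
 * to enqueue() following the convention documented above. The kernel choice,
 * argument order and the names queue, buffer and num_values are illustrative
 * only:
 *
 *   int num_values = 1024;
 *   void *args[] = {&buffer.device_pointer, &num_values};
 *   if (!queue->enqueue(DEVICE_KERNEL_PREFIX_SUM, num_values, args)) {
 *     // handle launch error
 *   }
 *   queue->synchronize();
 */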
+ + protected: + /* Hide construction so that allocation via `Device` API is enforced. */ + explicit DeviceQueue(Device *device); + + /* Implementations call these from the corresponding methods to generate debugging logs. */ + void debug_init_execution(); + void debug_enqueue(DeviceKernel kernel, const int work_size); + void debug_synchronize(); + string debug_active_kernels(); + + /* Combination of kernels enqueued together since last synchronize. */ + DeviceKernelMask last_kernels_enqueued_; + /* Time of the last synchronize call. */ + double last_sync_time_; + /* Accumulated execution time for combinations of kernels launched together. */ + map<DeviceKernelMask, double> stats_kernel_time_; +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/graph/node.cpp b/intern/cycles/graph/node.cpp index 8294e716ebe..a3d75e53afd 100644 --- a/intern/cycles/graph/node.cpp +++ b/intern/cycles/graph/node.cpp @@ -17,10 +17,10 @@ #include "graph/node.h" #include "graph/node_type.h" -#include "util/util_foreach.h" -#include "util/util_md5.h" -#include "util/util_param.h" -#include "util/util_transform.h" +#include "util/foreach.h" +#include "util/md5.h" +#include "util/param.h" +#include "util/transform.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/graph/node.h b/intern/cycles/graph/node.h index 8f27a82d37b..a00162a3b9a 100644 --- a/intern/cycles/graph/node.h +++ b/intern/cycles/graph/node.h @@ -20,9 +20,9 @@ #include "graph/node_type.h" -#include "util/util_array.h" -#include "util/util_map.h" -#include "util/util_param.h" +#include "util/array.h" +#include "util/map.h" +#include "util/param.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/graph/node_enum.h b/intern/cycles/graph/node_enum.h index d3ed0928a4f..831c6e4a9c4 100644 --- a/intern/cycles/graph/node_enum.h +++ b/intern/cycles/graph/node_enum.h @@ -16,8 +16,8 @@ #pragma once -#include "util/util_map.h" -#include "util/util_param.h" +#include "util/map.h" +#include "util/param.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/graph/node_type.cpp b/intern/cycles/graph/node_type.cpp index 4efbd6725ee..bce98c694e1 100644 --- a/intern/cycles/graph/node_type.cpp +++ b/intern/cycles/graph/node_type.cpp @@ -15,8 +15,8 @@ */ #include "graph/node_type.h" -#include "util/util_foreach.h" -#include "util/util_transform.h" +#include "util/foreach.h" +#include "util/transform.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/graph/node_type.h b/intern/cycles/graph/node_type.h index 8b37398fa17..71639341617 100644 --- a/intern/cycles/graph/node_type.h +++ b/intern/cycles/graph/node_type.h @@ -17,11 +17,11 @@ #pragma once #include "graph/node_enum.h" -#include "util/util_array.h" -#include "util/util_map.h" -#include "util/util_param.h" -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/array.h" +#include "util/map.h" +#include "util/param.h" +#include "util/string.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/graph/node_xml.cpp b/intern/cycles/graph/node_xml.cpp index 43462662b6a..b0c863ad4b5 100644 --- a/intern/cycles/graph/node_xml.cpp +++ b/intern/cycles/graph/node_xml.cpp @@ -16,9 +16,9 @@ #include "graph/node_xml.h" -#include "util/util_foreach.h" -#include "util/util_string.h" -#include "util/util_transform.h" +#include "util/foreach.h" +#include "util/string.h" +#include "util/transform.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/graph/node_xml.h b/intern/cycles/graph/node_xml.h index 15bbf5d5621..ddbc5213ab1 100644 --- a/intern/cycles/graph/node_xml.h +++ b/intern/cycles/graph/node_xml.h @@ -18,9 +18,9 @@ #include "graph/node.h" -#include "util/util_map.h" -#include "util/util_string.h" -#include
"util/util_xml.h" +#include "util/map.h" +#include "util/string.h" +#include "util/xml.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/adaptive_sampling.cpp b/intern/cycles/integrator/adaptive_sampling.cpp index 23fbcfea5c2..253879d67e3 100644 --- a/intern/cycles/integrator/adaptive_sampling.cpp +++ b/intern/cycles/integrator/adaptive_sampling.cpp @@ -16,7 +16,7 @@ #include "integrator/adaptive_sampling.h" -#include "util/util_math.h" +#include "util/math.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/denoiser.cpp b/intern/cycles/integrator/denoiser.cpp index 45f967b38eb..b6ca96faebf 100644 --- a/intern/cycles/integrator/denoiser.cpp +++ b/intern/cycles/integrator/denoiser.cpp @@ -20,8 +20,8 @@ #include "integrator/denoiser_oidn.h" #include "integrator/denoiser_optix.h" #include "session/buffers.h" -#include "util/util_logging.h" -#include "util/util_progress.h" +#include "util/log.h" +#include "util/progress.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/denoiser.h b/intern/cycles/integrator/denoiser.h index b02bcbeb046..8d7a644f8d9 100644 --- a/intern/cycles/integrator/denoiser.h +++ b/intern/cycles/integrator/denoiser.h @@ -19,10 +19,10 @@ /* TODO(sergey): The integrator folder might not be the best. Is easy to move files around if the * better place is figured out. */ +#include "device/denoise.h" #include "device/device.h" -#include "device/device_denoise.h" -#include "util/util_function.h" -#include "util/util_unique_ptr.h" +#include "util/function.h" +#include "util/unique_ptr.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/denoiser_device.cpp b/intern/cycles/integrator/denoiser_device.cpp index 1afd8d46866..2121dee500a 100644 --- a/intern/cycles/integrator/denoiser_device.cpp +++ b/intern/cycles/integrator/denoiser_device.cpp @@ -16,13 +16,13 @@ #include "integrator/denoiser_device.h" +#include "device/denoise.h" #include "device/device.h" -#include "device/device_denoise.h" -#include "device/device_memory.h" -#include "device/device_queue.h" +#include "device/memory.h" +#include "device/queue.h" #include "session/buffers.h" -#include "util/util_logging.h" -#include "util/util_progress.h" +#include "util/log.h" +#include "util/progress.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/denoiser_device.h b/intern/cycles/integrator/denoiser_device.h index 0fd934dba79..2bacecaa2a2 100644 --- a/intern/cycles/integrator/denoiser_device.h +++ b/intern/cycles/integrator/denoiser_device.h @@ -17,7 +17,7 @@ #pragma once #include "integrator/denoiser.h" -#include "util/util_unique_ptr.h" +#include "util/unique_ptr.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/denoiser_oidn.cpp b/intern/cycles/integrator/denoiser_oidn.cpp index c3555008699..b09b95a11b0 100644 --- a/intern/cycles/integrator/denoiser_oidn.cpp +++ b/intern/cycles/integrator/denoiser_oidn.cpp @@ -19,12 +19,12 @@ #include #include "device/device.h" -#include "device/device_queue.h" +#include "device/queue.h" #include "integrator/pass_accessor_cpu.h" #include "session/buffers.h" -#include "util/util_array.h" -#include "util/util_logging.h" -#include "util/util_openimagedenoise.h" +#include "util/array.h" +#include "util/log.h" +#include "util/openimagedenoise.h" #include "kernel/device/cpu/compat.h" #include "kernel/device/cpu/kernel.h" diff --git a/intern/cycles/integrator/denoiser_oidn.h b/intern/cycles/integrator/denoiser_oidn.h index 566e761ae79..a0ec3e26b9c 100644 --- a/intern/cycles/integrator/denoiser_oidn.h +++ 
b/intern/cycles/integrator/denoiser_oidn.h @@ -17,8 +17,8 @@ #pragma once #include "integrator/denoiser.h" -#include "util/util_thread.h" -#include "util/util_unique_ptr.h" +#include "util/thread.h" +#include "util/unique_ptr.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/denoiser_optix.cpp b/intern/cycles/integrator/denoiser_optix.cpp index 5f9de23bfe6..ebd95d62ae4 100644 --- a/intern/cycles/integrator/denoiser_optix.cpp +++ b/intern/cycles/integrator/denoiser_optix.cpp @@ -16,8 +16,8 @@ #include "integrator/denoiser_optix.h" +#include "device/denoise.h" #include "device/device.h" -#include "device/device_denoise.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/pass_accessor.cpp b/intern/cycles/integrator/pass_accessor.cpp index 0a8c445eca7..7e19de51daa 100644 --- a/intern/cycles/integrator/pass_accessor.cpp +++ b/intern/cycles/integrator/pass_accessor.cpp @@ -17,11 +17,11 @@ #include "integrator/pass_accessor.h" #include "session/buffers.h" -#include "util/util_logging.h" +#include "util/log.h" // clang-format off #include "kernel/device/cpu/compat.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" // clang-format on CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/pass_accessor.h b/intern/cycles/integrator/pass_accessor.h index c9cf3ba8956..09eae0156c9 100644 --- a/intern/cycles/integrator/pass_accessor.h +++ b/intern/cycles/integrator/pass_accessor.h @@ -17,9 +17,9 @@ #pragma once #include "scene/pass.h" -#include "util/util_half.h" -#include "util/util_string.h" -#include "util/util_types.h" +#include "util/half.h" +#include "util/string.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/pass_accessor_cpu.cpp b/intern/cycles/integrator/pass_accessor_cpu.cpp index f3ca38c667d..820da757be0 100644 --- a/intern/cycles/integrator/pass_accessor_cpu.cpp +++ b/intern/cycles/integrator/pass_accessor_cpu.cpp @@ -17,14 +17,14 @@ #include "integrator/pass_accessor_cpu.h" #include "session/buffers.h" -#include "util/util_logging.h" -#include "util/util_tbb.h" +#include "util/log.h" +#include "util/tbb.h" // clang-format off #include "kernel/device/cpu/compat.h" #include "kernel/device/cpu/globals.h" -#include "kernel/kernel_types.h" -#include "kernel/film/film_read.h" +#include "kernel/types.h" +#include "kernel/film/read.h" // clang-format on CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/pass_accessor_gpu.cpp b/intern/cycles/integrator/pass_accessor_gpu.cpp index 3b4290a89e6..c03ef64a2b2 100644 --- a/intern/cycles/integrator/pass_accessor_gpu.cpp +++ b/intern/cycles/integrator/pass_accessor_gpu.cpp @@ -16,9 +16,9 @@ #include "integrator/pass_accessor_gpu.h" -#include "device/device_queue.h" +#include "device/queue.h" #include "session/buffers.h" -#include "util/util_logging.h" +#include "util/log.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/pass_accessor_gpu.h b/intern/cycles/integrator/pass_accessor_gpu.h index bc37e4387f3..f3442d90013 100644 --- a/intern/cycles/integrator/pass_accessor_gpu.h +++ b/intern/cycles/integrator/pass_accessor_gpu.h @@ -17,7 +17,7 @@ #pragma once #include "integrator/pass_accessor.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp index 94687f7bc95..daf270d6686 100644 --- a/intern/cycles/integrator/path_trace.cpp +++ b/intern/cycles/integrator/path_trace.cpp @@ -25,11 +25,11 @@ #include "scene/pass.h" #include "scene/scene.h" #include 
"session/tile.h" -#include "util/util_algorithm.h" -#include "util/util_logging.h" -#include "util/util_progress.h" -#include "util/util_tbb.h" -#include "util/util_time.h" +#include "util/algorithm.h" +#include "util/log.h" +#include "util/progress.h" +#include "util/tbb.h" +#include "util/time.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/path_trace.h b/intern/cycles/integrator/path_trace.h index 89fa5fa8eaf..9b079352a63 100644 --- a/intern/cycles/integrator/path_trace.h +++ b/intern/cycles/integrator/path_trace.h @@ -21,10 +21,10 @@ #include "integrator/path_trace_work.h" #include "integrator/work_balancer.h" #include "session/buffers.h" -#include "util/util_function.h" -#include "util/util_thread.h" -#include "util/util_unique_ptr.h" -#include "util/util_vector.h" +#include "util/function.h" +#include "util/thread.h" +#include "util/unique_ptr.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/path_trace_display.cpp b/intern/cycles/integrator/path_trace_display.cpp index 7455a107ae6..c1cade923b1 100644 --- a/intern/cycles/integrator/path_trace_display.cpp +++ b/intern/cycles/integrator/path_trace_display.cpp @@ -18,7 +18,7 @@ #include "session/buffers.h" -#include "util/util_logging.h" +#include "util/log.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/path_trace_display.h b/intern/cycles/integrator/path_trace_display.h index d40e45e7ead..b69ee85fbbc 100644 --- a/intern/cycles/integrator/path_trace_display.h +++ b/intern/cycles/integrator/path_trace_display.h @@ -18,10 +18,10 @@ #include "session/display_driver.h" -#include "util/util_half.h" -#include "util/util_thread.h" -#include "util/util_types.h" -#include "util/util_unique_ptr.h" +#include "util/half.h" +#include "util/thread.h" +#include "util/types.h" +#include "util/unique_ptr.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/path_trace_work.cpp b/intern/cycles/integrator/path_trace_work.cpp index e08c410a579..b0c40cfe15c 100644 --- a/intern/cycles/integrator/path_trace_work.cpp +++ b/intern/cycles/integrator/path_trace_work.cpp @@ -24,7 +24,7 @@ #include "scene/scene.h" #include "session/buffers.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/path_trace_work.h b/intern/cycles/integrator/path_trace_work.h index 6b2a6c71e81..0dc7cd2f896 100644 --- a/intern/cycles/integrator/path_trace_work.h +++ b/intern/cycles/integrator/path_trace_work.h @@ -19,8 +19,8 @@ #include "integrator/pass_accessor.h" #include "scene/pass.h" #include "session/buffers.h" -#include "util/util_types.h" -#include "util/util_unique_ptr.h" +#include "util/types.h" +#include "util/unique_ptr.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/path_trace_work_cpu.cpp b/intern/cycles/integrator/path_trace_work_cpu.cpp index 1e59682a64c..541a7eca02f 100644 --- a/intern/cycles/integrator/path_trace_work_cpu.cpp +++ b/intern/cycles/integrator/path_trace_work_cpu.cpp @@ -19,7 +19,7 @@ #include "device/cpu/kernel.h" #include "device/device.h" -#include "kernel/integrator/integrator_path_state.h" +#include "kernel/integrator/path_state.h" #include "integrator/pass_accessor_cpu.h" #include "integrator/path_trace_display.h" @@ -27,9 +27,9 @@ #include "scene/scene.h" #include "session/buffers.h" -#include "util/util_atomic.h" -#include "util/util_logging.h" -#include "util/util_tbb.h" +#include "util/atomic.h" +#include "util/log.h" +#include "util/tbb.h" CCL_NAMESPACE_BEGIN diff --git 
a/intern/cycles/integrator/path_trace_work_cpu.h b/intern/cycles/integrator/path_trace_work_cpu.h index 91c024f4e4a..6e734690811 100644 --- a/intern/cycles/integrator/path_trace_work_cpu.h +++ b/intern/cycles/integrator/path_trace_work_cpu.h @@ -16,14 +16,14 @@ #pragma once -#include "kernel/integrator/integrator_state.h" +#include "kernel/integrator/state.h" #include "device/cpu/kernel_thread_globals.h" -#include "device/device_queue.h" +#include "device/queue.h" #include "integrator/path_trace_work.h" -#include "util/util_vector.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp index 67e5ae70316..b7dc4e5d181 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.cpp +++ b/intern/cycles/integrator/path_trace_work_gpu.cpp @@ -22,12 +22,12 @@ #include "integrator/pass_accessor_gpu.h" #include "scene/scene.h" #include "session/buffers.h" -#include "util/util_logging.h" -#include "util/util_string.h" -#include "util/util_tbb.h" -#include "util/util_time.h" +#include "util/log.h" +#include "util/string.h" +#include "util/tbb.h" +#include "util/time.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" CCL_NAMESPACE_BEGIN @@ -53,9 +53,9 @@ static size_t estimate_single_state_size() * rely on this. */ #define KERNEL_STRUCT_VOLUME_STACK_SIZE 4 -#include "kernel/integrator/integrator_state_template.h" +#include "kernel/integrator/state_template.h" -#include "kernel/integrator/integrator_shadow_state_template.h" +#include "kernel/integrator/shadow_state_template.h" #undef KERNEL_STRUCT_BEGIN #undef KERNEL_STRUCT_MEMBER @@ -146,9 +146,9 @@ void PathTraceWorkGPU::alloc_integrator_soa() } #define KERNEL_STRUCT_VOLUME_STACK_SIZE (integrator_state_soa_volume_stack_size_) -#include "kernel/integrator/integrator_state_template.h" +#include "kernel/integrator/state_template.h" -#include "kernel/integrator/integrator_shadow_state_template.h" +#include "kernel/integrator/shadow_state_template.h" #undef KERNEL_STRUCT_BEGIN #undef KERNEL_STRUCT_MEMBER diff --git a/intern/cycles/integrator/path_trace_work_gpu.h b/intern/cycles/integrator/path_trace_work_gpu.h index 8734d2c2852..c5e291e72db 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.h +++ b/intern/cycles/integrator/path_trace_work_gpu.h @@ -16,16 +16,16 @@ #pragma once -#include "kernel/integrator/integrator_state.h" +#include "kernel/integrator/state.h" -#include "device/device_graphics_interop.h" -#include "device/device_memory.h" -#include "device/device_queue.h" +#include "device/graphics_interop.h" +#include "device/memory.h" +#include "device/queue.h" #include "integrator/path_trace_work.h" #include "integrator/work_tile_scheduler.h" -#include "util/util_vector.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/render_scheduler.cpp b/intern/cycles/integrator/render_scheduler.cpp index 3c64cba5feb..f776d01ef67 100644 --- a/intern/cycles/integrator/render_scheduler.cpp +++ b/intern/cycles/integrator/render_scheduler.cpp @@ -18,9 +18,9 @@ #include "session/session.h" #include "session/tile.h" -#include "util/util_logging.h" -#include "util/util_math.h" -#include "util/util_time.h" +#include "util/log.h" +#include "util/math.h" +#include "util/time.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/render_scheduler.h b/intern/cycles/integrator/render_scheduler.h index a2c1e75d3b6..d7b7413ae31 100644 --- a/intern/cycles/integrator/render_scheduler.h +++ 
b/intern/cycles/integrator/render_scheduler.h @@ -19,7 +19,7 @@ #include "integrator/adaptive_sampling.h" #include "integrator/denoiser.h" /* For DenoiseParams. */ #include "session/buffers.h" -#include "util/util_string.h" +#include "util/string.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp index 3de7bb6fd16..42cbf87f254 100644 --- a/intern/cycles/integrator/shader_eval.cpp +++ b/intern/cycles/integrator/shader_eval.cpp @@ -17,14 +17,14 @@ #include "integrator/shader_eval.h" #include "device/device.h" -#include "device/device_queue.h" +#include "device/queue.h" #include "device/cpu/kernel.h" #include "device/cpu/kernel_thread_globals.h" -#include "util/util_logging.h" -#include "util/util_progress.h" -#include "util/util_tbb.h" +#include "util/log.h" +#include "util/progress.h" +#include "util/tbb.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/shader_eval.h b/intern/cycles/integrator/shader_eval.h index 43b6b1bdd47..3ae63b84d04 100644 --- a/intern/cycles/integrator/shader_eval.h +++ b/intern/cycles/integrator/shader_eval.h @@ -16,11 +16,11 @@ #pragma once -#include "device/device_memory.h" +#include "device/memory.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "util/util_function.h" +#include "util/function.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/tile.cpp b/intern/cycles/integrator/tile.cpp index 3387b7bedf1..7ea73451d80 100644 --- a/intern/cycles/integrator/tile.cpp +++ b/intern/cycles/integrator/tile.cpp @@ -16,8 +16,8 @@ #include "integrator/tile.h" -#include "util/util_logging.h" -#include "util/util_math.h" +#include "util/log.h" +#include "util/math.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/tile.h b/intern/cycles/integrator/tile.h index d0824843ddb..879c68b875c 100644 --- a/intern/cycles/integrator/tile.h +++ b/intern/cycles/integrator/tile.h @@ -18,7 +18,7 @@ #include -#include "util/util_types.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/work_balancer.cpp b/intern/cycles/integrator/work_balancer.cpp index 9f96fe3632b..4c6fa341f35 100644 --- a/intern/cycles/integrator/work_balancer.cpp +++ b/intern/cycles/integrator/work_balancer.cpp @@ -16,9 +16,9 @@ #include "integrator/work_balancer.h" -#include "util/util_math.h" +#include "util/math.h" -#include "util/util_logging.h" +#include "util/log.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/work_balancer.h b/intern/cycles/integrator/work_balancer.h index fc5e561845e..86ff9335f91 100644 --- a/intern/cycles/integrator/work_balancer.h +++ b/intern/cycles/integrator/work_balancer.h @@ -16,7 +16,7 @@ #pragma once -#include "util/util_vector.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/work_tile_scheduler.cpp b/intern/cycles/integrator/work_tile_scheduler.cpp index a6775f27b65..c874dffde91 100644 --- a/intern/cycles/integrator/work_tile_scheduler.cpp +++ b/intern/cycles/integrator/work_tile_scheduler.cpp @@ -16,11 +16,11 @@ #include "integrator/work_tile_scheduler.h" -#include "device/device_queue.h" +#include "device/queue.h" #include "integrator/tile.h" #include "session/buffers.h" -#include "util/util_atomic.h" -#include "util/util_logging.h" +#include "util/atomic.h" +#include "util/log.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/integrator/work_tile_scheduler.h b/intern/cycles/integrator/work_tile_scheduler.h index 85f11b601c7..155bba5cb68 100644 --- 
a/intern/cycles/integrator/work_tile_scheduler.h +++ b/intern/cycles/integrator/work_tile_scheduler.h @@ -17,7 +17,7 @@ #pragma once #include "integrator/tile.h" -#include "util/util_types.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index f27bcb41d3d..6d57eff3d22 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -47,41 +47,41 @@ set(SRC_DEVICE_OPTIX set(SRC_HEADERS bake/bake.h bvh/bvh.h - bvh/bvh_nodes.h - bvh/bvh_shadow_all.h - bvh/bvh_local.h - bvh/bvh_traversal.h - bvh/bvh_types.h - bvh/bvh_util.h - bvh/bvh_volume.h - bvh/bvh_volume_all.h - bvh/bvh_embree.h + bvh/nodes.h + bvh/shadow_all.h + bvh/local.h + bvh/traversal.h + bvh/types.h + bvh/util.h + bvh/volume.h + bvh/volume_all.h + bvh/embree.h camera/camera.h - camera/camera_projection.h - film/film_accumulate.h - film/film_adaptive_sampling.h - film/film_id_passes.h - film/film_passes.h - film/film_read.h - film/film_write_passes.h - integrator/integrator_path_state.h - integrator/integrator_shader_eval.h - integrator/integrator_shadow_catcher.h - kernel_textures.h - kernel_types.h + camera/projection.h + film/accumulate.h + film/adaptive_sampling.h + film/id_passes.h + film/passes.h + film/read.h + film/write_passes.h + integrator/path_state.h + integrator/shader_eval.h + integrator/shadow_catcher.h + textures.h + types.h light/light.h - light/light_background.h - light/light_common.h - light/light_sample.h - sample/sample_jitter.h - sample/sample_lcg.h - sample/sample_mapping.h - sample/sample_mis.h - sample/sample_pattern.h - util/util_color.h - util/util_differential.h - util/util_lookup_table.h - util/util_profiling.h + light/background.h + light/common.h + light/sample.h + sample/jitter.h + sample/lcg.h + sample/mapping.h + sample/mis.h + sample/pattern.h + util/color.h + util/differential.h + util/lookup_table.h + util/profiling.h ) set(SRC_DEVICE_CPU_HEADERS @@ -147,155 +147,155 @@ set(SRC_CLOSURE_HEADERS set(SRC_SVM_HEADERS svm/svm.h - svm/svm_ao.h - svm/svm_aov.h - svm/svm_attribute.h - svm/svm_bevel.h - svm/svm_blackbody.h - svm/svm_bump.h - svm/svm_camera.h - svm/svm_clamp.h - svm/svm_closure.h - svm/svm_convert.h - svm/svm_checker.h - svm/svm_color_util.h - svm/svm_brick.h - svm/svm_displace.h - svm/svm_fresnel.h - svm/svm_wireframe.h - svm/svm_wavelength.h - svm/svm_gamma.h - svm/svm_brightness.h - svm/svm_geometry.h - svm/svm_gradient.h - svm/svm_hsv.h - svm/svm_ies.h - svm/svm_image.h - svm/svm_invert.h - svm/svm_light_path.h - svm/svm_magic.h - svm/svm_map_range.h - svm/svm_mapping.h - svm/svm_mapping_util.h - svm/svm_math.h - svm/svm_math_util.h - svm/svm_mix.h - svm/svm_musgrave.h - svm/svm_noise.h - svm/svm_noisetex.h - svm/svm_normal.h - svm/svm_ramp.h - svm/svm_ramp_util.h - svm/svm_sepcomb_hsv.h - svm/svm_sepcomb_vector.h - svm/svm_sky.h - svm/svm_tex_coord.h - svm/svm_fractal_noise.h - svm/svm_types.h - svm/svm_value.h - svm/svm_vector_rotate.h - svm/svm_vector_transform.h - svm/svm_voronoi.h - svm/svm_voxel.h - svm/svm_wave.h - svm/svm_white_noise.h - svm/svm_vertex_color.h + svm/ao.h + svm/aov.h + svm/attribute.h + svm/bevel.h + svm/blackbody.h + svm/bump.h + svm/camera.h + svm/clamp.h + svm/closure.h + svm/convert.h + svm/checker.h + svm/color_util.h + svm/brick.h + svm/displace.h + svm/fresnel.h + svm/wireframe.h + svm/wavelength.h + svm/gamma.h + svm/brightness.h + svm/geometry.h + svm/gradient.h + svm/hsv.h + svm/ies.h + svm/image.h + svm/invert.h + svm/light_path.h + 
svm/magic.h + svm/map_range.h + svm/mapping.h + svm/mapping_util.h + svm/math.h + svm/math_util.h + svm/mix.h + svm/musgrave.h + svm/noise.h + svm/noisetex.h + svm/normal.h + svm/ramp.h + svm/ramp_util.h + svm/sepcomb_hsv.h + svm/sepcomb_vector.h + svm/sky.h + svm/tex_coord.h + svm/fractal_noise.h + svm/types.h + svm/value.h + svm/vector_rotate.h + svm/vector_transform.h + svm/voronoi.h + svm/voxel.h + svm/wave.h + svm/white_noise.h + svm/vertex_color.h ) set(SRC_GEOM_HEADERS geom/geom.h - geom/geom_attribute.h - geom/geom_curve.h - geom/geom_curve_intersect.h - geom/geom_motion_curve.h - geom/geom_motion_triangle.h - geom/geom_motion_triangle_intersect.h - geom/geom_motion_triangle_shader.h - geom/geom_object.h - geom/geom_patch.h - geom/geom_primitive.h - geom/geom_shader_data.h - geom/geom_subd_triangle.h - geom/geom_triangle.h - geom/geom_triangle_intersect.h - geom/geom_volume.h + geom/attribute.h + geom/curve.h + geom/curve_intersect.h + geom/motion_curve.h + geom/motion_triangle.h + geom/motion_triangle_intersect.h + geom/motion_triangle_shader.h + geom/object.h + geom/patch.h + geom/primitive.h + geom/shader_data.h + geom/subd_triangle.h + geom/triangle.h + geom/triangle_intersect.h + geom/volume.h ) set(SRC_INTEGRATOR_HEADERS - integrator/integrator_init_from_bake.h - integrator/integrator_init_from_camera.h - integrator/integrator_intersect_closest.h - integrator/integrator_intersect_shadow.h - integrator/integrator_intersect_subsurface.h - integrator/integrator_intersect_volume_stack.h - integrator/integrator_megakernel.h - integrator/integrator_shade_background.h - integrator/integrator_shade_light.h - integrator/integrator_shade_shadow.h - integrator/integrator_shade_surface.h - integrator/integrator_shade_volume.h - integrator/integrator_shadow_state_template.h - integrator/integrator_state.h - integrator/integrator_state_flow.h - integrator/integrator_state_template.h - integrator/integrator_state_util.h - integrator/integrator_subsurface.h - integrator/integrator_subsurface_disk.h - integrator/integrator_subsurface_random_walk.h - integrator/integrator_volume_stack.h + integrator/init_from_bake.h + integrator/init_from_camera.h + integrator/intersect_closest.h + integrator/intersect_shadow.h + integrator/intersect_subsurface.h + integrator/intersect_volume_stack.h + integrator/megakernel.h + integrator/shade_background.h + integrator/shade_light.h + integrator/shade_shadow.h + integrator/shade_surface.h + integrator/shade_volume.h + integrator/shadow_state_template.h + integrator/state.h + integrator/state_flow.h + integrator/state_template.h + integrator/state_util.h + integrator/subsurface.h + integrator/subsurface_disk.h + integrator/subsurface_random_walk.h + integrator/volume_stack.h ) set(SRC_UTIL_HEADERS - ../util/util_atomic.h - ../util/util_color.h - ../util/util_defines.h - ../util/util_half.h - ../util/util_hash.h - ../util/util_math.h - ../util/util_math_fast.h - ../util/util_math_intersect.h - ../util/util_math_float2.h - ../util/util_math_float3.h - ../util/util_math_float4.h - ../util/util_math_int2.h - ../util/util_math_int3.h - ../util/util_math_int4.h - ../util/util_math_matrix.h - ../util/util_projection.h - ../util/util_rect.h - ../util/util_static_assert.h - ../util/util_transform.h - ../util/util_texture.h - ../util/util_types.h - ../util/util_types_float2.h - ../util/util_types_float2_impl.h - ../util/util_types_float3.h - ../util/util_types_float3_impl.h - ../util/util_types_float4.h - ../util/util_types_float4_impl.h - ../util/util_types_float8.h - 
../util/util_types_float8_impl.h - ../util/util_types_int2.h - ../util/util_types_int2_impl.h - ../util/util_types_int3.h - ../util/util_types_int3_impl.h - ../util/util_types_int4.h - ../util/util_types_int4_impl.h - ../util/util_types_uchar2.h - ../util/util_types_uchar2_impl.h - ../util/util_types_uchar3.h - ../util/util_types_uchar3_impl.h - ../util/util_types_uchar4.h - ../util/util_types_uchar4_impl.h - ../util/util_types_uint2.h - ../util/util_types_uint2_impl.h - ../util/util_types_uint3.h - ../util/util_types_uint3_impl.h - ../util/util_types_uint4.h - ../util/util_types_uint4_impl.h - ../util/util_types_ushort4.h - ../util/util_types_vector3.h - ../util/util_types_vector3_impl.h + ../util/atomic.h + ../util/color.h + ../util/defines.h + ../util/half.h + ../util/hash.h + ../util/math.h + ../util/math_fast.h + ../util/math_intersect.h + ../util/math_float2.h + ../util/math_float3.h + ../util/math_float4.h + ../util/math_int2.h + ../util/math_int3.h + ../util/math_int4.h + ../util/math_matrix.h + ../util/projection.h + ../util/rect.h + ../util/static_assert.h + ../util/transform.h + ../util/texture.h + ../util/types.h + ../util/types_float2.h + ../util/types_float2_impl.h + ../util/types_float3.h + ../util/types_float3_impl.h + ../util/types_float4.h + ../util/types_float4_impl.h + ../util/types_float8.h + ../util/types_float8_impl.h + ../util/types_int2.h + ../util/types_int2_impl.h + ../util/types_int3.h + ../util/types_int3_impl.h + ../util/types_int4.h + ../util/types_int4_impl.h + ../util/types_uchar2.h + ../util/types_uchar2_impl.h + ../util/types_uchar3.h + ../util/types_uchar3_impl.h + ../util/types_uchar4.h + ../util/types_uchar4_impl.h + ../util/types_uint2.h + ../util/types_uint2_impl.h + ../util/types_uint3.h + ../util/types_uint3_impl.h + ../util/types_uint4.h + ../util/types_uint4_impl.h + ../util/types_ushort4.h + ../util/types_vector3.h + ../util/types_vector3_impl.h ) set(LIB diff --git a/intern/cycles/kernel/bake/bake.h b/intern/cycles/kernel/bake/bake.h index e234d56bd3c..0a78a635d75 100644 --- a/intern/cycles/kernel/bake/bake.h +++ b/intern/cycles/kernel/bake/bake.h @@ -16,8 +16,8 @@ #pragma once -#include "kernel/camera/camera_projection.h" -#include "kernel/integrator/integrator_shader_eval.h" +#include "kernel/camera/projection.h" +#include "kernel/integrator/shader_eval.h" #include "kernel/geom/geom.h" diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index 813ac15711e..0e083812355 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -28,13 +28,13 @@ #pragma once #ifdef __EMBREE__ -# include "kernel/bvh/bvh_embree.h" +# include "kernel/bvh/embree.h" #endif -#include "kernel/bvh/bvh_types.h" -#include "kernel/bvh/bvh_util.h" +#include "kernel/bvh/types.h" +#include "kernel/bvh/util.h" -#include "kernel/integrator/integrator_state_util.h" +#include "kernel/integrator/state_util.h" CCL_NAMESPACE_BEGIN @@ -42,28 +42,28 @@ CCL_NAMESPACE_BEGIN /* Regular BVH traversal */ -# include "kernel/bvh/bvh_nodes.h" +# include "kernel/bvh/nodes.h" # define BVH_FUNCTION_NAME bvh_intersect # define BVH_FUNCTION_FEATURES 0 -# include "kernel/bvh/bvh_traversal.h" +# include "kernel/bvh/traversal.h" # if defined(__HAIR__) # define BVH_FUNCTION_NAME bvh_intersect_hair # define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/bvh_traversal.h" +# include "kernel/bvh/traversal.h" # endif # if defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_motion # define BVH_FUNCTION_FEATURES BVH_MOTION -# include 
"kernel/bvh/bvh_traversal.h" +# include "kernel/bvh/traversal.h" # endif # if defined(__HAIR__) && defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_hair_motion # define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION -# include "kernel/bvh/bvh_traversal.h" +# include "kernel/bvh/traversal.h" # endif /* Subsurface scattering BVH traversal */ @@ -71,12 +71,12 @@ CCL_NAMESPACE_BEGIN # if defined(__BVH_LOCAL__) # define BVH_FUNCTION_NAME bvh_intersect_local # define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/bvh_local.h" +# include "kernel/bvh/local.h" # if defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_local_motion # define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/bvh_local.h" +# include "kernel/bvh/local.h" # endif # endif /* __BVH_LOCAL__ */ @@ -85,12 +85,12 @@ CCL_NAMESPACE_BEGIN # if defined(__VOLUME__) # define BVH_FUNCTION_NAME bvh_intersect_volume # define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/bvh_volume.h" +# include "kernel/bvh/volume.h" # if defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_volume_motion # define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/bvh_volume.h" +# include "kernel/bvh/volume.h" # endif # endif /* __VOLUME__ */ @@ -99,24 +99,24 @@ CCL_NAMESPACE_BEGIN # if defined(__SHADOW_RECORD_ALL__) # define BVH_FUNCTION_NAME bvh_intersect_shadow_all # define BVH_FUNCTION_FEATURES 0 -# include "kernel/bvh/bvh_shadow_all.h" +# include "kernel/bvh/shadow_all.h" # if defined(__HAIR__) # define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair # define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/bvh_shadow_all.h" +# include "kernel/bvh/shadow_all.h" # endif # if defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion # define BVH_FUNCTION_FEATURES BVH_MOTION -# include "kernel/bvh/bvh_shadow_all.h" +# include "kernel/bvh/shadow_all.h" # endif # if defined(__HAIR__) && defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion # define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION -# include "kernel/bvh/bvh_shadow_all.h" +# include "kernel/bvh/shadow_all.h" # endif # endif /* __SHADOW_RECORD_ALL__ */ @@ -125,12 +125,12 @@ CCL_NAMESPACE_BEGIN # if defined(__VOLUME_RECORD_ALL__) # define BVH_FUNCTION_NAME bvh_intersect_volume_all # define BVH_FUNCTION_FEATURES BVH_HAIR -# include "kernel/bvh/bvh_volume_all.h" +# include "kernel/bvh/volume_all.h" # if defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion # define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR -# include "kernel/bvh/bvh_volume_all.h" +# include "kernel/bvh/volume_all.h" # endif # endif /* __VOLUME_RECORD_ALL__ */ diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h deleted file mode 100644 index 321e0f28dae..00000000000 --- a/intern/cycles/kernel/bvh/bvh_embree.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2018, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -#include "kernel/device/cpu/compat.h" -#include "kernel/device/cpu/globals.h" - -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -struct CCLIntersectContext { - typedef enum { - RAY_REGULAR = 0, - RAY_SHADOW_ALL = 1, - RAY_LOCAL = 2, - RAY_SSS = 3, - RAY_VOLUME_ALL = 4, - } RayType; - - KernelGlobals kg; - RayType type; - - /* for shadow rays */ - Intersection *isect_s; - uint max_hits; - uint num_hits; - uint num_recorded_hits; - float throughput; - float max_t; - bool opaque_hit; - - /* for SSS Rays: */ - LocalIntersection *local_isect; - int local_object_id; - uint *lcg_state; - - CCLIntersectContext(KernelGlobals kg_, RayType type_) - { - kg = kg_; - type = type_; - max_hits = 1; - num_hits = 0; - num_recorded_hits = 0; - throughput = 1.0f; - max_t = FLT_MAX; - opaque_hit = false; - isect_s = NULL; - local_isect = NULL; - local_object_id = -1; - lcg_state = NULL; - } -}; - -class IntersectContext { - public: - IntersectContext(CCLIntersectContext *ctx) - { - rtcInitIntersectContext(&context); - userRayExt = ctx; - } - RTCIntersectContext context; - CCLIntersectContext *userRayExt; -}; - -ccl_device_inline void kernel_embree_setup_ray(const Ray &ray, - RTCRay &rtc_ray, - const uint visibility) -{ - rtc_ray.org_x = ray.P.x; - rtc_ray.org_y = ray.P.y; - rtc_ray.org_z = ray.P.z; - rtc_ray.dir_x = ray.D.x; - rtc_ray.dir_y = ray.D.y; - rtc_ray.dir_z = ray.D.z; - rtc_ray.tnear = 0.0f; - rtc_ray.tfar = ray.t; - rtc_ray.time = ray.time; - rtc_ray.mask = visibility; -} - -ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray, - RTCRayHit &rayhit, - const uint visibility) -{ - kernel_embree_setup_ray(ray, rayhit.ray, visibility); - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID; -} - -ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg, - const RTCRay *ray, - const RTCHit *hit, - Intersection *isect) -{ - isect->t = ray->tfar; - if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); - isect->prim = hit->primID + - (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - isect->object = hit->instID[0] / 2; - } - else { - isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); - isect->object = hit->geomID / 2; - } - - const bool is_hair = hit->geomID & 1; - if (is_hair) { - const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, isect->prim); - isect->type = segment.type; - isect->prim = segment.prim; - isect->u = hit->u; - isect->v = hit->v; - } - else { - isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type; - isect->u = 1.0f - hit->v - hit->u; - isect->v = hit->u; - } -} - -ccl_device_inline void kernel_embree_convert_sss_hit( - KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object) -{ - isect->u = 1.0f - hit->v - hit->u; - isect->v = hit->u; - isect->t = ray->tfar; - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, object * 2)); - isect->prim = hit->primID + - (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - isect->object = object; - isect->type = kernel_tex_fetch(__objects, object).primitive_type; -} - -CCL_NAMESPACE_END diff --git 
a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h deleted file mode 100644 index 79cde69699e..00000000000 --- a/intern/cycles/kernel/bvh/bvh_local.h +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2013, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT bvh_node_intersect -#else -# define NODE_INTERSECT bvh_aligned_node_intersect -#endif - -/* This is a template BVH traversal function for finding local intersections - * around the shading point, for subsurface scattering and bevel. We disable - * various features for performance, and for instanced objects avoid traversing - * other parts of the scene. - * - * BVH_MOTION: motion blur rendering - */ - -#ifndef __KERNEL_GPU__ -ccl_device -#else -ccl_device_inline -#endif - bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, - ccl_private const Ray *ray, - ccl_private LocalIntersection *local_isect, - int local_object, - ccl_private uint *lcg_state, - int max_hits) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversal_stack[BVH_STACK_SIZE]; - traversal_stack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stack_ptr = 0; - int node_addr = kernel_tex_fetch(__object_node, local_object); - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = ray->t; - - if (local_isect != NULL) { - local_isect->num_hits = 0; - } - kernel_assert((local_isect == NULL) == (max_hits == 0)); - - const int object_flag = kernel_tex_fetch(__object_flag, local_object); - if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; - isect_t *= bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm); -#else - isect_t *= bvh_instance_push(kg, local_object, ray, &P, &dir, &idir); -#endif - object = local_object; - } - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - int node_addr_child1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); - - traverse_mask = NODE_INTERSECT(kg, - P, -#if BVH_FEATURE(BVH_HAIR) - dir, -#endif - idir, - isect_t, - node_addr, - PATH_RAY_ALL_VISIBILITY, - dist); - - node_addr = __float_as_int(cnodes.z); - node_addr_child1 = __float_as_int(cnodes.w); - - if (traverse_mask == 3) { - /* Both children were intersected, push the farther one. 
*/ - bool is_closest_child1 = (dist[1] < dist[0]); - if (is_closest_child1) { - int tmp = node_addr; - node_addr = node_addr_child1; - node_addr_child1 = tmp; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = node_addr_child1; - } - else { - /* One child was intersected. */ - if (traverse_mask == 2) { - node_addr = node_addr_child1; - } - else if (traverse_mask == 0) { - /* Neither child was intersected. */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); - int prim_addr = __float_as_int(leaf.x); - - const int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - - /* primitive intersection */ - switch (type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if (triangle_intersect_local(kg, - local_isect, - P, - dir, - object, - local_object, - prim_addr, - isect_t, - lcg_state, - max_hits)) { - return true; - } - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if (motion_triangle_intersect_local(kg, - local_isect, - P, - dir, - ray->time, - object, - local_object, - prim_addr, - isect_t, - lcg_state, - max_hits)) { - return true; - } - } - break; - } -#endif - default: { - break; - } - } - } - } while (node_addr != ENTRYPOINT_SENTINEL); - } while (node_addr != ENTRYPOINT_SENTINEL); - - return false; -} - -ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, - ccl_private const Ray *ray, - ccl_private LocalIntersection *local_isect, - int local_object, - ccl_private uint *lcg_state, - int max_hits) -{ - return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits); -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h deleted file mode 100644 index 71122085f69..00000000000 --- a/intern/cycles/kernel/bvh/bvh_nodes.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright 2011-2016, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and -// 3-vector which might be faster. 
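The local traversal that ends above follows the same iterative pattern as the other kernels in this directory: a small fixed-size stack seeded with ENTRYPOINT_SENTINEL, inner nodes tested two children at a time, one hit child descended immediately while the other is pushed, and a pop whenever nothing below the current node is left to visit. The standalone sketch below walks a toy one-dimensional "BVH" with that control flow; the node layout and the hit test are invented for the example, and unlike the kernel it does not order the two children by distance.

#include <cstdio>
#include <vector>

/* Toy 1-D "BVH": an inner node stores an interval per child plus two child
 * references; negative references mark leaves (~index into the primitives). */
struct SketchNode {
  float lo[2], hi[2];
  int child[2];
};

static const int SKETCH_SENTINEL = -0x7fffffff; /* plays the role of ENTRYPOINT_SENTINEL */

static void sketch_traverse(const std::vector<SketchNode> &nodes, float x)
{
  int traversal_stack[64];
  int stack_ptr = 0;
  traversal_stack[0] = SKETCH_SENTINEL;

  int node = 0; /* root */
  while (node != SKETCH_SENTINEL) {
    const SketchNode &n = nodes[node];
    int next = SKETCH_SENTINEL;

    for (int c = 0; c < 2; c++) {
      /* "Intersect" child c: does its interval contain x? */
      if (x < n.lo[c] || x > n.hi[c])
        continue;
      if (n.child[c] < 0) {
        std::printf("leaf primitive %d contains %g\n", ~n.child[c], x);
      }
      else if (next == SKETCH_SENTINEL) {
        next = n.child[c]; /* descend the first hit child right away */
      }
      else {
        traversal_stack[++stack_ptr] = n.child[c]; /* keep the other for later */
      }
    }

    if (next == SKETCH_SENTINEL)
      next = traversal_stack[stack_ptr--]; /* nothing to descend: pop */
    node = next;
  }
}

int main()
{
  /* One root whose children are leaf 0 over [0,1] and leaf 1 over [1,2]. */
  std::vector<SketchNode> nodes = {{{0.0f, 1.0f}, {1.0f, 2.0f}, {~0, ~1}}};
  sketch_traverse(nodes, 0.5f);
  sketch_traverse(nodes, 1.0f);
  return 0;
}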
-ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg, - int node_addr, - int child) -{ - Transform space; - const int child_addr = node_addr + child * 3; - space.x = kernel_tex_fetch(__bvh_nodes, child_addr + 1); - space.y = kernel_tex_fetch(__bvh_nodes, child_addr + 2); - space.z = kernel_tex_fetch(__bvh_nodes, child_addr + 3); - return space; -} - -ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, - const float3 P, - const float3 idir, - const float t, - const int node_addr, - const uint visibility, - float dist[2]) -{ - - /* fetch node data */ -#ifdef __VISIBILITY_FLAG__ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); -#endif - float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1); - float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2); - float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3); - - /* intersect ray against child nodes */ - float c0lox = (node0.x - P.x) * idir.x; - float c0hix = (node0.z - P.x) * idir.x; - float c0loy = (node1.x - P.y) * idir.y; - float c0hiy = (node1.z - P.y) * idir.y; - float c0loz = (node2.x - P.z) * idir.z; - float c0hiz = (node2.z - P.z) * idir.z; - float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); - float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); - - float c1lox = (node0.y - P.x) * idir.x; - float c1hix = (node0.w - P.x) * idir.x; - float c1loy = (node1.y - P.y) * idir.y; - float c1hiy = (node1.w - P.y) * idir.y; - float c1loz = (node2.y - P.z) * idir.z; - float c1hiz = (node2.w - P.z) * idir.z; - float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); - float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); - - dist[0] = c0min; - dist[1] = c1min; - -#ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | - (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); -#else - return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 
2 : 0); -#endif -} - -ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg, - const float3 P, - const float3 dir, - const float t, - int node_addr, - int child, - float dist[2]) -{ - Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child); - float3 aligned_dir = transform_direction(&space, dir); - float3 aligned_P = transform_point(&space, P); - float3 nrdir = -bvh_inverse_direction(aligned_dir); - float3 lower_xyz = aligned_P * nrdir; - float3 upper_xyz = lower_xyz - nrdir; - const float near_x = min(lower_xyz.x, upper_xyz.x); - const float near_y = min(lower_xyz.y, upper_xyz.y); - const float near_z = min(lower_xyz.z, upper_xyz.z); - const float far_x = max(lower_xyz.x, upper_xyz.x); - const float far_y = max(lower_xyz.y, upper_xyz.y); - const float far_z = max(lower_xyz.z, upper_xyz.z); - const float tnear = max4(0.0f, near_x, near_y, near_z); - const float tfar = min4(t, far_x, far_y, far_z); - *dist = tnear; - return tnear <= tfar; -} - -ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg, - const float3 P, - const float3 dir, - const float3 idir, - const float t, - const int node_addr, - const uint visibility, - float dist[2]) -{ - int mask = 0; -#ifdef __VISIBILITY_FLAG__ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); -#endif - if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) { -#ifdef __VISIBILITY_FLAG__ - if ((__float_as_uint(cnodes.x) & visibility)) -#endif - { - mask |= 1; - } - } - if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) { -#ifdef __VISIBILITY_FLAG__ - if ((__float_as_uint(cnodes.y) & visibility)) -#endif - { - mask |= 2; - } - } - return mask; -} - -ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg, - const float3 P, - const float3 dir, - const float3 idir, - const float t, - const int node_addr, - const uint visibility, - float dist[2]) -{ - float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); - if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist); - } - else { - return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist); - } -} diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h deleted file mode 100644 index 049c6a03fe0..00000000000 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2013, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT bvh_node_intersect -#else -# define NODE_INTERSECT bvh_aligned_node_intersect -#endif - -/* This is a template BVH traversal function, where various features can be - * enabled/disabled. 
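The node tests in the bvh_nodes.h file removed above are the classic slab method: per axis, the distances to the two bounding planes are taken from the precomputed inverse direction, and a child is hit when the largest entry distance does not exceed the smallest exit distance, clamped to the ray's [0, t] range. The kernel version tests both children of a node at once and folds a visibility mask into the result; the sketch below does the single-box version with local types, so the names and the divide-by-zero handling are illustrative only.

#include <algorithm>
#include <cstdio>

struct V3 {
  float x, y, z;
};

/* Slab test: true (and the entry distance) when the ray P + t*D, with t in
 * [0, t_max], hits the box [bmin, bmax]. idir holds 1/D per axis, computed
 * once per ray as bvh_inverse_direction() does in the kernel. */
static bool slab_hit(const V3 &P, const V3 &idir, float t_max,
                     const V3 &bmin, const V3 &bmax, float *t_entry)
{
  const float lox = (bmin.x - P.x) * idir.x, hix = (bmax.x - P.x) * idir.x;
  const float loy = (bmin.y - P.y) * idir.y, hiy = (bmax.y - P.y) * idir.y;
  const float loz = (bmin.z - P.z) * idir.z, hiz = (bmax.z - P.z) * idir.z;

  const float tmin = std::max(std::max(std::min(lox, hix), std::min(loy, hiy)),
                              std::max(std::min(loz, hiz), 0.0f));
  const float tmax = std::min(std::min(std::max(lox, hix), std::max(loy, hiy)),
                              std::min(std::max(loz, hiz), t_max));

  *t_entry = tmin;
  return tmax >= tmin; /* same acceptance test as c0max >= c0min above */
}

int main()
{
  const V3 P = {0.0f, 0.0f, -5.0f};
  const V3 idir = {1e30f, 1e30f, 1.0f}; /* ray along +z; large value stands in for 1/0 */
  const V3 bmin = {-1.0f, -1.0f, -1.0f}, bmax = {1.0f, 1.0f, 1.0f};

  float t_entry;
  if (slab_hit(P, idir, 100.0f, bmin, bmax, &t_entry))
    std::printf("hit, entry at t = %g\n", t_entry);
  return 0;
}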
This way we can compile optimized versions for each case - * without new features slowing things down. - * - * BVH_HAIR: hair curve rendering - * BVH_MOTION: motion blur rendering - */ - -#ifndef __KERNEL_GPU__ -ccl_device -#else -ccl_device_inline -#endif - bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, - ccl_private const Ray *ray, - IntegratorShadowState state, - const uint visibility, - const uint max_hits, - ccl_private uint *num_recorded_hits, - ccl_private float *throughput) -{ - /* todo: - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversal_stack[BVH_STACK_SIZE]; - traversal_stack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - uint num_hits = 0; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - /* Max distance in world space. May be dynamically reduced when max number of - * recorded hits is exceeded and we no longer need to find hits beyond the max - * distance found. */ - float t_max_world = ray->t; - /* Equal to t_max_world when traversing top level BVH, transformed into local - * space when entering instances. */ - float t_max_current = t_max_world; - /* Conversion from world to local space for the current instance if any, 1.0 - * otherwise. */ - float t_world_to_instance = 1.0f; - - *num_recorded_hits = 0; - *throughput = 1.0f; - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - int node_addr_child1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); - - traverse_mask = NODE_INTERSECT(kg, - P, -#if BVH_FEATURE(BVH_HAIR) - dir, -#endif - idir, - t_max_current, - node_addr, - visibility, - dist); - - node_addr = __float_as_int(cnodes.z); - node_addr_child1 = __float_as_int(cnodes.w); - - if (traverse_mask == 3) { - /* Both children were intersected, push the farther one. */ - bool is_closest_child1 = (dist[1] < dist[0]); - if (is_closest_child1) { - int tmp = node_addr; - node_addr = node_addr_child1; - node_addr_child1 = tmp; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = node_addr_child1; - } - else { - /* One child was intersected. */ - if (traverse_mask == 2) { - node_addr = node_addr_child1; - } - else if (traverse_mask == 0) { - /* Neither child was intersected. */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); - int prim_addr = __float_as_int(leaf.x); - - if (prim_addr >= 0) { - const int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - - /* primitive intersection */ - while (prim_addr < prim_addr2) { - kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == - (type & PRIMITIVE_ALL)); - bool hit; - - /* todo: specialized intersect functions which don't fill in - * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? 
- * might give a few % performance improvement */ - Intersection isect ccl_optional_struct_init; - - switch (type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect( - kg, &isect, P, dir, t_max_current, visibility, object, prim_addr); - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - hit = motion_triangle_intersect( - kg, &isect, P, dir, t_max_current, ray->time, visibility, object, prim_addr); - break; - } -#endif -#if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE_THICK: - case PRIMITIVE_MOTION_CURVE_THICK: - case PRIMITIVE_CURVE_RIBBON: - case PRIMITIVE_MOTION_CURVE_RIBBON: { - if ((type & PRIMITIVE_ALL_MOTION) && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); - if (ray->time < prim_time.x || ray->time > prim_time.y) { - hit = false; - break; - } - } - - const int curve_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : - object; - const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); - const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr); - hit = curve_intersect(kg, - &isect, - P, - dir, - t_max_current, - curve_object, - curve_prim, - ray->time, - curve_type); - - break; - } -#endif - default: { - hit = false; - break; - } - } - - /* shadow ray early termination */ - if (hit) { - /* Convert intersection distance to world space. */ - isect.t /= t_world_to_instance; - - /* detect if this surface has a shader with transparent shadows */ - /* todo: optimize so primitive visibility flag indicates if - * the primitive has a transparent shadow shader? */ - const int flags = intersection_get_shader_flags(kg, isect.prim, isect.type); - - if (!(flags & SD_HAS_TRANSPARENT_SHADOW) || num_hits >= max_hits) { - /* If no transparent shadows, all light is blocked and we can - * stop immediately. */ - return true; - } - - num_hits++; - - bool record_intersection = true; - - /* Always use baked shadow transparency for curves. */ - if (isect.type & PRIMITIVE_ALL_CURVE) { - *throughput *= intersection_curve_shadow_transparency( - kg, isect.object, isect.prim, isect.u); - - if (*throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { - return true; - } - else { - record_intersection = false; - } - } - - if (record_intersection) { - /* Increase the number of hits, possibly beyond max_hits, we will - * simply not record those and only keep the max_hits closest. */ - uint record_index = (*num_recorded_hits)++; - - const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); - if (record_index >= max_record_hits - 1) { - /* If maximum number of hits reached, find the intersection with - * the largest distance to potentially replace when another hit - * is found. */ - const int num_recorded_hits = min(max_record_hits, record_index); - float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); - int max_recorded_hit = 0; - - for (int i = 1; i < num_recorded_hits; i++) { - const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); - if (isect_t > max_recorded_t) { - max_recorded_t = isect_t; - max_recorded_hit = i; - } - } - - if (record_index >= max_record_hits) { - record_index = max_recorded_hit; - } - - /* Limit the ray distance and stop counting hits beyond this. 
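The recording block above keeps only the nearest transparent-shadow hits: new hits are written into the shadow state until it is full, after that each additional hit replaces the farthest recorded entry, and the ray's maximum distance is pulled in so anything beyond the kept set is culled for the rest of the traversal. Below is a simplified standalone "keep the K nearest distances" buffer in that spirit; the real kernel also caps K by INTEGRATOR_SHADOW_ISECT_SIZE and converts distances between instance and world space, which the sketch ignores.

#include <cstdio>

/* Keep at most `max_record` of the nearest distances seen so far and return
 * the current cull distance: once the buffer is full, nothing at or beyond
 * the farthest kept entry can enter it again. */
static float record_hit(float hits[], int *num_recorded, int max_record, float t)
{
  if (*num_recorded < max_record) {
    hits[(*num_recorded)++] = t;
  }
  else {
    /* Buffer full: overwrite the farthest entry if the new hit is nearer. */
    int farthest = 0;
    for (int i = 1; i < max_record; i++)
      if (hits[i] > hits[farthest])
        farthest = i;
    if (t < hits[farthest])
      hits[farthest] = t;
  }

  if (*num_recorded < max_record)
    return 1e30f; /* still room: keep the full ray range */

  float t_max = hits[0];
  for (int i = 1; i < max_record; i++)
    if (hits[i] > t_max)
      t_max = hits[i];
  return t_max; /* new cull distance = farthest kept hit */
}

int main()
{
  float hits[3];
  int num_recorded = 0;
  float t_max = 1e30f;

  const float found[] = {0.9f, 0.2f, 0.7f, 0.5f, 0.8f};
  for (float t : found) {
    if (t > t_max) {
      std::printf("t=%.2f culled\n", t);
      continue;
    }
    t_max = record_hit(hits, &num_recorded, 3, t);
    std::printf("t=%.2f recorded, cull beyond %.2f\n", t, t_max);
  }
  return 0;
}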
*/ - t_max_world = max(max_recorded_t, isect.t); - t_max_current = t_max_world * t_world_to_instance; - } - - integrator_state_write_shadow_isect(state, &isect, record_index); - } - } - - prim_addr++; - } - } - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); - -#if BVH_FEATURE(BVH_MOTION) - t_world_to_instance = bvh_instance_motion_push( - kg, object, ray, &P, &dir, &idir, &ob_itfm); -#else - t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir); -#endif - - /* Convert intersection to object space. */ - t_max_current *= t_world_to_instance; - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); - } - } - } while (node_addr != ENTRYPOINT_SENTINEL); - - if (stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* Instance pop. */ -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); -#else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); -#endif - - /* Restore world space ray length. */ - t_max_current = t_max_world; - - object = OBJECT_NONE; - t_world_to_instance = 1.0f; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } while (node_addr != ENTRYPOINT_SENTINEL); - - return false; -} - -ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, - ccl_private const Ray *ray, - IntegratorShadowState state, - const uint visibility, - const uint max_hits, - ccl_private uint *num_recorded_hits, - ccl_private float *throughput) -{ - return BVH_FUNCTION_FULL_NAME(BVH)( - kg, ray, state, visibility, max_hits, num_recorded_hits, throughput); -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h deleted file mode 100644 index 1c17ebf767f..00000000000 --- a/intern/cycles/kernel/bvh/bvh_traversal.h +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2013, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT bvh_node_intersect -#else -# define NODE_INTERSECT bvh_aligned_node_intersect -#endif - -/* This is a template BVH traversal function, where various features can be - * enabled/disabled. This way we can compile optimized versions for each case - * without new features slowing things down. 
- * - * BVH_HAIR: hair curve rendering - * BVH_MOTION: motion blur rendering - */ - -ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, - ccl_private const Ray *ray, - ccl_private Intersection *isect, - const uint visibility) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversal_stack[BVH_STACK_SIZE]; - traversal_stack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - int node_addr_child1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); - - { - traverse_mask = NODE_INTERSECT(kg, - P, -#if BVH_FEATURE(BVH_HAIR) - dir, -#endif - idir, - isect->t, - node_addr, - visibility, - dist); - } - - node_addr = __float_as_int(cnodes.z); - node_addr_child1 = __float_as_int(cnodes.w); - - if (traverse_mask == 3) { - /* Both children were intersected, push the farther one. */ - bool is_closest_child1 = (dist[1] < dist[0]); - if (is_closest_child1) { - int tmp = node_addr; - node_addr = node_addr_child1; - node_addr_child1 = tmp; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = node_addr_child1; - } - else { - /* One child was intersected. */ - if (traverse_mask == 2) { - node_addr = node_addr_child1; - } - else if (traverse_mask == 0) { - /* Neither child was intersected. 
*/ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); - int prim_addr = __float_as_int(leaf.x); - - if (prim_addr >= 0) { - const int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - - /* primitive intersection */ - switch (type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if (triangle_intersect( - kg, isect, P, dir, isect->t, visibility, object, prim_addr)) { - /* shadow ray early termination */ - if (visibility & PATH_RAY_SHADOW_OPAQUE) - return true; - } - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if (motion_triangle_intersect( - kg, isect, P, dir, isect->t, ray->time, visibility, object, prim_addr)) { - /* shadow ray early termination */ - if (visibility & PATH_RAY_SHADOW_OPAQUE) - return true; - } - } - break; - } -#endif /* BVH_FEATURE(BVH_MOTION) */ -#if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE_THICK: - case PRIMITIVE_MOTION_CURVE_THICK: - case PRIMITIVE_CURVE_RIBBON: - case PRIMITIVE_MOTION_CURVE_RIBBON: { - for (; prim_addr < prim_addr2; prim_addr++) { - if ((type & PRIMITIVE_ALL_MOTION) && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); - if (ray->time < prim_time.x || ray->time > prim_time.y) { - continue; - } - } - - const int curve_object = (object == OBJECT_NONE) ? 
- kernel_tex_fetch(__prim_object, prim_addr) : - object; - const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr); - const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); - const bool hit = curve_intersect( - kg, isect, P, dir, isect->t, curve_object, curve_prim, ray->time, curve_type); - if (hit) { - /* shadow ray early termination */ - if (visibility & PATH_RAY_SHADOW_OPAQUE) - return true; - } - } - break; - } -#endif /* BVH_FEATURE(BVH_HAIR) */ - } - } - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); - -#if BVH_FEATURE(BVH_MOTION) - isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); -#else - isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); -#endif - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); - } - } - } while (node_addr != ENTRYPOINT_SENTINEL); - - if (stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* instance pop */ -#if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); -#else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); -#endif - - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } while (node_addr != ENTRYPOINT_SENTINEL); - - return (isect->prim != PRIM_NONE); -} - -ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, - ccl_private const Ray *ray, - ccl_private Intersection *isect, - const uint visibility) -{ - return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility); -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/bvh_types.h b/intern/cycles/kernel/bvh/bvh_types.h deleted file mode 100644 index 6039e707fc3..00000000000 --- a/intern/cycles/kernel/bvh/bvh_types.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Don't inline intersect functions on GPU, this is faster */ -#ifdef __KERNEL_GPU__ -# define ccl_device_intersect ccl_device_forceinline -#else -# define ccl_device_intersect ccl_device_inline -#endif - -/* bottom-most stack entry, indicating the end of traversal */ -#define ENTRYPOINT_SENTINEL 0x76543210 - -/* 64 object BVH + 64 mesh BVH + 64 object node splitting */ -#define BVH_STACK_SIZE 192 -/* BVH intersection function variations */ - -#define BVH_MOTION 1 -#define BVH_HAIR 2 - -#define BVH_NAME_JOIN(x, y) x##_##y -#define BVH_NAME_EVAL(x, y) BVH_NAME_JOIN(x, y) -#define BVH_FUNCTION_FULL_NAME(prefix) BVH_NAME_EVAL(prefix, BVH_FUNCTION_NAME) - -#define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h deleted file mode 100644 index 8686f887021..00000000000 --- a/intern/cycles/kernel/bvh/bvh_util.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Ray offset to avoid self intersection. - * - * This function should be used to compute a modified ray start position for - * rays leaving from a surface. */ - -ccl_device_inline float3 ray_offset(float3 P, float3 Ng) -{ -#ifdef __INTERSECTION_REFINE__ - const float epsilon_f = 1e-5f; - /* ideally this should match epsilon_f, but instancing and motion blur - * precision makes it problematic */ - const float epsilon_test = 1.0f; - const int epsilon_i = 32; - - float3 res; - - /* x component */ - if (fabsf(P.x) < epsilon_test) { - res.x = P.x + Ng.x * epsilon_f; - } - else { - uint ix = __float_as_uint(P.x); - ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i; - res.x = __uint_as_float(ix); - } - - /* y component */ - if (fabsf(P.y) < epsilon_test) { - res.y = P.y + Ng.y * epsilon_f; - } - else { - uint iy = __float_as_uint(P.y); - iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? -epsilon_i : epsilon_i; - res.y = __uint_as_float(iy); - } - - /* z component */ - if (fabsf(P.z) < epsilon_test) { - res.z = P.z + Ng.z * epsilon_f; - } - else { - uint iz = __float_as_uint(P.z); - iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i; - res.z = __uint_as_float(iz); - } - - return res; -#else - const float epsilon_f = 1e-4f; - return P + epsilon_f * Ng; -#endif -} - -#if defined(__KERNEL_CPU__) -ccl_device int intersections_compare(const void *a, const void *b) -{ - const Intersection *isect_a = (const Intersection *)a; - const Intersection *isect_b = (const Intersection *)b; - - if (isect_a->t < isect_b->t) - return -1; - else if (isect_a->t > isect_b->t) - return 1; - else - return 0; -} -#endif - -/* For subsurface scattering, only sorting a small amount of intersections - * so bubble sort is fine for CPU and GPU. 
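ray_offset() above pushes the ray start off the surface by stepping the floating-point bit pattern a fixed number of ulps in the direction of the normal, so the offset scales with the magnitude of the position; only near zero, where ulps become tiny, does it fall back to an absolute epsilon. The single-component sketch below shows the same bit trick with std::memcpy in place of the kernel's __float_as_uint helpers; the constants are the ones used above, the rest is illustrative.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

/* Nudge one coordinate p a few ulps along the sign of the matching normal
 * component n, falling back to an absolute epsilon close to zero. */
static float offset_component(float p, float n)
{
  const float epsilon_f = 1e-5f;
  const float epsilon_test = 1.0f;
  const uint32_t epsilon_i = 32; /* ulps to step */

  if (std::fabs(p) < epsilon_test) {
    return p + n * epsilon_f;
  }

  uint32_t pbits, nbits;
  std::memcpy(&pbits, &p, sizeof(pbits));
  std::memcpy(&nbits, &n, sizeof(nbits));

  /* Same sign: growing the bit pattern moves p along n; opposite sign:
   * shrinking it does. */
  if ((pbits ^ nbits) >> 31)
    pbits -= epsilon_i;
  else
    pbits += epsilon_i;

  std::memcpy(&p, &pbits, sizeof(p));
  return p;
}

int main()
{
  std::printf("10 along +n  -> %.9g\n", offset_component(10.0f, 1.0f));
  std::printf("10 along -n  -> %.9g\n", offset_component(10.0f, -1.0f));
  std::printf("0.1 along +n -> %.9g\n", offset_component(0.1f, 1.0f));
  return 0;
}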
*/ -ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *hits, - ccl_private float3 *Ng, - uint num_hits) -{ - bool swapped; - do { - swapped = false; - for (int j = 0; j < num_hits - 1; ++j) { - if (hits[j].t > hits[j + 1].t) { - struct Intersection tmp_hit = hits[j]; - float3 tmp_Ng = Ng[j]; - hits[j] = hits[j + 1]; - Ng[j] = Ng[j + 1]; - hits[j + 1] = tmp_hit; - Ng[j + 1] = tmp_Ng; - swapped = true; - } - } - --num_hits; - } while (swapped); -} - -/* Utility to quickly get flags from an intersection. */ - -ccl_device_forceinline int intersection_get_shader_flags(KernelGlobals kg, - const int prim, - const int type) -{ - int shader = 0; - -#ifdef __HAIR__ - if (type & PRIMITIVE_ALL_TRIANGLE) -#endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } -#ifdef __HAIR__ - else { - shader = kernel_tex_fetch(__curves, prim).shader_id; - } -#endif - - return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; -} - -ccl_device_forceinline int intersection_get_shader_from_isect_prim(KernelGlobals kg, - const int prim, - const int isect_type) -{ - int shader = 0; - -#ifdef __HAIR__ - if (isect_type & PRIMITIVE_ALL_TRIANGLE) -#endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } -#ifdef __HAIR__ - else { - shader = kernel_tex_fetch(__curves, prim).shader_id; - } -#endif - - return shader & SHADER_MASK; -} - -ccl_device_forceinline int intersection_get_shader( - KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect) -{ - return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type); -} - -ccl_device_forceinline int intersection_get_object_flags( - KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect) -{ - return kernel_tex_fetch(__object_flag, isect->object); -} - -/* TODO: find a better (faster) solution for this. Maybe store offset per object for - * attributes needed in intersection? */ -ccl_device_inline int intersection_find_attribute(KernelGlobals kg, - const int object, - const uint id) -{ - uint attr_offset = kernel_tex_fetch(__objects, object).attribute_map_offset; - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - - while (attr_map.x != id) { - if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) { - if (UNLIKELY(attr_map.y == 0)) { - return (int)ATTR_STD_NOT_FOUND; - } - else { - /* Chain jump to a different part of the table. */ - attr_offset = attr_map.z; - } - } - else { - attr_offset += ATTR_PRIM_TYPES; - } - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - } - - /* return result */ - return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; -} - -/* Transparent Shadows */ - -/* Cut-off value to stop transparent shadow tracing when practically opaque. */ -#define CURVE_SHADOW_TRANSPARENCY_CUTOFF 0.001f - -ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg, - const int object, - const int prim, - const float u) -{ - /* Find attribute. */ - const int offset = intersection_find_attribute(kg, object, ATTR_STD_SHADOW_TRANSPARENCY); - if (offset == ATTR_STD_NOT_FOUND) { - /* If no shadow transparency attribute, assume opaque. */ - return 0.0f; - } - - /* Interpolate transparency between curve keys. 
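intersection_find_attribute() above is a linear walk over a flat lookup table: entries belonging to one object are scanned until the requested id is found, and a sentinel entry either terminates the search or chain-jumps to another block of the table, which is what the "Chain jump" comment refers to. The sketch below reproduces that walk over a plain array; the entry layout and sentinel values are invented for the example and do not match the kernel's packed uint4 encoding.

#include <cstdio>

/* One lookup-table entry: attribute id, payload offset, and a jump target
 * that is only meaningful on end-of-list sentinel entries. */
struct SketchAttrEntry {
  unsigned id;
  int offset;
  unsigned jump;
};

static const unsigned SKETCH_ID_NONE = 0xffffffffu; /* sentinel id */
static const int SKETCH_NOT_FOUND = -1;

static int sketch_find_attribute(const SketchAttrEntry table[], unsigned start, unsigned id)
{
  unsigned i = start;
  while (table[i].id != id) {
    if (table[i].id == SKETCH_ID_NONE) {
      if (table[i].jump == 0)
        return SKETCH_NOT_FOUND; /* true end of the chain */
      i = table[i].jump; /* chain jump to another block of the table */
    }
    else {
      i++; /* next entry for the same object */
    }
  }
  return table[i].offset;
}

int main()
{
  /* Object A owns entries 0..2 and chains to a shared block at index 4. */
  const SketchAttrEntry table[] = {
      {10, 100, 0}, {11, 140, 0}, {SKETCH_ID_NONE, 0, 4}, /* object A */
      {0, 0, 0},                                          /* entries of another object */
      {42, 900, 0}, {SKETCH_ID_NONE, 0, 0},               /* shared block */
  };

  std::printf("attr 11 -> offset %d\n", sketch_find_attribute(table, 0, 11));
  std::printf("attr 42 -> offset %d\n", sketch_find_attribute(table, 0, 42));
  std::printf("attr  7 -> offset %d\n", sketch_find_attribute(table, 0, 7));
  return 0;
}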
*/ - const KernelCurve kcurve = kernel_tex_fetch(__curves, prim); - const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(kcurve.type); - const int k1 = k0 + 1; - - const float f0 = kernel_tex_fetch(__attributes_float, offset + k0); - const float f1 = kernel_tex_fetch(__attributes_float, offset + k1); - - return (1.0f - u) * f0 + u * f1; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h deleted file mode 100644 index fa56bd02bef..00000000000 --- a/intern/cycles/kernel/bvh/bvh_volume.h +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT bvh_node_intersect -#else -# define NODE_INTERSECT bvh_aligned_node_intersect -#endif - -/* This is a template BVH traversal function for volumes, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. - * - * BVH_MOTION: motion blur rendering - */ - -#ifndef __KERNEL_GPU__ -ccl_device -#else -ccl_device_inline -#endif - bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, - ccl_private const Ray *ray, - ccl_private Intersection *isect, - const uint visibility) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversal_stack[BVH_STACK_SIZE]; - traversal_stack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - int node_addr_child1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); - - traverse_mask = NODE_INTERSECT(kg, - P, -#if BVH_FEATURE(BVH_HAIR) - dir, -#endif - idir, - isect->t, - node_addr, - visibility, - dist); - - node_addr = __float_as_int(cnodes.z); - node_addr_child1 = __float_as_int(cnodes.w); - - if (traverse_mask == 3) { - /* Both children were intersected, push the farther one. 
*/ - bool is_closest_child1 = (dist[1] < dist[0]); - if (is_closest_child1) { - int tmp = node_addr; - node_addr = node_addr_child1; - node_addr_child1 = tmp; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = node_addr_child1; - } - else { - /* One child was intersected. */ - if (traverse_mask == 2) { - node_addr = node_addr_child1; - } - else if (traverse_mask == 0) { - /* Neither child was intersected. */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); - int prim_addr = __float_as_int(leaf.x); - - if (prim_addr >= 0) { - const int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - - /* primitive intersection */ - switch (type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : - object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - triangle_intersect(kg, isect, P, dir, isect->t, visibility, object, prim_addr); - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE) ? 
- kernel_tex_fetch(__prim_object, prim_addr) : - object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - motion_triangle_intersect( - kg, isect, P, dir, isect->t, ray->time, visibility, object, prim_addr); - } - break; - } -#endif - default: { - break; - } - } - } - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); - int object_flag = kernel_tex_fetch(__object_flag, object); - if (object_flag & SD_OBJECT_HAS_VOLUME) { -#if BVH_FEATURE(BVH_MOTION) - isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); -#else - isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); -#endif - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); - } - else { - /* pop */ - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } - } while (node_addr != ENTRYPOINT_SENTINEL); - - if (stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* instance pop */ -#if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); -#else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); -#endif - - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } while (node_addr != ENTRYPOINT_SENTINEL); - - return (isect->prim != PRIM_NONE); -} - -ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, - ccl_private const Ray *ray, - ccl_private Intersection *isect, - const uint visibility) -{ - return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility); -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h deleted file mode 100644 index 1d7d942e736..00000000000 --- a/intern/cycles/kernel/bvh/bvh_volume_all.h +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 NVIDIA Corporation, - * and code copyright 2009-2012 Intel Corporation - * - * Modifications Copyright 2011-2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if BVH_FEATURE(BVH_HAIR) -# define NODE_INTERSECT bvh_node_intersect -#else -# define NODE_INTERSECT bvh_aligned_node_intersect -#endif - -/* This is a template BVH traversal function for volumes, where - * various features can be enabled/disabled. This way we can compile optimized - * versions for each case without new features slowing things down. 
- * - * BVH_MOTION: motion blur rendering - */ - -#ifndef __KERNEL_GPU__ -ccl_device -#else -ccl_device_inline -#endif - uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, - ccl_private const Ray *ray, - Intersection *isect_array, - const uint max_hits, - const uint visibility) -{ - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversal_stack[BVH_STACK_SIZE]; - traversal_stack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* ray parameters in registers */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; - -#if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; -#endif - - int num_hits_in_instance = 0; - - uint num_hits = 0; - isect_array->t = tmax; - - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - int node_addr_child1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); - - traverse_mask = NODE_INTERSECT(kg, - P, -#if BVH_FEATURE(BVH_HAIR) - dir, -#endif - idir, - isect_t, - node_addr, - visibility, - dist); - - node_addr = __float_as_int(cnodes.z); - node_addr_child1 = __float_as_int(cnodes.w); - - if (traverse_mask == 3) { - /* Both children were intersected, push the farther one. */ - bool is_closest_child1 = (dist[1] < dist[0]); - if (is_closest_child1) { - int tmp = node_addr; - node_addr = node_addr_child1; - node_addr_child1 = tmp; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = node_addr_child1; - } - else { - /* One child was intersected. */ - if (traverse_mask == 2) { - node_addr = node_addr_child1; - } - else if (traverse_mask == 0) { - /* Neither child was intersected. */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if (node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); - int prim_addr = __float_as_int(leaf.x); - - if (prim_addr >= 0) { - const int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - bool hit; - - /* pop */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - - /* primitive intersection */ - switch (type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : - object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - hit = triangle_intersect( - kg, isect_array, P, dir, isect_t, visibility, object, prim_addr); - if (hit) { - /* Move on to next entry in intersections array. 
*/ - isect_array++; - num_hits++; - num_hits_in_instance++; - isect_array->t = isect_t; - if (num_hits == max_hits) { - if (object != OBJECT_NONE) { -#if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -#else - Transform itfm = object_fetch_transform( - kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -#endif - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } - return num_hits; - } - } - } - break; - } -#if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for (; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, prim_addr) : - object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - hit = motion_triangle_intersect( - kg, isect_array, P, dir, isect_t, ray->time, visibility, object, prim_addr); - if (hit) { - /* Move on to next entry in intersections array. */ - isect_array++; - num_hits++; - num_hits_in_instance++; - isect_array->t = isect_t; - if (num_hits == max_hits) { - if (object != OBJECT_NONE) { -# if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); -# else - Transform itfm = object_fetch_transform( - kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); -# endif - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } - return num_hits; - } - } - } - break; - } -#endif /* BVH_MOTION */ - default: { - break; - } - } - } - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr - 1); - int object_flag = kernel_tex_fetch(__object_flag, object); - if (object_flag & SD_OBJECT_HAS_VOLUME) { -#if BVH_FEATURE(BVH_MOTION) - isect_t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); -#else - isect_t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); -#endif - - num_hits_in_instance = 0; - isect_array->t = isect_t; - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); - } - else { - /* pop */ - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } - } while (node_addr != ENTRYPOINT_SENTINEL); - - if (stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); - - /* Instance pop. */ - if (num_hits_in_instance) { - float t_fac; -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); -#else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); -#endif - /* Scale isect->t to adjust for instancing. 
*/ - for (int i = 0; i < num_hits_in_instance; i++) { - (isect_array - i - 1)->t *= t_fac; - } - } - else { -#if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); -#else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); -#endif - } - - isect_t = tmax; - isect_array->t = isect_t; - - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } while (node_addr != ENTRYPOINT_SENTINEL); - - return num_hits; -} - -ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals kg, - ccl_private const Ray *ray, - Intersection *isect_array, - const uint max_hits, - const uint visibility) -{ - return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility); -} - -#undef BVH_FUNCTION_NAME -#undef BVH_FUNCTION_FEATURES -#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/embree.h b/intern/cycles/kernel/bvh/embree.h new file mode 100644 index 00000000000..9edd4f90a7e --- /dev/null +++ b/intern/cycles/kernel/bvh/embree.h @@ -0,0 +1,156 @@ +/* + * Copyright 2018, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include "kernel/device/cpu/compat.h" +#include "kernel/device/cpu/globals.h" + +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +struct CCLIntersectContext { + typedef enum { + RAY_REGULAR = 0, + RAY_SHADOW_ALL = 1, + RAY_LOCAL = 2, + RAY_SSS = 3, + RAY_VOLUME_ALL = 4, + } RayType; + + KernelGlobals kg; + RayType type; + + /* for shadow rays */ + Intersection *isect_s; + uint max_hits; + uint num_hits; + uint num_recorded_hits; + float throughput; + float max_t; + bool opaque_hit; + + /* for SSS Rays: */ + LocalIntersection *local_isect; + int local_object_id; + uint *lcg_state; + + CCLIntersectContext(KernelGlobals kg_, RayType type_) + { + kg = kg_; + type = type_; + max_hits = 1; + num_hits = 0; + num_recorded_hits = 0; + throughput = 1.0f; + max_t = FLT_MAX; + opaque_hit = false; + isect_s = NULL; + local_isect = NULL; + local_object_id = -1; + lcg_state = NULL; + } +}; + +class IntersectContext { + public: + IntersectContext(CCLIntersectContext *ctx) + { + rtcInitIntersectContext(&context); + userRayExt = ctx; + } + RTCIntersectContext context; + CCLIntersectContext *userRayExt; +}; + +ccl_device_inline void kernel_embree_setup_ray(const Ray &ray, + RTCRay &rtc_ray, + const uint visibility) +{ + rtc_ray.org_x = ray.P.x; + rtc_ray.org_y = ray.P.y; + rtc_ray.org_z = ray.P.z; + rtc_ray.dir_x = ray.D.x; + rtc_ray.dir_y = ray.D.y; + rtc_ray.dir_z = ray.D.z; + rtc_ray.tnear = 0.0f; + rtc_ray.tfar = ray.t; + rtc_ray.time = ray.time; + rtc_ray.mask = visibility; +} + +ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray, + RTCRayHit &rayhit, + const uint visibility) +{ + kernel_embree_setup_ray(ray, rayhit.ray, visibility); + rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; + rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID; +} + +ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg, + const RTCRay 
*ray, + const RTCHit *hit, + Intersection *isect) +{ + isect->t = ray->tfar; + if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { + RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); + isect->prim = hit->primID + + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); + isect->object = hit->instID[0] / 2; + } + else { + isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); + isect->object = hit->geomID / 2; + } + + const bool is_hair = hit->geomID & 1; + if (is_hair) { + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, isect->prim); + isect->type = segment.type; + isect->prim = segment.prim; + isect->u = hit->u; + isect->v = hit->v; + } + else { + isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type; + isect->u = 1.0f - hit->v - hit->u; + isect->v = hit->u; + } +} + +ccl_device_inline void kernel_embree_convert_sss_hit( + KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object) +{ + isect->u = 1.0f - hit->v - hit->u; + isect->v = hit->u; + isect->t = ray->tfar; + RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.bvh.scene, object * 2)); + isect->prim = hit->primID + + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); + isect->object = object; + isect->type = kernel_tex_fetch(__objects, object).primitive_type; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/local.h b/intern/cycles/kernel/bvh/local.h new file mode 100644 index 00000000000..79cde69699e --- /dev/null +++ b/intern/cycles/kernel/bvh/local.h @@ -0,0 +1,211 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2013, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT bvh_node_intersect +#else +# define NODE_INTERSECT bvh_aligned_node_intersect +#endif + +/* This is a template BVH traversal function for finding local intersections + * around the shading point, for subsurface scattering and bevel. We disable + * various features for performance, and for instanced objects avoid traversing + * other parts of the scene. 
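+ *
+ * Rough usage sketch (names indicative only): one expansion of this template,
+ * e.g. bvh_intersect_local, is called from the subsurface/bevel code roughly as
+ *
+ *   LocalIntersection local_isect;
+ *   uint lcg_state = ...;   (per-path RNG state)
+ *   bvh_intersect_local(kg, &ray, &local_isect, local_object, &lcg_state, max_hits);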
+ * + * BVH_MOTION: motion blur rendering + */ + +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif + bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private LocalIntersection *local_isect, + int local_object, + ccl_private uint *lcg_state, + int max_hits) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversal_stack[BVH_STACK_SIZE]; + traversal_stack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stack_ptr = 0; + int node_addr = kernel_tex_fetch(__object_node, local_object); + + /* ray parameters in registers */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = ray->t; + + if (local_isect != NULL) { + local_isect->num_hits = 0; + } + kernel_assert((local_isect == NULL) == (max_hits == 0)); + + const int object_flag = kernel_tex_fetch(__object_flag, local_object); + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; + isect_t *= bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm); +#else + isect_t *= bvh_instance_push(kg, local_object, ray, &P, &dir, &idir); +#endif + object = local_object; + } + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + int node_addr_child1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + + traverse_mask = NODE_INTERSECT(kg, + P, +#if BVH_FEATURE(BVH_HAIR) + dir, +#endif + idir, + isect_t, + node_addr, + PATH_RAY_ALL_VISIBILITY, + dist); + + node_addr = __float_as_int(cnodes.z); + node_addr_child1 = __float_as_int(cnodes.w); + + if (traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool is_closest_child1 = (dist[1] < dist[0]); + if (is_closest_child1) { + int tmp = node_addr; + node_addr = node_addr_child1; + node_addr_child1 = tmp; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = node_addr_child1; + } + else { + /* One child was intersected. */ + if (traverse_mask == 2) { + node_addr = node_addr_child1; + } + else if (traverse_mask == 0) { + /* Neither child was intersected. 
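+               * (If only the near child was hit, traverse_mask == 1 and node_addr
+               * already points at it, so that case needs no extra work; here both
+               * children missed and the next node is popped off the stack.)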
*/ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); + + const int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* pop */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + + /* primitive intersection */ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (triangle_intersect_local(kg, + local_isect, + P, + dir, + object, + local_object, + prim_addr, + isect_t, + lcg_state, + max_hits)) { + return true; + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (motion_triangle_intersect_local(kg, + local_isect, + P, + dir, + ray->time, + object, + local_object, + prim_addr, + isect_t, + lcg_state, + max_hits)) { + return true; + } + } + break; + } +#endif + default: { + break; + } + } + } + } while (node_addr != ENTRYPOINT_SENTINEL); + } while (node_addr != ENTRYPOINT_SENTINEL); + + return false; +} + +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private LocalIntersection *local_isect, + int local_object, + ccl_private uint *lcg_state, + int max_hits) +{ + return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits); +} + +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/nodes.h b/intern/cycles/kernel/bvh/nodes.h new file mode 100644 index 00000000000..71122085f69 --- /dev/null +++ b/intern/cycles/kernel/bvh/nodes.h @@ -0,0 +1,153 @@ +/* + * Copyright 2011-2016, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// TODO(sergey): Look into avoid use of full Transform and use 3x3 matrix and +// 3-vector which might be faster. 
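+
+// The node intersection helpers below are plain ray/AABB slab tests: for each
+// child, entry = max(0, per-axis near) and exit = min(t, per-axis far), and the
+// child is hit when entry <= exit. The returned mask sets bit 0 for child 0 and
+// bit 1 for child 1, so 0 means both children missed and 3 means both were hit.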
+ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals kg, + int node_addr, + int child) +{ + Transform space; + const int child_addr = node_addr + child * 3; + space.x = kernel_tex_fetch(__bvh_nodes, child_addr + 1); + space.y = kernel_tex_fetch(__bvh_nodes, child_addr + 2); + space.z = kernel_tex_fetch(__bvh_nodes, child_addr + 3); + return space; +} + +ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals kg, + const float3 P, + const float3 idir, + const float t, + const int node_addr, + const uint visibility, + float dist[2]) +{ + + /* fetch node data */ +#ifdef __VISIBILITY_FLAG__ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); +#endif + float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1); + float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2); + float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3); + + /* intersect ray against child nodes */ + float c0lox = (node0.x - P.x) * idir.x; + float c0hix = (node0.z - P.x) * idir.x; + float c0loy = (node1.x - P.y) * idir.y; + float c0hiy = (node1.z - P.y) * idir.y; + float c0loz = (node2.x - P.z) * idir.z; + float c0hiz = (node2.z - P.z) * idir.z; + float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); + float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); + + float c1lox = (node0.y - P.x) * idir.x; + float c1hix = (node0.w - P.x) * idir.x; + float c1loy = (node1.y - P.y) * idir.y; + float c1hiy = (node1.w - P.y) * idir.y; + float c1loz = (node2.y - P.z) * idir.z; + float c1hiz = (node2.w - P.z) * idir.z; + float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); + float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); + + dist[0] = c0min; + dist[1] = c1min; + +#ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? */ + return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | + (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); +#else + return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 
2 : 0); +#endif +} + +ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals kg, + const float3 P, + const float3 dir, + const float t, + int node_addr, + int child, + float dist[2]) +{ + Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child); + float3 aligned_dir = transform_direction(&space, dir); + float3 aligned_P = transform_point(&space, P); + float3 nrdir = -bvh_inverse_direction(aligned_dir); + float3 lower_xyz = aligned_P * nrdir; + float3 upper_xyz = lower_xyz - nrdir; + const float near_x = min(lower_xyz.x, upper_xyz.x); + const float near_y = min(lower_xyz.y, upper_xyz.y); + const float near_z = min(lower_xyz.z, upper_xyz.z); + const float far_x = max(lower_xyz.x, upper_xyz.x); + const float far_y = max(lower_xyz.y, upper_xyz.y); + const float far_z = max(lower_xyz.z, upper_xyz.z); + const float tnear = max4(0.0f, near_x, near_y, near_z); + const float tfar = min4(t, far_x, far_y, far_z); + *dist = tnear; + return tnear <= tfar; +} + +ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals kg, + const float3 P, + const float3 dir, + const float3 idir, + const float t, + const int node_addr, + const uint visibility, + float dist[2]) +{ + int mask = 0; +#ifdef __VISIBILITY_FLAG__ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); +#endif + if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) { +#ifdef __VISIBILITY_FLAG__ + if ((__float_as_uint(cnodes.x) & visibility)) +#endif + { + mask |= 1; + } + } + if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) { +#ifdef __VISIBILITY_FLAG__ + if ((__float_as_uint(cnodes.y) & visibility)) +#endif + { + mask |= 2; + } + } + return mask; +} + +ccl_device_forceinline int bvh_node_intersect(KernelGlobals kg, + const float3 P, + const float3 dir, + const float3 idir, + const float t, + const int node_addr, + const uint visibility, + float dist[2]) +{ + float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); + if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist); + } + else { + return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist); + } +} diff --git a/intern/cycles/kernel/bvh/shadow_all.h b/intern/cycles/kernel/bvh/shadow_all.h new file mode 100644 index 00000000000..049c6a03fe0 --- /dev/null +++ b/intern/cycles/kernel/bvh/shadow_all.h @@ -0,0 +1,339 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2013, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT bvh_node_intersect +#else +# define NODE_INTERSECT bvh_aligned_node_intersect +#endif + +/* This is a template BVH traversal function, where various features can be + * enabled/disabled. 
This way we can compile optimized versions for each case + * without new features slowing things down. + * + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + */ + +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif + bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, + ccl_private const Ray *ray, + IntegratorShadowState state, + const uint visibility, + const uint max_hits, + ccl_private uint *num_recorded_hits, + ccl_private float *throughput) +{ + /* todo: + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversal_stack[BVH_STACK_SIZE]; + traversal_stack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* ray parameters in registers */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + uint num_hits = 0; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + + /* Max distance in world space. May be dynamically reduced when max number of + * recorded hits is exceeded and we no longer need to find hits beyond the max + * distance found. */ + float t_max_world = ray->t; + /* Equal to t_max_world when traversing top level BVH, transformed into local + * space when entering instances. */ + float t_max_current = t_max_world; + /* Conversion from world to local space for the current instance if any, 1.0 + * otherwise. */ + float t_world_to_instance = 1.0f; + + *num_recorded_hits = 0; + *throughput = 1.0f; + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + int node_addr_child1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + + traverse_mask = NODE_INTERSECT(kg, + P, +#if BVH_FEATURE(BVH_HAIR) + dir, +#endif + idir, + t_max_current, + node_addr, + visibility, + dist); + + node_addr = __float_as_int(cnodes.z); + node_addr_child1 = __float_as_int(cnodes.w); + + if (traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool is_closest_child1 = (dist[1] < dist[0]); + if (is_closest_child1) { + int tmp = node_addr; + node_addr = node_addr_child1; + node_addr_child1 = tmp; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = node_addr_child1; + } + else { + /* One child was intersected. */ + if (traverse_mask == 2) { + node_addr = node_addr_child1; + } + else if (traverse_mask == 0) { + /* Neither child was intersected. */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); + + if (prim_addr >= 0) { + const int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* pop */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + + /* primitive intersection */ + while (prim_addr < prim_addr2) { + kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == + (type & PRIMITIVE_ALL)); + bool hit; + + /* todo: specialized intersect functions which don't fill in + * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? 
+ * might give a few % performance improvement */ + Intersection isect ccl_optional_struct_init; + + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + hit = triangle_intersect( + kg, &isect, P, dir, t_max_current, visibility, object, prim_addr); + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + hit = motion_triangle_intersect( + kg, &isect, P, dir, t_max_current, ray->time, visibility, object, prim_addr); + break; + } +#endif +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE_THICK: + case PRIMITIVE_MOTION_CURVE_THICK: + case PRIMITIVE_CURVE_RIBBON: + case PRIMITIVE_MOTION_CURVE_RIBBON: { + if ((type & PRIMITIVE_ALL_MOTION) && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + if (ray->time < prim_time.x || ray->time > prim_time.y) { + hit = false; + break; + } + } + + const int curve_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); + const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr); + hit = curve_intersect(kg, + &isect, + P, + dir, + t_max_current, + curve_object, + curve_prim, + ray->time, + curve_type); + + break; + } +#endif + default: { + hit = false; + break; + } + } + + /* shadow ray early termination */ + if (hit) { + /* Convert intersection distance to world space. */ + isect.t /= t_world_to_instance; + + /* detect if this surface has a shader with transparent shadows */ + /* todo: optimize so primitive visibility flag indicates if + * the primitive has a transparent shadow shader? */ + const int flags = intersection_get_shader_flags(kg, isect.prim, isect.type); + + if (!(flags & SD_HAS_TRANSPARENT_SHADOW) || num_hits >= max_hits) { + /* If no transparent shadows, all light is blocked and we can + * stop immediately. */ + return true; + } + + num_hits++; + + bool record_intersection = true; + + /* Always use baked shadow transparency for curves. */ + if (isect.type & PRIMITIVE_ALL_CURVE) { + *throughput *= intersection_curve_shadow_transparency( + kg, isect.object, isect.prim, isect.u); + + if (*throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) { + return true; + } + else { + record_intersection = false; + } + } + + if (record_intersection) { + /* Increase the number of hits, possibly beyond max_hits, we will + * simply not record those and only keep the max_hits closest. */ + uint record_index = (*num_recorded_hits)++; + + const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE); + if (record_index >= max_record_hits - 1) { + /* If maximum number of hits reached, find the intersection with + * the largest distance to potentially replace when another hit + * is found. */ + const int num_recorded_hits = min(max_record_hits, record_index); + float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t); + int max_recorded_hit = 0; + + for (int i = 1; i < num_recorded_hits; i++) { + const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t); + if (isect_t > max_recorded_t) { + max_recorded_t = isect_t; + max_recorded_hit = i; + } + } + + if (record_index >= max_record_hits) { + record_index = max_recorded_hit; + } + + /* Limit the ray distance and stop counting hits beyond this. 
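+                 * Concretely: once the record array is full, a new hit overwrites the
+                 * farthest recorded one, and the ray's maximum distance is clamped to
+                 * the farthest distance still of interest so that more distant nodes
+                 * and primitives are culled during the rest of the traversal.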
*/ + t_max_world = max(max_recorded_t, isect.t); + t_max_current = t_max_world * t_world_to_instance; + } + + integrator_state_write_shadow_isect(state, &isect, record_index); + } + } + + prim_addr++; + } + } + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + +#if BVH_FEATURE(BVH_MOTION) + t_world_to_instance = bvh_instance_motion_push( + kg, object, ray, &P, &dir, &idir, &ob_itfm); +#else + t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir); +#endif + + /* Convert intersection to object space. */ + t_max_current *= t_world_to_instance; + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; + + node_addr = kernel_tex_fetch(__object_node, object); + } + } + } while (node_addr != ENTRYPOINT_SENTINEL); + + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* Instance pop. */ +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); +#endif + + /* Restore world space ray length. */ + t_max_current = t_max_world; + + object = OBJECT_NONE; + t_world_to_instance = 1.0f; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } while (node_addr != ENTRYPOINT_SENTINEL); + + return false; +} + +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, + ccl_private const Ray *ray, + IntegratorShadowState state, + const uint visibility, + const uint max_hits, + ccl_private uint *num_recorded_hits, + ccl_private float *throughput) +{ + return BVH_FUNCTION_FULL_NAME(BVH)( + kg, ray, state, visibility, max_hits, num_recorded_hits, throughput); +} + +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/traversal.h b/intern/cycles/kernel/bvh/traversal.h new file mode 100644 index 00000000000..1c17ebf767f --- /dev/null +++ b/intern/cycles/kernel/bvh/traversal.h @@ -0,0 +1,241 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2013, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT bvh_node_intersect +#else +# define NODE_INTERSECT bvh_aligned_node_intersect +#endif + +/* This is a template BVH traversal function, where various features can be + * enabled/disabled. This way we can compile optimized versions for each case + * without new features slowing things down. 
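+ *
+ * The includer defines BVH_FUNCTION_NAME and BVH_FUNCTION_FEATURES before
+ * including this header, roughly as follows (names indicative only):
+ *
+ *   #define BVH_FUNCTION_NAME bvh_intersect
+ *   #define BVH_FUNCTION_FEATURES (BVH_HAIR | BVH_MOTION)
+ *   #include "kernel/bvh/traversal.h"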
+ * + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + */ + +ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint visibility) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversal_stack[BVH_STACK_SIZE]; + traversal_stack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* ray parameters in registers */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + int node_addr_child1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + + { + traverse_mask = NODE_INTERSECT(kg, + P, +#if BVH_FEATURE(BVH_HAIR) + dir, +#endif + idir, + isect->t, + node_addr, + visibility, + dist); + } + + node_addr = __float_as_int(cnodes.z); + node_addr_child1 = __float_as_int(cnodes.w); + + if (traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool is_closest_child1 = (dist[1] < dist[0]); + if (is_closest_child1) { + int tmp = node_addr; + node_addr = node_addr_child1; + node_addr_child1 = tmp; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = node_addr_child1; + } + else { + /* One child was intersected. */ + if (traverse_mask == 2) { + node_addr = node_addr_child1; + } + else if (traverse_mask == 0) { + /* Neither child was intersected. 
*/ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); + + if (prim_addr >= 0) { + const int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* pop */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + + /* primitive intersection */ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (triangle_intersect( + kg, isect, P, dir, isect->t, visibility, object, prim_addr)) { + /* shadow ray early termination */ + if (visibility & PATH_RAY_SHADOW_OPAQUE) + return true; + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (motion_triangle_intersect( + kg, isect, P, dir, isect->t, ray->time, visibility, object, prim_addr)) { + /* shadow ray early termination */ + if (visibility & PATH_RAY_SHADOW_OPAQUE) + return true; + } + } + break; + } +#endif /* BVH_FEATURE(BVH_MOTION) */ +#if BVH_FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE_THICK: + case PRIMITIVE_MOTION_CURVE_THICK: + case PRIMITIVE_CURVE_RIBBON: + case PRIMITIVE_MOTION_CURVE_RIBBON: { + for (; prim_addr < prim_addr2; prim_addr++) { + if ((type & PRIMITIVE_ALL_MOTION) && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + if (ray->time < prim_time.x || ray->time > prim_time.y) { + continue; + } + } + + const int curve_object = (object == OBJECT_NONE) ? 
+ kernel_tex_fetch(__prim_object, prim_addr) : + object; + const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr); + const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); + const bool hit = curve_intersect( + kg, isect, P, dir, isect->t, curve_object, curve_prim, ray->time, curve_type); + if (hit) { + /* shadow ray early termination */ + if (visibility & PATH_RAY_SHADOW_OPAQUE) + return true; + } + } + break; + } +#endif /* BVH_FEATURE(BVH_HAIR) */ + } + } + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + +#if BVH_FEATURE(BVH_MOTION) + isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); +#else + isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); +#endif + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; + + node_addr = kernel_tex_fetch(__object_node, object); + } + } + } while (node_addr != ENTRYPOINT_SENTINEL); + + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* instance pop */ +#if BVH_FEATURE(BVH_MOTION) + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); +#else + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); +#endif + + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } while (node_addr != ENTRYPOINT_SENTINEL); + + return (isect->prim != PRIM_NONE); +} + +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint visibility) +{ + return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility); +} + +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/types.h b/intern/cycles/kernel/bvh/types.h new file mode 100644 index 00000000000..6039e707fc3 --- /dev/null +++ b/intern/cycles/kernel/bvh/types.h @@ -0,0 +1,44 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Don't inline intersect functions on GPU, this is faster */ +#ifdef __KERNEL_GPU__ +# define ccl_device_intersect ccl_device_forceinline +#else +# define ccl_device_intersect ccl_device_inline +#endif + +/* bottom-most stack entry, indicating the end of traversal */ +#define ENTRYPOINT_SENTINEL 0x76543210 + +/* 64 object BVH + 64 mesh BVH + 64 object node splitting */ +#define BVH_STACK_SIZE 192 +/* BVH intersection function variations */ + +#define BVH_MOTION 1 +#define BVH_HAIR 2 + +#define BVH_NAME_JOIN(x, y) x##_##y +#define BVH_NAME_EVAL(x, y) BVH_NAME_JOIN(x, y) +#define BVH_FUNCTION_FULL_NAME(prefix) BVH_NAME_EVAL(prefix, BVH_FUNCTION_NAME) + +#define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/util.h b/intern/cycles/kernel/bvh/util.h new file mode 100644 index 00000000000..8686f887021 --- /dev/null +++ b/intern/cycles/kernel/bvh/util.h @@ -0,0 +1,226 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Ray offset to avoid self intersection. + * + * This function should be used to compute a modified ray start position for + * rays leaving from a surface. */ + +ccl_device_inline float3 ray_offset(float3 P, float3 Ng) +{ +#ifdef __INTERSECTION_REFINE__ + const float epsilon_f = 1e-5f; + /* ideally this should match epsilon_f, but instancing and motion blur + * precision makes it problematic */ + const float epsilon_test = 1.0f; + const int epsilon_i = 32; + + float3 res; + + /* x component */ + if (fabsf(P.x) < epsilon_test) { + res.x = P.x + Ng.x * epsilon_f; + } + else { + uint ix = __float_as_uint(P.x); + ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i; + res.x = __uint_as_float(ix); + } + + /* y component */ + if (fabsf(P.y) < epsilon_test) { + res.y = P.y + Ng.y * epsilon_f; + } + else { + uint iy = __float_as_uint(P.y); + iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? -epsilon_i : epsilon_i; + res.y = __uint_as_float(iy); + } + + /* z component */ + if (fabsf(P.z) < epsilon_test) { + res.z = P.z + Ng.z * epsilon_f; + } + else { + uint iz = __float_as_uint(P.z); + iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i; + res.z = __uint_as_float(iz); + } + + return res; +#else + const float epsilon_f = 1e-4f; + return P + epsilon_f * Ng; +#endif +} + +#if defined(__KERNEL_CPU__) +ccl_device int intersections_compare(const void *a, const void *b) +{ + const Intersection *isect_a = (const Intersection *)a; + const Intersection *isect_b = (const Intersection *)b; + + if (isect_a->t < isect_b->t) + return -1; + else if (isect_a->t > isect_b->t) + return 1; + else + return 0; +} +#endif + +/* For subsurface scattering, only sorting a small amount of intersections + * so bubble sort is fine for CPU and GPU. 
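+ *
+ * The loop below is a straight in-place bubble sort over the paired (hit, normal)
+ * arrays: worst case O(num_hits^2) swaps, which is acceptable for the handful of
+ * local hits gathered here and needs no extra working memory on the GPU.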
*/ +ccl_device_inline void sort_intersections_and_normals(ccl_private Intersection *hits, + ccl_private float3 *Ng, + uint num_hits) +{ + bool swapped; + do { + swapped = false; + for (int j = 0; j < num_hits - 1; ++j) { + if (hits[j].t > hits[j + 1].t) { + struct Intersection tmp_hit = hits[j]; + float3 tmp_Ng = Ng[j]; + hits[j] = hits[j + 1]; + Ng[j] = Ng[j + 1]; + hits[j + 1] = tmp_hit; + Ng[j + 1] = tmp_Ng; + swapped = true; + } + } + --num_hits; + } while (swapped); +} + +/* Utility to quickly get flags from an intersection. */ + +ccl_device_forceinline int intersection_get_shader_flags(KernelGlobals kg, + const int prim, + const int type) +{ + int shader = 0; + +#ifdef __HAIR__ + if (type & PRIMITIVE_ALL_TRIANGLE) +#endif + { + shader = kernel_tex_fetch(__tri_shader, prim); + } +#ifdef __HAIR__ + else { + shader = kernel_tex_fetch(__curves, prim).shader_id; + } +#endif + + return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; +} + +ccl_device_forceinline int intersection_get_shader_from_isect_prim(KernelGlobals kg, + const int prim, + const int isect_type) +{ + int shader = 0; + +#ifdef __HAIR__ + if (isect_type & PRIMITIVE_ALL_TRIANGLE) +#endif + { + shader = kernel_tex_fetch(__tri_shader, prim); + } +#ifdef __HAIR__ + else { + shader = kernel_tex_fetch(__curves, prim).shader_id; + } +#endif + + return shader & SHADER_MASK; +} + +ccl_device_forceinline int intersection_get_shader( + KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect) +{ + return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type); +} + +ccl_device_forceinline int intersection_get_object_flags( + KernelGlobals kg, ccl_private const Intersection *ccl_restrict isect) +{ + return kernel_tex_fetch(__object_flag, isect->object); +} + +/* TODO: find a better (faster) solution for this. Maybe store offset per object for + * attributes needed in intersection? */ +ccl_device_inline int intersection_find_attribute(KernelGlobals kg, + const int object, + const uint id) +{ + uint attr_offset = kernel_tex_fetch(__objects, object).attribute_map_offset; + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + + while (attr_map.x != id) { + if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) { + if (UNLIKELY(attr_map.y == 0)) { + return (int)ATTR_STD_NOT_FOUND; + } + else { + /* Chain jump to a different part of the table. */ + attr_offset = attr_map.z; + } + } + else { + attr_offset += ATTR_PRIM_TYPES; + } + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } + + /* return result */ + return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; +} + +/* Transparent Shadows */ + +/* Cut-off value to stop transparent shadow tracing when practically opaque. */ +#define CURVE_SHADOW_TRANSPARENCY_CUTOFF 0.001f + +ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg, + const int object, + const int prim, + const float u) +{ + /* Find attribute. */ + const int offset = intersection_find_attribute(kg, object, ATTR_STD_SHADOW_TRANSPARENCY); + if (offset == ATTR_STD_NOT_FOUND) { + /* If no shadow transparency attribute, assume opaque. */ + return 0.0f; + } + + /* Interpolate transparency between curve keys. 
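+   * The result below is the linear blend (1 - u) * f0 + u * f1, where f0 and f1
+   * are the shadow transparency attribute values at the two keys of the
+   * intersected segment and u is the intersection parameter along that segment.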
*/ + const KernelCurve kcurve = kernel_tex_fetch(__curves, prim); + const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(kcurve.type); + const int k1 = k0 + 1; + + const float f0 = kernel_tex_fetch(__attributes_float, offset + k0); + const float f1 = kernel_tex_fetch(__attributes_float, offset + k1); + + return (1.0f - u) * f0 + u * f1; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/volume.h b/intern/cycles/kernel/bvh/volume.h new file mode 100644 index 00000000000..fa56bd02bef --- /dev/null +++ b/intern/cycles/kernel/bvh/volume.h @@ -0,0 +1,234 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT bvh_node_intersect +#else +# define NODE_INTERSECT bvh_aligned_node_intersect +#endif + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. + * + * BVH_MOTION: motion blur rendering + */ + +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif + bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint visibility) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversal_stack[BVH_STACK_SIZE]; + traversal_stack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* ray parameters in registers */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + int node_addr_child1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + + traverse_mask = NODE_INTERSECT(kg, + P, +#if BVH_FEATURE(BVH_HAIR) + dir, +#endif + idir, + isect->t, + node_addr, + visibility, + dist); + + node_addr = __float_as_int(cnodes.z); + node_addr_child1 = __float_as_int(cnodes.w); + + if (traverse_mask == 3) { + /* Both children were intersected, push the farther one. 
*/ + bool is_closest_child1 = (dist[1] < dist[0]); + if (is_closest_child1) { + int tmp = node_addr; + node_addr = node_addr_child1; + node_addr_child1 = tmp; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = node_addr_child1; + } + else { + /* One child was intersected. */ + if (traverse_mask == 2) { + node_addr = node_addr_child1; + } + else if (traverse_mask == 0) { + /* Neither child was intersected. */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); + + if (prim_addr >= 0) { + const int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* pop */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + + /* primitive intersection */ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + triangle_intersect(kg, isect, P, dir, isect->t, visibility, object, prim_addr); + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE) ? 
+ kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + motion_triangle_intersect( + kg, isect, P, dir, isect->t, ray->time, visibility, object, prim_addr); + } + break; + } +#endif + default: { + break; + } + } + } + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_HAS_VOLUME) { +#if BVH_FEATURE(BVH_MOTION) + isect->t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); +#else + isect->t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); +#endif + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; + + node_addr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } + } while (node_addr != ENTRYPOINT_SENTINEL); + + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* instance pop */ +#if BVH_FEATURE(BVH_MOTION) + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); +#else + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); +#endif + + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } while (node_addr != ENTRYPOINT_SENTINEL); + + return (isect->prim != PRIM_NONE); +} + +ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg, + ccl_private const Ray *ray, + ccl_private Intersection *isect, + const uint visibility) +{ + return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility); +} + +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/volume_all.h b/intern/cycles/kernel/bvh/volume_all.h new file mode 100644 index 00000000000..1d7d942e736 --- /dev/null +++ b/intern/cycles/kernel/bvh/volume_all.h @@ -0,0 +1,303 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if BVH_FEATURE(BVH_HAIR) +# define NODE_INTERSECT bvh_node_intersect +#else +# define NODE_INTERSECT bvh_aligned_node_intersect +#endif + +/* This is a template BVH traversal function for volumes, where + * various features can be enabled/disabled. This way we can compile optimized + * versions for each case without new features slowing things down. 
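+ *
+ * Unlike volume.h, which keeps only the closest volume hit, this variant
+ * records every volume boundary hit along the ray (up to max_hits) into
+ * isect_array and returns the number of hits, rescaling the recorded
+ * distances back to world space when popping out of an instance.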
+ * + * BVH_MOTION: motion blur rendering + */ + +#ifndef __KERNEL_GPU__ +ccl_device +#else +ccl_device_inline +#endif + uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg, + ccl_private const Ray *ray, + Intersection *isect_array, + const uint max_hits, + const uint visibility) +{ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversal_stack[BVH_STACK_SIZE]; + traversal_stack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + +#if BVH_FEATURE(BVH_MOTION) + Transform ob_itfm; +#endif + + int num_hits_in_instance = 0; + + uint num_hits = 0; + isect_array->t = tmax; + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + int node_addr_child1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + + traverse_mask = NODE_INTERSECT(kg, + P, +#if BVH_FEATURE(BVH_HAIR) + dir, +#endif + idir, + isect_t, + node_addr, + visibility, + dist); + + node_addr = __float_as_int(cnodes.z); + node_addr_child1 = __float_as_int(cnodes.w); + + if (traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool is_closest_child1 = (dist[1] < dist[0]); + if (is_closest_child1) { + int tmp = node_addr; + node_addr = node_addr_child1; + node_addr_child1 = tmp; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = node_addr_child1; + } + else { + /* One child was intersected. */ + if (traverse_mask == 2) { + node_addr = node_addr_child1; + } + else if (traverse_mask == 0) { + /* Neither child was intersected. */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); + + if (prim_addr >= 0) { + const int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + bool hit; + + /* pop */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + + /* primitive intersection */ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = triangle_intersect( + kg, isect_array, P, dir, isect_t, visibility, object, prim_addr); + if (hit) { + /* Move on to next entry in intersections array. 
*/ + isect_array++; + num_hits++; + num_hits_in_instance++; + isect_array->t = isect_t; + if (num_hits == max_hits) { + if (object != OBJECT_NONE) { +#if BVH_FEATURE(BVH_MOTION) + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); +#else + Transform itfm = object_fetch_transform( + kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); +#endif + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } + return num_hits; + } + } + } + break; + } +#if BVH_FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = motion_triangle_intersect( + kg, isect_array, P, dir, isect_t, ray->time, visibility, object, prim_addr); + if (hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; + num_hits_in_instance++; + isect_array->t = isect_t; + if (num_hits == max_hits) { + if (object != OBJECT_NONE) { +# if BVH_FEATURE(BVH_MOTION) + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); +# else + Transform itfm = object_fetch_transform( + kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); +# endif + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } + return num_hits; + } + } + } + break; + } +#endif /* BVH_MOTION */ + default: { + break; + } + } + } + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_HAS_VOLUME) { +#if BVH_FEATURE(BVH_MOTION) + isect_t *= bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &ob_itfm); +#else + isect_t *= bvh_instance_push(kg, object, ray, &P, &dir, &idir); +#endif + + num_hits_in_instance = 0; + isect_array->t = isect_t; + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; + + node_addr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } + } while (node_addr != ENTRYPOINT_SENTINEL); + + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); + + /* Instance pop. */ + if (num_hits_in_instance) { + float t_fac; +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); +#else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +#endif + /* Scale isect->t to adjust for instancing. 
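+                 * t_fac converts the hit distances recorded while traversing inside
+                 * this instance back to world-space distances, so all entries for the
+                 * instance are rescaled before continuing at the top level.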
*/ + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } + else { +#if BVH_FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); +#endif + } + + isect_t = tmax; + isect_array->t = isect_t; + + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } while (node_addr != ENTRYPOINT_SENTINEL); + + return num_hits; +} + +ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals kg, + ccl_private const Ray *ray, + Intersection *isect_array, + const uint max_hits, + const uint visibility) +{ + return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility); +} + +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES +#undef NODE_INTERSECT diff --git a/intern/cycles/kernel/camera/camera.h b/intern/cycles/kernel/camera/camera.h index 66bc25bb879..e966e9e1596 100644 --- a/intern/cycles/kernel/camera/camera.h +++ b/intern/cycles/kernel/camera/camera.h @@ -16,10 +16,10 @@ #pragma once -#include "kernel/camera/camera_projection.h" -#include "kernel/sample/sample_mapping.h" -#include "kernel/util/util_differential.h" -#include "kernel/util/util_lookup_table.h" +#include "kernel/camera/projection.h" +#include "kernel/sample/mapping.h" +#include "kernel/util/differential.h" +#include "kernel/util/lookup_table.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/camera/camera_projection.h b/intern/cycles/kernel/camera/camera_projection.h deleted file mode 100644 index 0aea82fa812..00000000000 --- a/intern/cycles/kernel/camera/camera_projection.h +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Parts adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Spherical coordinates <-> Cartesian direction. 
*/ - -ccl_device float2 direction_to_spherical(float3 dir) -{ - float theta = safe_acosf(dir.z); - float phi = atan2f(dir.x, dir.y); - - return make_float2(theta, phi); -} - -ccl_device float3 spherical_to_direction(float theta, float phi) -{ - float sin_theta = sinf(theta); - return make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cosf(theta)); -} - -/* Equirectangular coordinates <-> Cartesian direction */ - -ccl_device float2 direction_to_equirectangular_range(float3 dir, float4 range) -{ - if (is_zero(dir)) - return zero_float2(); - - float u = (atan2f(dir.y, dir.x) - range.y) / range.x; - float v = (acosf(dir.z / len(dir)) - range.w) / range.z; - - return make_float2(u, v); -} - -ccl_device float3 equirectangular_range_to_direction(float u, float v, float4 range) -{ - float phi = range.x * u + range.y; - float theta = range.z * v + range.w; - float sin_theta = sinf(theta); - return make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cosf(theta)); -} - -ccl_device float2 direction_to_equirectangular(float3 dir) -{ - return direction_to_equirectangular_range(dir, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F)); -} - -ccl_device float3 equirectangular_to_direction(float u, float v) -{ - return equirectangular_range_to_direction(u, v, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F)); -} - -/* Fisheye <-> Cartesian direction */ - -ccl_device float2 direction_to_fisheye(float3 dir, float fov) -{ - float r = atan2f(sqrtf(dir.y * dir.y + dir.z * dir.z), dir.x) / fov; - float phi = atan2f(dir.z, dir.y); - - float u = r * cosf(phi) + 0.5f; - float v = r * sinf(phi) + 0.5f; - - return make_float2(u, v); -} - -ccl_device float3 fisheye_to_direction(float u, float v, float fov) -{ - u = (u - 0.5f) * 2.0f; - v = (v - 0.5f) * 2.0f; - - float r = sqrtf(u * u + v * v); - - if (r > 1.0f) - return zero_float3(); - - float phi = safe_acosf((r != 0.0f) ? u / r : 0.0f); - float theta = r * fov * 0.5f; - - if (v < 0.0f) - phi = -phi; - - return make_float3(cosf(theta), -cosf(phi) * sinf(theta), sinf(phi) * sinf(theta)); -} - -ccl_device float2 direction_to_fisheye_equisolid(float3 dir, float lens, float width, float height) -{ - float theta = safe_acosf(dir.x); - float r = 2.0f * lens * sinf(theta * 0.5f); - float phi = atan2f(dir.z, dir.y); - - float u = r * cosf(phi) / width + 0.5f; - float v = r * sinf(phi) / height + 0.5f; - - return make_float2(u, v); -} - -ccl_device_inline float3 -fisheye_equisolid_to_direction(float u, float v, float lens, float fov, float width, float height) -{ - u = (u - 0.5f) * width; - v = (v - 0.5f) * height; - - float rmax = 2.0f * lens * sinf(fov * 0.25f); - float r = sqrtf(u * u + v * v); - - if (r > rmax) - return zero_float3(); - - float phi = safe_acosf((r != 0.0f) ? 
u / r : 0.0f); - float theta = 2.0f * asinf(r / (2.0f * lens)); - - if (v < 0.0f) - phi = -phi; - - return make_float3(cosf(theta), -cosf(phi) * sinf(theta), sinf(phi) * sinf(theta)); -} - -/* Mirror Ball <-> Cartesion direction */ - -ccl_device float3 mirrorball_to_direction(float u, float v) -{ - /* point on sphere */ - float3 dir; - - dir.x = 2.0f * u - 1.0f; - dir.z = 2.0f * v - 1.0f; - - if (dir.x * dir.x + dir.z * dir.z > 1.0f) - return zero_float3(); - - dir.y = -sqrtf(max(1.0f - dir.x * dir.x - dir.z * dir.z, 0.0f)); - - /* reflection */ - float3 I = make_float3(0.0f, -1.0f, 0.0f); - - return 2.0f * dot(dir, I) * dir - I; -} - -ccl_device float2 direction_to_mirrorball(float3 dir) -{ - /* inverse of mirrorball_to_direction */ - dir.y -= 1.0f; - - float div = 2.0f * sqrtf(max(-0.5f * dir.y, 0.0f)); - if (div > 0.0f) - dir /= div; - - float u = 0.5f * (dir.x + 1.0f); - float v = 0.5f * (dir.z + 1.0f); - - return make_float2(u, v); -} - -ccl_device_inline float3 panorama_to_direction(ccl_constant KernelCamera *cam, float u, float v) -{ - switch (cam->panorama_type) { - case PANORAMA_EQUIRECTANGULAR: - return equirectangular_range_to_direction(u, v, cam->equirectangular_range); - case PANORAMA_MIRRORBALL: - return mirrorball_to_direction(u, v); - case PANORAMA_FISHEYE_EQUIDISTANT: - return fisheye_to_direction(u, v, cam->fisheye_fov); - case PANORAMA_FISHEYE_EQUISOLID: - default: - return fisheye_equisolid_to_direction( - u, v, cam->fisheye_lens, cam->fisheye_fov, cam->sensorwidth, cam->sensorheight); - } -} - -ccl_device_inline float2 direction_to_panorama(ccl_constant KernelCamera *cam, float3 dir) -{ - switch (cam->panorama_type) { - case PANORAMA_EQUIRECTANGULAR: - return direction_to_equirectangular_range(dir, cam->equirectangular_range); - case PANORAMA_MIRRORBALL: - return direction_to_mirrorball(dir); - case PANORAMA_FISHEYE_EQUIDISTANT: - return direction_to_fisheye(dir, cam->fisheye_fov); - case PANORAMA_FISHEYE_EQUISOLID: - default: - return direction_to_fisheye_equisolid( - dir, cam->fisheye_lens, cam->sensorwidth, cam->sensorheight); - } -} - -ccl_device_inline void spherical_stereo_transform(ccl_constant KernelCamera *cam, - ccl_private float3 *P, - ccl_private float3 *D) -{ - float interocular_offset = cam->interocular_offset; - - /* Interocular offset of zero means either non stereo, or stereo without - * spherical stereo. */ - kernel_assert(interocular_offset != 0.0f); - - if (cam->pole_merge_angle_to > 0.0f) { - const float pole_merge_angle_from = cam->pole_merge_angle_from, - pole_merge_angle_to = cam->pole_merge_angle_to; - float altitude = fabsf(safe_asinf((*D).z)); - if (altitude > pole_merge_angle_to) { - interocular_offset = 0.0f; - } - else if (altitude > pole_merge_angle_from) { - float fac = (altitude - pole_merge_angle_from) / - (pole_merge_angle_to - pole_merge_angle_from); - float fade = cosf(fac * M_PI_2_F); - interocular_offset *= fade; - } - } - - float3 up = make_float3(0.0f, 0.0f, 1.0f); - float3 side = normalize(cross(*D, up)); - float3 stereo_offset = side * interocular_offset; - - *P += stereo_offset; - - /* Convergence distance is FLT_MAX in the case of parallel convergence mode, - * no need to modify direction in this case either. 
*/ - const float convergence_distance = cam->convergence_distance; - - if (convergence_distance != FLT_MAX) { - float3 screen_offset = convergence_distance * (*D); - *D = normalize(screen_offset - stereo_offset); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/camera/projection.h b/intern/cycles/kernel/camera/projection.h new file mode 100644 index 00000000000..0aea82fa812 --- /dev/null +++ b/intern/cycles/kernel/camera/projection.h @@ -0,0 +1,258 @@ +/* + * Parts adapted from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2011, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Spherical coordinates <-> Cartesian direction. 
*/ + +ccl_device float2 direction_to_spherical(float3 dir) +{ + float theta = safe_acosf(dir.z); + float phi = atan2f(dir.x, dir.y); + + return make_float2(theta, phi); +} + +ccl_device float3 spherical_to_direction(float theta, float phi) +{ + float sin_theta = sinf(theta); + return make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cosf(theta)); +} + +/* Equirectangular coordinates <-> Cartesian direction */ + +ccl_device float2 direction_to_equirectangular_range(float3 dir, float4 range) +{ + if (is_zero(dir)) + return zero_float2(); + + float u = (atan2f(dir.y, dir.x) - range.y) / range.x; + float v = (acosf(dir.z / len(dir)) - range.w) / range.z; + + return make_float2(u, v); +} + +ccl_device float3 equirectangular_range_to_direction(float u, float v, float4 range) +{ + float phi = range.x * u + range.y; + float theta = range.z * v + range.w; + float sin_theta = sinf(theta); + return make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cosf(theta)); +} + +ccl_device float2 direction_to_equirectangular(float3 dir) +{ + return direction_to_equirectangular_range(dir, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F)); +} + +ccl_device float3 equirectangular_to_direction(float u, float v) +{ + return equirectangular_range_to_direction(u, v, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F)); +} + +/* Fisheye <-> Cartesian direction */ + +ccl_device float2 direction_to_fisheye(float3 dir, float fov) +{ + float r = atan2f(sqrtf(dir.y * dir.y + dir.z * dir.z), dir.x) / fov; + float phi = atan2f(dir.z, dir.y); + + float u = r * cosf(phi) + 0.5f; + float v = r * sinf(phi) + 0.5f; + + return make_float2(u, v); +} + +ccl_device float3 fisheye_to_direction(float u, float v, float fov) +{ + u = (u - 0.5f) * 2.0f; + v = (v - 0.5f) * 2.0f; + + float r = sqrtf(u * u + v * v); + + if (r > 1.0f) + return zero_float3(); + + float phi = safe_acosf((r != 0.0f) ? u / r : 0.0f); + float theta = r * fov * 0.5f; + + if (v < 0.0f) + phi = -phi; + + return make_float3(cosf(theta), -cosf(phi) * sinf(theta), sinf(phi) * sinf(theta)); +} + +ccl_device float2 direction_to_fisheye_equisolid(float3 dir, float lens, float width, float height) +{ + float theta = safe_acosf(dir.x); + float r = 2.0f * lens * sinf(theta * 0.5f); + float phi = atan2f(dir.z, dir.y); + + float u = r * cosf(phi) / width + 0.5f; + float v = r * sinf(phi) / height + 0.5f; + + return make_float2(u, v); +} + +ccl_device_inline float3 +fisheye_equisolid_to_direction(float u, float v, float lens, float fov, float width, float height) +{ + u = (u - 0.5f) * width; + v = (v - 0.5f) * height; + + float rmax = 2.0f * lens * sinf(fov * 0.25f); + float r = sqrtf(u * u + v * v); + + if (r > rmax) + return zero_float3(); + + float phi = safe_acosf((r != 0.0f) ? 
u / r : 0.0f); + float theta = 2.0f * asinf(r / (2.0f * lens)); + + if (v < 0.0f) + phi = -phi; + + return make_float3(cosf(theta), -cosf(phi) * sinf(theta), sinf(phi) * sinf(theta)); +} + +/* Mirror Ball <-> Cartesion direction */ + +ccl_device float3 mirrorball_to_direction(float u, float v) +{ + /* point on sphere */ + float3 dir; + + dir.x = 2.0f * u - 1.0f; + dir.z = 2.0f * v - 1.0f; + + if (dir.x * dir.x + dir.z * dir.z > 1.0f) + return zero_float3(); + + dir.y = -sqrtf(max(1.0f - dir.x * dir.x - dir.z * dir.z, 0.0f)); + + /* reflection */ + float3 I = make_float3(0.0f, -1.0f, 0.0f); + + return 2.0f * dot(dir, I) * dir - I; +} + +ccl_device float2 direction_to_mirrorball(float3 dir) +{ + /* inverse of mirrorball_to_direction */ + dir.y -= 1.0f; + + float div = 2.0f * sqrtf(max(-0.5f * dir.y, 0.0f)); + if (div > 0.0f) + dir /= div; + + float u = 0.5f * (dir.x + 1.0f); + float v = 0.5f * (dir.z + 1.0f); + + return make_float2(u, v); +} + +ccl_device_inline float3 panorama_to_direction(ccl_constant KernelCamera *cam, float u, float v) +{ + switch (cam->panorama_type) { + case PANORAMA_EQUIRECTANGULAR: + return equirectangular_range_to_direction(u, v, cam->equirectangular_range); + case PANORAMA_MIRRORBALL: + return mirrorball_to_direction(u, v); + case PANORAMA_FISHEYE_EQUIDISTANT: + return fisheye_to_direction(u, v, cam->fisheye_fov); + case PANORAMA_FISHEYE_EQUISOLID: + default: + return fisheye_equisolid_to_direction( + u, v, cam->fisheye_lens, cam->fisheye_fov, cam->sensorwidth, cam->sensorheight); + } +} + +ccl_device_inline float2 direction_to_panorama(ccl_constant KernelCamera *cam, float3 dir) +{ + switch (cam->panorama_type) { + case PANORAMA_EQUIRECTANGULAR: + return direction_to_equirectangular_range(dir, cam->equirectangular_range); + case PANORAMA_MIRRORBALL: + return direction_to_mirrorball(dir); + case PANORAMA_FISHEYE_EQUIDISTANT: + return direction_to_fisheye(dir, cam->fisheye_fov); + case PANORAMA_FISHEYE_EQUISOLID: + default: + return direction_to_fisheye_equisolid( + dir, cam->fisheye_lens, cam->sensorwidth, cam->sensorheight); + } +} + +ccl_device_inline void spherical_stereo_transform(ccl_constant KernelCamera *cam, + ccl_private float3 *P, + ccl_private float3 *D) +{ + float interocular_offset = cam->interocular_offset; + + /* Interocular offset of zero means either non stereo, or stereo without + * spherical stereo. */ + kernel_assert(interocular_offset != 0.0f); + + if (cam->pole_merge_angle_to > 0.0f) { + const float pole_merge_angle_from = cam->pole_merge_angle_from, + pole_merge_angle_to = cam->pole_merge_angle_to; + float altitude = fabsf(safe_asinf((*D).z)); + if (altitude > pole_merge_angle_to) { + interocular_offset = 0.0f; + } + else if (altitude > pole_merge_angle_from) { + float fac = (altitude - pole_merge_angle_from) / + (pole_merge_angle_to - pole_merge_angle_from); + float fade = cosf(fac * M_PI_2_F); + interocular_offset *= fade; + } + } + + float3 up = make_float3(0.0f, 0.0f, 1.0f); + float3 side = normalize(cross(*D, up)); + float3 stereo_offset = side * interocular_offset; + + *P += stereo_offset; + + /* Convergence distance is FLT_MAX in the case of parallel convergence mode, + * no need to modify direction in this case either. 
*/ + const float convergence_distance = cam->convergence_distance; + + if (convergence_distance != FLT_MAX) { + float3 screen_offset = convergence_distance * (*D); + *D = normalize(screen_offset - stereo_offset); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h index fa88c66f536..b2a9c9555c3 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h @@ -32,7 +32,7 @@ #pragma once -#include "kernel/sample/sample_mapping.h" +#include "kernel/sample/mapping.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h index dd3b4500b1f..3139cb612fa 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse.h @@ -32,7 +32,7 @@ #pragma once -#include "kernel/sample/sample_mapping.h" +#include "kernel/sample/mapping.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h index 1e70d3e534e..fbb82617dad 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h @@ -32,7 +32,7 @@ #pragma once -#include "kernel/sample/sample_mapping.h" +#include "kernel/sample/mapping.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h index ff554d4a60e..f55ea0f6a2e 100644 --- a/intern/cycles/kernel/closure/bsdf_hair_principled.h +++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h @@ -20,7 +20,7 @@ # include #endif -#include "kernel/util/util_color.h" +#include "kernel/util/color.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index 28aac368f2b..83242a73685 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -32,8 +32,11 @@ #pragma once -#include "kernel/sample/sample_pattern.h" -#include "kernel/util/util_lookup_table.h" +#include "kernel/closure/bsdf_util.h" + +#include "kernel/sample/pattern.h" + +#include "kernel/util/lookup_table.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h index b1ab8d7ffd0..77370fbec4e 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h @@ -16,8 +16,8 @@ #pragma once -#include "kernel/sample/sample_lcg.h" -#include "kernel/sample/sample_mapping.h" +#include "kernel/sample/lcg.h" +#include "kernel/sample/mapping.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h index 0e3b21117b5..69376c1294d 100644 --- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h @@ -27,7 +27,7 @@ #include "kernel/closure/bsdf_util.h" -#include "kernel/sample/sample_mapping.h" +#include "kernel/sample/mapping.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h index 888c0d5d872..5ccca52255f 100644 --- a/intern/cycles/kernel/device/cpu/compat.h +++ b/intern/cycles/kernel/device/cpu/compat.h @@ -26,11 +26,11 @@ # pragma GCC diagnostic ignored "-Wuninitialized" #endif -#include 
"util/util_half.h" -#include "util/util_math.h" -#include "util/util_simd.h" -#include "util/util_texture.h" -#include "util/util_types.h" +#include "util/half.h" +#include "util/math.h" +#include "util/simd.h" +#include "util/texture.h" +#include "util/types.h" /* On x86_64, versions of glibc < 2.16 have an issue where expf is * much slower than the double version. This was fixed in glibc 2.16. diff --git a/intern/cycles/kernel/device/cpu/globals.h b/intern/cycles/kernel/device/cpu/globals.h index f3e530a9edc..dd0327b3f94 100644 --- a/intern/cycles/kernel/device/cpu/globals.h +++ b/intern/cycles/kernel/device/cpu/globals.h @@ -18,8 +18,8 @@ #pragma once -#include "kernel/kernel_types.h" -#include "kernel/util/util_profiling.h" +#include "kernel/types.h" +#include "kernel/util/profiling.h" CCL_NAMESPACE_BEGIN @@ -36,7 +36,7 @@ struct OSLShadingSystem; typedef struct KernelGlobalsCPU { #define KERNEL_TEX(type, name) texture name; -#include "kernel/kernel_textures.h" +#include "kernel/textures.h" KernelData __data; diff --git a/intern/cycles/kernel/device/cpu/kernel.cpp b/intern/cycles/kernel/device/cpu/kernel.cpp index 8519b77aa08..a16c637d5ac 100644 --- a/intern/cycles/kernel/device/cpu/kernel.cpp +++ b/intern/cycles/kernel/device/cpu/kernel.cpp @@ -85,7 +85,7 @@ void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem kg->tname.data = (type *)mem; \ kg->tname.width = size; \ } -#include "kernel/kernel_textures.h" +#include "kernel/textures.h" else { assert(0); } diff --git a/intern/cycles/kernel/device/cpu/kernel.h b/intern/cycles/kernel/device/cpu/kernel.h index 28337a58898..c49d7ca445a 100644 --- a/intern/cycles/kernel/device/cpu/kernel.h +++ b/intern/cycles/kernel/device/cpu/kernel.h @@ -18,9 +18,9 @@ /* CPU Kernel Interface */ -#include "util/util_types.h" +#include "util/types.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index 148b6a33cb5..6df5d7787fc 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -29,26 +29,26 @@ # include "kernel/device/cpu/globals.h" # include "kernel/device/cpu/image.h" -# include "kernel/integrator/integrator_state.h" -# include "kernel/integrator/integrator_state_flow.h" -# include "kernel/integrator/integrator_state_util.h" - -# include "kernel/integrator/integrator_init_from_camera.h" -# include "kernel/integrator/integrator_init_from_bake.h" -# include "kernel/integrator/integrator_intersect_closest.h" -# include "kernel/integrator/integrator_intersect_shadow.h" -# include "kernel/integrator/integrator_intersect_subsurface.h" -# include "kernel/integrator/integrator_intersect_volume_stack.h" -# include "kernel/integrator/integrator_shade_background.h" -# include "kernel/integrator/integrator_shade_light.h" -# include "kernel/integrator/integrator_shade_shadow.h" -# include "kernel/integrator/integrator_shade_surface.h" -# include "kernel/integrator/integrator_shade_volume.h" -# include "kernel/integrator/integrator_megakernel.h" - -# include "kernel/film/film_adaptive_sampling.h" -# include "kernel/film/film_read.h" -# include "kernel/film/film_id_passes.h" +# include "kernel/integrator/state.h" +# include "kernel/integrator/state_flow.h" +# include "kernel/integrator/state_util.h" + +# include "kernel/integrator/init_from_camera.h" +# include "kernel/integrator/init_from_bake.h" +# include 
"kernel/integrator/intersect_closest.h" +# include "kernel/integrator/intersect_shadow.h" +# include "kernel/integrator/intersect_subsurface.h" +# include "kernel/integrator/intersect_volume_stack.h" +# include "kernel/integrator/shade_background.h" +# include "kernel/integrator/shade_light.h" +# include "kernel/integrator/shade_shadow.h" +# include "kernel/integrator/shade_surface.h" +# include "kernel/integrator/shade_volume.h" +# include "kernel/integrator/megakernel.h" + +# include "kernel/film/adaptive_sampling.h" +# include "kernel/film/read.h" +# include "kernel/film/id_passes.h" # include "kernel/bake/bake.h" diff --git a/intern/cycles/kernel/device/cpu/kernel_avx.cpp b/intern/cycles/kernel/device/cpu/kernel_avx.cpp index 220768036ab..cece750a255 100644 --- a/intern/cycles/kernel/device/cpu/kernel_avx.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_avx.cpp @@ -18,7 +18,7 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ -#include "util/util_optimization.h" +#include "util/optimization.h" #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX # define KERNEL_STUB diff --git a/intern/cycles/kernel/device/cpu/kernel_avx2.cpp b/intern/cycles/kernel/device/cpu/kernel_avx2.cpp index 90c05113cbe..fad4581236e 100644 --- a/intern/cycles/kernel/device/cpu/kernel_avx2.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_avx2.cpp @@ -18,7 +18,7 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ -#include "util/util_optimization.h" +#include "util/optimization.h" #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 # define KERNEL_STUB diff --git a/intern/cycles/kernel/device/cpu/kernel_sse2.cpp b/intern/cycles/kernel/device/cpu/kernel_sse2.cpp index fb85ef5b0d0..5fb4849ac08 100644 --- a/intern/cycles/kernel/device/cpu/kernel_sse2.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_sse2.cpp @@ -18,7 +18,7 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ -#include "util/util_optimization.h" +#include "util/optimization.h" #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 # define KERNEL_STUB diff --git a/intern/cycles/kernel/device/cpu/kernel_sse3.cpp b/intern/cycles/kernel/device/cpu/kernel_sse3.cpp index 87baf04258a..c9424682fd4 100644 --- a/intern/cycles/kernel/device/cpu/kernel_sse3.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_sse3.cpp @@ -18,7 +18,7 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ -#include "util/util_optimization.h" +#include "util/optimization.h" #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 # define KERNEL_STUB diff --git a/intern/cycles/kernel/device/cpu/kernel_sse41.cpp b/intern/cycles/kernel/device/cpu/kernel_sse41.cpp index bb421d58815..849ebf51989 100644 --- a/intern/cycles/kernel/device/cpu/kernel_sse41.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_sse41.cpp @@ -18,7 +18,7 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. 
*/ -#include "util/util_optimization.h" +#include "util/optimization.h" #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 # define KERNEL_STUB diff --git a/intern/cycles/kernel/device/cuda/compat.h b/intern/cycles/kernel/device/cuda/compat.h index 8a50eb1a3d5..1ee82e6eb7c 100644 --- a/intern/cycles/kernel/device/cuda/compat.h +++ b/intern/cycles/kernel/device/cuda/compat.h @@ -137,5 +137,5 @@ __device__ float __half2float(const half h) /* Types */ -#include "util/util_half.h" -#include "util/util_types.h" +#include "util/half.h" +#include "util/types.h" diff --git a/intern/cycles/kernel/device/cuda/globals.h b/intern/cycles/kernel/device/cuda/globals.h index cde935198b3..e5023fad40c 100644 --- a/intern/cycles/kernel/device/cuda/globals.h +++ b/intern/cycles/kernel/device/cuda/globals.h @@ -18,11 +18,11 @@ #pragma once -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "kernel/integrator/integrator_state.h" +#include "kernel/integrator/state.h" -#include "kernel/util/util_profiling.h" +#include "kernel/util/profiling.h" CCL_NAMESPACE_BEGIN @@ -36,7 +36,7 @@ typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals; /* Global scene data and textures */ __constant__ KernelData __data; #define KERNEL_TEX(type, name) const __constant__ __device__ type *name; -#include "kernel/kernel_textures.h" +#include "kernel/textures.h" /* Integrator state */ __constant__ IntegratorStateGPU __integrator_state; diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index aa360b3016a..f86a8c692aa 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -21,26 +21,26 @@ #include "kernel/device/gpu/parallel_sorted_index.h" #include "kernel/device/gpu/work_stealing.h" -#include "kernel/integrator/integrator_state.h" -#include "kernel/integrator/integrator_state_flow.h" -#include "kernel/integrator/integrator_state_util.h" - -#include "kernel/integrator/integrator_init_from_bake.h" -#include "kernel/integrator/integrator_init_from_camera.h" -#include "kernel/integrator/integrator_intersect_closest.h" -#include "kernel/integrator/integrator_intersect_shadow.h" -#include "kernel/integrator/integrator_intersect_subsurface.h" -#include "kernel/integrator/integrator_intersect_volume_stack.h" -#include "kernel/integrator/integrator_shade_background.h" -#include "kernel/integrator/integrator_shade_light.h" -#include "kernel/integrator/integrator_shade_shadow.h" -#include "kernel/integrator/integrator_shade_surface.h" -#include "kernel/integrator/integrator_shade_volume.h" +#include "kernel/integrator/state.h" +#include "kernel/integrator/state_flow.h" +#include "kernel/integrator/state_util.h" + +#include "kernel/integrator/init_from_bake.h" +#include "kernel/integrator/init_from_camera.h" +#include "kernel/integrator/intersect_closest.h" +#include "kernel/integrator/intersect_shadow.h" +#include "kernel/integrator/intersect_subsurface.h" +#include "kernel/integrator/intersect_volume_stack.h" +#include "kernel/integrator/shade_background.h" +#include "kernel/integrator/shade_light.h" +#include "kernel/integrator/shade_shadow.h" +#include "kernel/integrator/shade_surface.h" +#include "kernel/integrator/shade_volume.h" #include "kernel/bake/bake.h" -#include "kernel/film/film_adaptive_sampling.h" -#include "kernel/film/film_read.h" +#include "kernel/film/adaptive_sampling.h" +#include "kernel/film/read.h" /* -------------------------------------------------------------------- * Integrator. 
diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h index db4a4bf71e0..d7416beb783 100644 --- a/intern/cycles/kernel/device/gpu/parallel_active_index.h +++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h @@ -23,7 +23,7 @@ CCL_NAMESPACE_BEGIN * * Shared memory requirement is `sizeof(int) * (number_of_warps + 1)`. */ -#include "util/util_atomic.h" +#include "util/atomic.h" #ifdef __HIP__ # define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 1024 diff --git a/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h b/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h index aabe6e2e27a..6de3a022569 100644 --- a/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h +++ b/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h @@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN * This is used for an array the size of the number of shaders in the scene * which is not usually huge, so might not be a significant bottleneck. */ -#include "util/util_atomic.h" +#include "util/atomic.h" #ifdef __HIP__ # define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 1024 diff --git a/intern/cycles/kernel/device/gpu/parallel_sorted_index.h b/intern/cycles/kernel/device/gpu/parallel_sorted_index.h index 7570c5a6bbd..c06d7be444f 100644 --- a/intern/cycles/kernel/device/gpu/parallel_sorted_index.h +++ b/intern/cycles/kernel/device/gpu/parallel_sorted_index.h @@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN * * TODO: there may be ways to optimize this to avoid this many atomic ops? */ -#include "util/util_atomic.h" +#include "util/atomic.h" #ifdef __HIP__ # define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 1024 diff --git a/intern/cycles/kernel/device/hip/compat.h b/intern/cycles/kernel/device/hip/compat.h index 089976d84e4..282c3eca641 100644 --- a/intern/cycles/kernel/device/hip/compat.h +++ b/intern/cycles/kernel/device/hip/compat.h @@ -116,5 +116,5 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object tex /* Types */ -#include "util/util_half.h" -#include "util/util_types.h" +#include "util/half.h" +#include "util/types.h" diff --git a/intern/cycles/kernel/device/hip/globals.h b/intern/cycles/kernel/device/hip/globals.h index 079944bd8f2..d9a560d668b 100644 --- a/intern/cycles/kernel/device/hip/globals.h +++ b/intern/cycles/kernel/device/hip/globals.h @@ -18,11 +18,11 @@ #pragma once -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "kernel/integrator/integrator_state.h" +#include "kernel/integrator/state.h" -#include "kernel/util/util_profiling.h" +#include "kernel/util/profiling.h" CCL_NAMESPACE_BEGIN @@ -36,7 +36,7 @@ typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals; /* Global scene data and textures */ __constant__ KernelData __data; #define KERNEL_TEX(type, name) __attribute__((used)) const __constant__ __device__ type *name; -#include "kernel/kernel_textures.h" +#include "kernel/textures.h" /* Integrator state */ __constant__ IntegratorStateGPU __integrator_state; diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h index d27b7d55475..835e4621d47 100644 --- a/intern/cycles/kernel/device/optix/compat.h +++ b/intern/cycles/kernel/device/optix/compat.h @@ -129,5 +129,5 @@ __device__ float __half2float(const half h) /* Types */ -#include "util/util_half.h" -#include "util/util_types.h" +#include "util/half.h" +#include "util/types.h" diff --git a/intern/cycles/kernel/device/optix/globals.h b/intern/cycles/kernel/device/optix/globals.h index 
e038bc1797a..e9b72369cd5 100644 --- a/intern/cycles/kernel/device/optix/globals.h +++ b/intern/cycles/kernel/device/optix/globals.h @@ -18,11 +18,11 @@ #pragma once -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "kernel/integrator/integrator_state.h" +#include "kernel/integrator/state.h" -#include "kernel/util/util_profiling.h" +#include "kernel/util/profiling.h" CCL_NAMESPACE_BEGIN @@ -42,7 +42,7 @@ struct KernelParamsOptiX { /* Global scene data and textures */ KernelData data; #define KERNEL_TEX(type, name) const type *name; -#include "kernel/kernel_textures.h" +#include "kernel/textures.h" /* Integrator state */ IntegratorStateGPU __integrator_state; diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index a3bafb9846c..6989219cd9f 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -21,14 +21,14 @@ #include "kernel/device/gpu/image.h" /* Texture lookup uses normal CUDA intrinsics. */ -#include "kernel/integrator/integrator_state.h" -#include "kernel/integrator/integrator_state_flow.h" -#include "kernel/integrator/integrator_state_util.h" - -#include "kernel/integrator/integrator_intersect_closest.h" -#include "kernel/integrator/integrator_intersect_shadow.h" -#include "kernel/integrator/integrator_intersect_subsurface.h" -#include "kernel/integrator/integrator_intersect_volume_stack.h" +#include "kernel/integrator/state.h" +#include "kernel/integrator/state_flow.h" +#include "kernel/integrator/state_util.h" + +#include "kernel/integrator/intersect_closest.h" +#include "kernel/integrator/intersect_shadow.h" +#include "kernel/integrator/intersect_subsurface.h" +#include "kernel/integrator/intersect_volume_stack.h" // clang-format on diff --git a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu index bf787e29eaa..071e9deae0b 100644 --- a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu +++ b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu @@ -18,7 +18,8 @@ * much longer to compiler. This is only loaded when needed by the scene. */ #include "kernel/device/optix/kernel.cu" -#include "kernel/integrator/integrator_shade_surface.h" + +#include "kernel/integrator/shade_surface.h" extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_raytrace() { diff --git a/intern/cycles/kernel/film/accumulate.h b/intern/cycles/kernel/film/accumulate.h new file mode 100644 index 00000000000..30e1afea751 --- /dev/null +++ b/intern/cycles/kernel/film/accumulate.h @@ -0,0 +1,559 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "kernel/film/adaptive_sampling.h" +#include "kernel/film/write_passes.h" + +#include "kernel/integrator/shadow_catcher.h" + +CCL_NAMESPACE_BEGIN + +/* -------------------------------------------------------------------- + * BSDF Evaluation + * + * BSDF evaluation result, split between diffuse and glossy. This is used to + * accumulate render passes separately. Note that reflection, transmission + * and volume scattering are written to different render passes, but we assume + * that only one of those can happen at a bounce, and so do not need to accumulate + * them separately. */ + +ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval, + const bool is_diffuse, + float3 value) +{ + eval->diffuse = zero_float3(); + eval->glossy = zero_float3(); + + if (is_diffuse) { + eval->diffuse = value; + } + else { + eval->glossy = value; + } +} + +ccl_device_inline void bsdf_eval_accum(ccl_private BsdfEval *eval, + const bool is_diffuse, + float3 value, + float mis_weight) +{ + value *= mis_weight; + + if (is_diffuse) { + eval->diffuse += value; + } + else { + eval->glossy += value; + } +} + +ccl_device_inline bool bsdf_eval_is_zero(ccl_private BsdfEval *eval) +{ + return is_zero(eval->diffuse) && is_zero(eval->glossy); +} + +ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float value) +{ + eval->diffuse *= value; + eval->glossy *= value; +} + +ccl_device_inline void bsdf_eval_mul3(ccl_private BsdfEval *eval, float3 value) +{ + eval->diffuse *= value; + eval->glossy *= value; +} + +ccl_device_inline float3 bsdf_eval_sum(ccl_private const BsdfEval *eval) +{ + return eval->diffuse + eval->glossy; +} + +ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEval *eval) +{ + /* Ratio of diffuse and glossy to recover proportions for writing to render pass. + * We assume reflection, transmission and volume scatter to be exclusive. */ + return safe_divide_float3_float3(eval->diffuse, eval->diffuse + eval->glossy); +} + +/* -------------------------------------------------------------------- + * Clamping + * + * Clamping is done on a per-contribution basis so that we can write directly + * to render buffers instead of using per-thread memory, and to avoid the + * impact of clamping on other contributions. */ + +ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, ccl_private float3 *L, int bounce) +{ +#ifdef __KERNEL_DEBUG_NAN__ + if (!isfinite3_safe(*L)) { + kernel_assert(!"Cycles sample with non-finite value detected"); + } +#endif + /* Make sure all components are finite, allowing the contribution to be usable by adaptive + * sampling convergence check, but also to make it so render result never causes issues with + * post-processing. */ + *L = ensure_finite3(*L); + +#ifdef __CLAMP_SAMPLE__ + float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect : + kernel_data.integrator.sample_clamp_direct; + float sum = reduce_add(fabs(*L)); + if (sum > limit) { + *L *= limit / sum; + } +#endif +} + +/* -------------------------------------------------------------------- + * Pass accumulation utilities. + */ + +/* Get pointer to pixel in render buffer. 
*/ +ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer( + KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + return render_buffer + render_buffer_offset; +} + +/* -------------------------------------------------------------------- + * Adaptive sampling. + */ + +ccl_device_inline int kernel_accum_sample(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *ccl_restrict render_buffer, + int sample) +{ + if (kernel_data.film.pass_sample_count == PASS_UNUSED) { + return sample; + } + + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + + return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1); +} + +ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg, + const int sample, + const float3 contribution, + ccl_global float *ccl_restrict buffer) +{ + /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping + * criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte + * Carlo global illumination" except that here it is applied per pixel and not in hierarchical + * tiles. */ + + if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) { + return; + } + + if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) { + kernel_write_pass_float4( + buffer + kernel_data.film.pass_adaptive_aux_buffer, + make_float4(contribution.x * 2.0f, contribution.y * 2.0f, contribution.z * 2.0f, 0.0f)); + } +} + +/* -------------------------------------------------------------------- + * Shadow catcher. + */ + +#ifdef __SHADOW_CATCHER__ + +/* Accumulate contribution to the Shadow Catcher pass. + * + * Returns truth if the contribution is fully handled here and is not to be added to the other + * passes (like combined, adaptive sampling). */ + +ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg, + const uint32_t path_flag, + const float3 contribution, + ccl_global float *ccl_restrict buffer) +{ + if (!kernel_data.integrator.has_shadow_catcher) { + return false; + } + + kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); + + /* Matte pass. */ + if (kernel_shadow_catcher_is_matte_path(path_flag)) { + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution); + /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive + * sampling is based on how noisy the combined pass is as if there were no catchers in the + * scene. */ + } + + /* Shadow catcher pass. 
*/ + if (kernel_shadow_catcher_is_object_pass(path_flag)) { + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); + return true; + } + + return false; +} + +ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg, + const uint32_t path_flag, + const float3 contribution, + const float transparent, + ccl_global float *ccl_restrict buffer) +{ + if (!kernel_data.integrator.has_shadow_catcher) { + return false; + } + + kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); + + if (path_flag & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { + return true; + } + + /* Matte pass. */ + if (kernel_shadow_catcher_is_matte_path(path_flag)) { + kernel_write_pass_float4( + buffer + kernel_data.film.pass_shadow_catcher_matte, + make_float4(contribution.x, contribution.y, contribution.z, transparent)); + /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive + * sampling is based on how noisy the combined pass is as if there were no catchers in the + * scene. */ + } + + /* Shadow catcher pass. */ + if (kernel_shadow_catcher_is_object_pass(path_flag)) { + /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the + * calculation and the alpha channel of the pass contains numbers of samples contributed to a + * pixel of the pass. */ + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); + return true; + } + + return false; +} + +ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict buffer) +{ + if (!kernel_data.integrator.has_shadow_catcher) { + return; + } + + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); + + /* Matte pass. */ + if (kernel_shadow_catcher_is_matte_path(path_flag)) { + kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent); + } +} + +#endif /* __SHADOW_CATCHER__ */ + +/* -------------------------------------------------------------------- + * Render passes. + */ + +/* Write combined pass. */ +ccl_device_inline void kernel_accum_combined_pass(KernelGlobals kg, + const uint32_t path_flag, + const int sample, + const float3 contribution, + ccl_global float *ccl_restrict buffer) +{ +#ifdef __SHADOW_CATCHER__ + if (kernel_accum_shadow_catcher(kg, path_flag, contribution, buffer)) { + return; + } +#endif + + if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { + kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution); + } + + kernel_accum_adaptive_buffer(kg, sample, contribution, buffer); +} + +/* Write combined pass with transparency. */ +ccl_device_inline void kernel_accum_combined_transparent_pass(KernelGlobals kg, + const uint32_t path_flag, + const int sample, + const float3 contribution, + const float transparent, + ccl_global float *ccl_restrict + buffer) +{ +#ifdef __SHADOW_CATCHER__ + if (kernel_accum_shadow_catcher_transparent(kg, path_flag, contribution, transparent, buffer)) { + return; + } +#endif + + if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { + kernel_write_pass_float4( + buffer + kernel_data.film.pass_combined, + make_float4(contribution.x, contribution.y, contribution.z, transparent)); + } + + kernel_accum_adaptive_buffer(kg, sample, contribution, buffer); +} + +/* Write background or emission to appropriate pass. 
*/ +ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg, + ConstIntegratorState state, + float3 contribution, + ccl_global float *ccl_restrict + buffer, + const int pass) +{ + if (!(kernel_data.film.light_pass_flag & PASS_ANY)) { + return; + } + +#ifdef __PASSES__ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + int pass_offset = PASS_UNUSED; + + /* Denoising albedo. */ +# ifdef __DENOISING_FEATURES__ + if (path_flag & PATH_RAY_DENOISING_FEATURES) { + if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + const float3 denoising_albedo = denoising_feature_throughput * contribution; + kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + } + } +# endif /* __DENOISING_FEATURES__ */ + + if (!(path_flag & PATH_RAY_ANY_PASS)) { + /* Directly visible, write to emission or background pass. */ + pass_offset = pass; + } + else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { + /* Indirectly visible through reflection. */ + const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? + ((INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_glossy_direct : + kernel_data.film.pass_glossy_indirect) : + ((INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_transmission_direct : + kernel_data.film.pass_transmission_indirect); + + if (glossy_pass_offset != PASS_UNUSED) { + /* Glossy is a subset of the throughput, reconstruct it here using the + * diffuse-glossy ratio. */ + const float3 ratio = INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); + const float3 glossy_contribution = (one_float3() - ratio) * contribution; + kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); + } + + /* Reconstruct diffuse subset of throughput. */ + pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_diffuse_direct : + kernel_data.film.pass_diffuse_indirect; + if (pass_offset != PASS_UNUSED) { + contribution *= INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); + } + } + else if (path_flag & PATH_RAY_VOLUME_PASS) { + /* Indirectly visible through volume. */ + pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ? + kernel_data.film.pass_volume_direct : + kernel_data.film.pass_volume_indirect; + } + + /* Single write call for GPU coherence. */ + if (pass_offset != PASS_UNUSED) { + kernel_write_pass_float3(buffer + pass_offset, contribution); + } +#endif /* __PASSES__ */ +} + +/* Write light contribution to render buffer. */ +ccl_device_inline void kernel_accum_light(KernelGlobals kg, + ConstIntegratorShadowState state, + ccl_global float *ccl_restrict render_buffer) +{ + /* The throughput for shadow paths already contains the light shader evaluation. */ + float3 contribution = INTEGRATOR_STATE(state, shadow_path, throughput); + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce)); + + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, shadow_path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + ccl_global float *buffer = render_buffer + render_buffer_offset; + + const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag); + const int sample = INTEGRATOR_STATE(state, shadow_path, sample); + + /* Ambient occlusion. 
*/ + if (path_flag & PATH_RAY_SHADOW_FOR_AO) { + if ((kernel_data.kernel_features & KERNEL_FEATURE_AO_PASS) && (path_flag & PATH_RAY_CAMERA)) { + kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, contribution); + } + if (kernel_data.kernel_features & KERNEL_FEATURE_AO_ADDITIVE) { + const float3 ao_weight = INTEGRATOR_STATE(state, shadow_path, unshadowed_throughput); + kernel_accum_combined_pass(kg, path_flag, sample, contribution * ao_weight, buffer); + } + return; + } + + /* Direct light shadow. */ + kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer); + +#ifdef __PASSES__ + if (kernel_data.film.light_pass_flag & PASS_ANY) { + const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag); + int pass_offset = PASS_UNUSED; + + if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { + /* Indirectly visible through reflection. */ + const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? + ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? + kernel_data.film.pass_glossy_direct : + kernel_data.film.pass_glossy_indirect) : + ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? + kernel_data.film.pass_transmission_direct : + kernel_data.film.pass_transmission_indirect); + + if (glossy_pass_offset != PASS_UNUSED) { + /* Glossy is a subset of the throughput, reconstruct it here using the + * diffuse-glossy ratio. */ + const float3 ratio = INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio); + const float3 glossy_contribution = (one_float3() - ratio) * contribution; + kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); + } + + /* Reconstruct diffuse subset of throughput. */ + pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? + kernel_data.film.pass_diffuse_direct : + kernel_data.film.pass_diffuse_indirect; + if (pass_offset != PASS_UNUSED) { + contribution *= INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio); + } + } + else if (path_flag & PATH_RAY_VOLUME_PASS) { + /* Indirectly visible through volume. */ + pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? + kernel_data.film.pass_volume_direct : + kernel_data.film.pass_volume_indirect; + } + + /* Single write call for GPU coherence. */ + if (pass_offset != PASS_UNUSED) { + kernel_write_pass_float3(buffer + pass_offset, contribution); + } + + /* Write shadow pass. */ + if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) && + (path_flag & PATH_RAY_CAMERA)) { + const float3 unshadowed_throughput = INTEGRATOR_STATE( + state, shadow_path, unshadowed_throughput); + const float3 shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput); + const float3 shadow = safe_divide_float3_float3(shadowed_throughput, unshadowed_throughput) * + kernel_data.film.pass_shadow_scale; + kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow); + } + } +#endif +} + +/* Write transparency to render buffer. + * + * Note that we accumulate transparency = 1 - alpha in the render buffer. + * Otherwise we'd have to write alpha on path termination, which happens + * in many places. 
*/ +ccl_device_inline void kernel_accum_transparent(KernelGlobals kg, + ConstIntegratorState state, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict buffer) +{ + if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { + kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent); + } + + kernel_accum_shadow_catcher_transparent_only(kg, path_flag, transparent, buffer); +} + +/* Write holdout to render buffer. */ +ccl_device_inline void kernel_accum_holdout(KernelGlobals kg, + ConstIntegratorState state, + const uint32_t path_flag, + const float transparent, + ccl_global float *ccl_restrict render_buffer) +{ + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + kernel_accum_transparent(kg, state, path_flag, transparent, buffer); +} + +/* Write background contribution to render buffer. + * + * Includes transparency, matching kernel_accum_transparent. */ +ccl_device_inline void kernel_accum_background(KernelGlobals kg, + ConstIntegratorState state, + const float3 L, + const float transparent, + const bool is_transparent_background_ray, + ccl_global float *ccl_restrict render_buffer) +{ + float3 contribution = INTEGRATOR_STATE(state, path, throughput) * L; + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); + + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if (is_transparent_background_ray) { + kernel_accum_transparent(kg, state, path_flag, transparent, buffer); + } + else { + const int sample = INTEGRATOR_STATE(state, path, sample); + kernel_accum_combined_transparent_pass( + kg, path_flag, sample, contribution, transparent, buffer); + } + kernel_accum_emission_or_background_pass( + kg, state, contribution, buffer, kernel_data.film.pass_background); +} + +/* Write emission to render buffer. */ +ccl_device_inline void kernel_accum_emission(KernelGlobals kg, + ConstIntegratorState state, + const float3 throughput, + const float3 L, + ccl_global float *ccl_restrict render_buffer) +{ + float3 contribution = throughput * L; + kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); + + ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + const int sample = INTEGRATOR_STATE(state, path, sample); + + kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer); + kernel_accum_emission_or_background_pass( + kg, state, contribution, buffer, kernel_data.film.pass_emission); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/adaptive_sampling.h b/intern/cycles/kernel/film/adaptive_sampling.h new file mode 100644 index 00000000000..468c5d4486e --- /dev/null +++ b/intern/cycles/kernel/film/adaptive_sampling.h @@ -0,0 +1,160 @@ +/* + * Copyright 2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/film/write_passes.h" + +CCL_NAMESPACE_BEGIN + +/* Check whether the pixel has converged and should not be sampled anymore. */ + +ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg, + ConstIntegratorState state, + ccl_global float *render_buffer) +{ + if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) { + return true; + } + + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + ccl_global float *buffer = render_buffer + render_buffer_offset; + + const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; + return buffer[aux_w_offset] == 0.0f; +} + +/* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */ + +ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg, + ccl_global float *render_buffer, + int x, + int y, + float threshold, + bool reset, + int offset, + int stride) +{ + kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_sample_count != PASS_UNUSED); + + const int render_pixel_index = offset + x + y * stride; + ccl_global float *buffer = render_buffer + + (uint64_t)render_pixel_index * kernel_data.film.pass_stride; + + /* TODO(Stefan): Is this better in linear, sRGB or something else? */ + + const float4 A = kernel_read_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer); + if (!reset && A.w != 0.0f) { + /* If the pixel was considered converged, its state will not change in this kernel. Early + * output before doing any math. + * + * TODO(sergey): On a GPU it might be better to keep thread alive for better coherency? */ + return true; + } + + const float4 I = kernel_read_pass_float4(buffer + kernel_data.film.pass_combined); + + const float sample = __float_as_uint(buffer[kernel_data.film.pass_sample_count]); + const float inv_sample = 1.0f / sample; + + /* The per pixel error as seen in section 2.1 of + * "A hierarchical automatic stopping condition for Monte Carlo global illumination" */ + const float error_difference = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) * + inv_sample; + const float error_normalize = sqrtf((I.x + I.y + I.z) * inv_sample); + /* A small epsilon is added to the divisor to prevent division by zero. */ + const float error = error_difference / (0.0001f + error_normalize); + const bool did_converge = (error < threshold); + + const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; + buffer[aux_w_offset] = did_converge; + + return did_converge; +} + +/* This is a simple box filter in two passes. + * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. 
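The convergence check above compares the even-sample estimate held in the auxiliary buffer against the full accumulation and normalises the difference by the pixel brightness, following the referenced stopping-condition heuristic. A small self-contained sketch of that metric on plain arrays; pixel_converged is an illustrative name, not a kernel entry point.

#include <cmath>

// Per-pixel stopping criterion in the spirit of the heuristic referenced above:
// compare the even-sample estimate A against the full estimate I, normalise by
// the square root of the brightness, and stop once the error drops below the
// user threshold.
static bool pixel_converged(const float I[3], const float A[3],
                            float num_samples, float threshold)
{
  const float inv_sample = 1.0f / num_samples;
  const float difference = (std::fabs(I[0] - A[0]) + std::fabs(I[1] - A[1]) +
                            std::fabs(I[2] - A[2])) * inv_sample;
  const float normalize = std::sqrt((I[0] + I[1] + I[2]) * inv_sample);
  // Small epsilon guards against division by zero for black pixels.
  const float error = difference / (0.0001f + normalize);
  return error < threshold;
}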
*/ + +ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg, + ccl_global float *render_buffer, + int y, + int start_x, + int width, + int offset, + int stride) +{ + kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); + + bool prev = false; + for (int x = start_x; x < start_x + width; ++x) { + int index = offset + x + y * stride; + ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride; + const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; + + if (buffer[aux_w_offset] == 0.0f) { + if (x > start_x && !prev) { + index = index - 1; + buffer = render_buffer + index * kernel_data.film.pass_stride; + buffer[aux_w_offset] = 0.0f; + } + prev = true; + } + else { + if (prev) { + buffer[aux_w_offset] = 0.0f; + } + prev = false; + } + } +} + +ccl_device void kernel_adaptive_sampling_filter_y(KernelGlobals kg, + ccl_global float *render_buffer, + int x, + int start_y, + int height, + int offset, + int stride) +{ + kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); + + bool prev = false; + for (int y = start_y; y < start_y + height; ++y) { + int index = offset + x + y * stride; + ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride; + const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; + + if (buffer[aux_w_offset] == 0.0f) { + if (y > start_y && !prev) { + index = index - stride; + buffer = render_buffer + index * kernel_data.film.pass_stride; + buffer[aux_w_offset] = 0.0f; + } + prev = true; + } + else { + if (prev) { + buffer[aux_w_offset] = 0.0f; + } + prev = false; + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/film_accumulate.h b/intern/cycles/kernel/film/film_accumulate.h deleted file mode 100644 index 91424fdbe21..00000000000 --- a/intern/cycles/kernel/film/film_accumulate.h +++ /dev/null @@ -1,559 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/film/film_adaptive_sampling.h" -#include "kernel/film/film_write_passes.h" - -#include "kernel/integrator/integrator_shadow_catcher.h" - -CCL_NAMESPACE_BEGIN - -/* -------------------------------------------------------------------- - * BSDF Evaluation - * - * BSDF evaluation result, split between diffuse and glossy. This is used to - * accumulate render passes separately. Note that reflection, transmission - * and volume scattering are written to different render passes, but we assume - * that only one of those can happen at a bounce, and so do not need to accumulate - * them separately. 
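The two filter kernels just above amount to a separable dilation of the "still needs samples" mask: a row pass followed by a column pass clears the converged flag of any pixel adjacent to an unconverged one, so its neighbours keep sampling as well. Below is a compact sketch of one such pass on a plain boolean mask; dilate_row is a hypothetical helper, and true here plays the role of the zeroed auxiliary w channel.

#include <vector>

// One-dimensional pass of the neighbour dilation: if a pixel still needs
// samples, the pixel just before the start of that run and the pixel just
// after its end are marked as needing samples too. Running this over rows and
// then over columns gives the two-pass box filter.
static void dilate_row(std::vector<bool> &needs_samples)
{
  bool prev = false;
  for (size_t x = 0; x < needs_samples.size(); ++x) {
    if (needs_samples[x]) {
      if (x > 0 && !prev) {
        needs_samples[x - 1] = true;  // Pull the left neighbour back in.
      }
      prev = true;
    }
    else {
      if (prev) {
        needs_samples[x] = true;  // Extend one pixel past the unconverged run.
      }
      prev = false;
    }
  }
}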
*/ - -ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval, - const bool is_diffuse, - float3 value) -{ - eval->diffuse = zero_float3(); - eval->glossy = zero_float3(); - - if (is_diffuse) { - eval->diffuse = value; - } - else { - eval->glossy = value; - } -} - -ccl_device_inline void bsdf_eval_accum(ccl_private BsdfEval *eval, - const bool is_diffuse, - float3 value, - float mis_weight) -{ - value *= mis_weight; - - if (is_diffuse) { - eval->diffuse += value; - } - else { - eval->glossy += value; - } -} - -ccl_device_inline bool bsdf_eval_is_zero(ccl_private BsdfEval *eval) -{ - return is_zero(eval->diffuse) && is_zero(eval->glossy); -} - -ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float value) -{ - eval->diffuse *= value; - eval->glossy *= value; -} - -ccl_device_inline void bsdf_eval_mul3(ccl_private BsdfEval *eval, float3 value) -{ - eval->diffuse *= value; - eval->glossy *= value; -} - -ccl_device_inline float3 bsdf_eval_sum(ccl_private const BsdfEval *eval) -{ - return eval->diffuse + eval->glossy; -} - -ccl_device_inline float3 bsdf_eval_diffuse_glossy_ratio(ccl_private const BsdfEval *eval) -{ - /* Ratio of diffuse and glossy to recover proportions for writing to render pass. - * We assume reflection, transmission and volume scatter to be exclusive. */ - return safe_divide_float3_float3(eval->diffuse, eval->diffuse + eval->glossy); -} - -/* -------------------------------------------------------------------- - * Clamping - * - * Clamping is done on a per-contribution basis so that we can write directly - * to render buffers instead of using per-thread memory, and to avoid the - * impact of clamping on other contributions. */ - -ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, ccl_private float3 *L, int bounce) -{ -#ifdef __KERNEL_DEBUG_NAN__ - if (!isfinite3_safe(*L)) { - kernel_assert(!"Cycles sample with non-finite value detected"); - } -#endif - /* Make sure all components are finite, allowing the contribution to be usable by adaptive - * sampling convergence check, but also to make it so render result never causes issues with - * post-processing. */ - *L = ensure_finite3(*L); - -#ifdef __CLAMP_SAMPLE__ - float limit = (bounce > 0) ? kernel_data.integrator.sample_clamp_indirect : - kernel_data.integrator.sample_clamp_direct; - float sum = reduce_add(fabs(*L)); - if (sum > limit) { - *L *= limit / sum; - } -#endif -} - -/* -------------------------------------------------------------------- - * Pass accumulation utilities. - */ - -/* Get pointer to pixel in render buffer. */ -ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer( - KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) -{ - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - return render_buffer + render_buffer_offset; -} - -/* -------------------------------------------------------------------- - * Adaptive sampling. 
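Clamping above is applied per contribution, with separate limits for direct (bounce 0) and indirect light, and scales the whole colour uniformly so its hue is preserved. A standalone sketch of that operation; clamp_contribution and its parameters are illustrative, and the disabled-limit guard is an addition for the sketch rather than kernel behaviour.

#include <cmath>

// Scale a contribution down uniformly when the sum of its absolute channels
// exceeds the clamp limit, picking the direct or indirect limit by bounce.
static void clamp_contribution(float rgb[3], int bounce,
                               float clamp_direct, float clamp_indirect)
{
  const float limit = (bounce > 0) ? clamp_indirect : clamp_direct;
  const float sum = std::fabs(rgb[0]) + std::fabs(rgb[1]) + std::fabs(rgb[2]);
  // A non-positive limit is treated as "clamping disabled" in this sketch.
  if (limit > 0.0f && sum > limit) {
    const float scale = limit / sum;  // Uniform scale keeps the colour's hue.
    rgb[0] *= scale;
    rgb[1] *= scale;
    rgb[2] *= scale;
  }
}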
- */ - -ccl_device_inline int kernel_accum_sample(KernelGlobals kg, - ConstIntegratorState state, - ccl_global float *ccl_restrict render_buffer, - int sample) -{ - if (kernel_data.film.pass_sample_count == PASS_UNUSED) { - return sample; - } - - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); - - return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1); -} - -ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg, - const int sample, - const float3 contribution, - ccl_global float *ccl_restrict buffer) -{ - /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping - * criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte - * Carlo global illumination" except that here it is applied per pixel and not in hierarchical - * tiles. */ - - if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) { - return; - } - - if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) { - kernel_write_pass_float4( - buffer + kernel_data.film.pass_adaptive_aux_buffer, - make_float4(contribution.x * 2.0f, contribution.y * 2.0f, contribution.z * 2.0f, 0.0f)); - } -} - -/* -------------------------------------------------------------------- - * Shadow catcher. - */ - -#ifdef __SHADOW_CATCHER__ - -/* Accumulate contribution to the Shadow Catcher pass. - * - * Returns truth if the contribution is fully handled here and is not to be added to the other - * passes (like combined, adaptive sampling). */ - -ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg, - const uint32_t path_flag, - const float3 contribution, - ccl_global float *ccl_restrict buffer) -{ - if (!kernel_data.integrator.has_shadow_catcher) { - return false; - } - - kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED); - kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - - /* Matte pass. */ - if (kernel_shadow_catcher_is_matte_path(path_flag)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution); - /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive - * sampling is based on how noisy the combined pass is as if there were no catchers in the - * scene. */ - } - - /* Shadow catcher pass. */ - if (kernel_shadow_catcher_is_object_pass(path_flag)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); - return true; - } - - return false; -} - -ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg, - const uint32_t path_flag, - const float3 contribution, - const float transparent, - ccl_global float *ccl_restrict buffer) -{ - if (!kernel_data.integrator.has_shadow_catcher) { - return false; - } - - kernel_assert(kernel_data.film.pass_shadow_catcher != PASS_UNUSED); - kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - - if (path_flag & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { - return true; - } - - /* Matte pass. */ - if (kernel_shadow_catcher_is_matte_path(path_flag)) { - kernel_write_pass_float4( - buffer + kernel_data.film.pass_shadow_catcher_matte, - make_float4(contribution.x, contribution.y, contribution.z, transparent)); - /* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive - * sampling is based on how noisy the combined pass is as if there were no catchers in the - * scene. */ - } - - /* Shadow catcher pass. 
*/ - if (kernel_shadow_catcher_is_object_pass(path_flag)) { - /* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the - * calculation and the alpha channel of the pass contains numbers of samples contributed to a - * pixel of the pass. */ - kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution); - return true; - } - - return false; -} - -ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg, - const uint32_t path_flag, - const float transparent, - ccl_global float *ccl_restrict buffer) -{ - if (!kernel_data.integrator.has_shadow_catcher) { - return; - } - - kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - - /* Matte pass. */ - if (kernel_shadow_catcher_is_matte_path(path_flag)) { - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent); - } -} - -#endif /* __SHADOW_CATCHER__ */ - -/* -------------------------------------------------------------------- - * Render passes. - */ - -/* Write combined pass. */ -ccl_device_inline void kernel_accum_combined_pass(KernelGlobals kg, - const uint32_t path_flag, - const int sample, - const float3 contribution, - ccl_global float *ccl_restrict buffer) -{ -#ifdef __SHADOW_CATCHER__ - if (kernel_accum_shadow_catcher(kg, path_flag, contribution, buffer)) { - return; - } -#endif - - if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution); - } - - kernel_accum_adaptive_buffer(kg, sample, contribution, buffer); -} - -/* Write combined pass with transparency. */ -ccl_device_inline void kernel_accum_combined_transparent_pass(KernelGlobals kg, - const uint32_t path_flag, - const int sample, - const float3 contribution, - const float transparent, - ccl_global float *ccl_restrict - buffer) -{ -#ifdef __SHADOW_CATCHER__ - if (kernel_accum_shadow_catcher_transparent(kg, path_flag, contribution, transparent, buffer)) { - return; - } -#endif - - if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { - kernel_write_pass_float4( - buffer + kernel_data.film.pass_combined, - make_float4(contribution.x, contribution.y, contribution.z, transparent)); - } - - kernel_accum_adaptive_buffer(kg, sample, contribution, buffer); -} - -/* Write background or emission to appropriate pass. */ -ccl_device_inline void kernel_accum_emission_or_background_pass(KernelGlobals kg, - ConstIntegratorState state, - float3 contribution, - ccl_global float *ccl_restrict - buffer, - const int pass) -{ - if (!(kernel_data.film.light_pass_flag & PASS_ANY)) { - return; - } - -#ifdef __PASSES__ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - int pass_offset = PASS_UNUSED; - - /* Denoising albedo. */ -# ifdef __DENOISING_FEATURES__ - if (path_flag & PATH_RAY_DENOISING_FEATURES) { - if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { - const float3 denoising_feature_throughput = INTEGRATOR_STATE( - state, path, denoising_feature_throughput); - const float3 denoising_albedo = denoising_feature_throughput * contribution; - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); - } - } -# endif /* __DENOISING_FEATURES__ */ - - if (!(path_flag & PATH_RAY_ANY_PASS)) { - /* Directly visible, write to emission or background pass. */ - pass_offset = pass; - } - else if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { - /* Indirectly visible through reflection. 
*/ - const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? - ((INTEGRATOR_STATE(state, path, bounce) == 1) ? - kernel_data.film.pass_glossy_direct : - kernel_data.film.pass_glossy_indirect) : - ((INTEGRATOR_STATE(state, path, bounce) == 1) ? - kernel_data.film.pass_transmission_direct : - kernel_data.film.pass_transmission_indirect); - - if (glossy_pass_offset != PASS_UNUSED) { - /* Glossy is a subset of the throughput, reconstruct it here using the - * diffuse-glossy ratio. */ - const float3 ratio = INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); - const float3 glossy_contribution = (one_float3() - ratio) * contribution; - kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); - } - - /* Reconstruct diffuse subset of throughput. */ - pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ? - kernel_data.film.pass_diffuse_direct : - kernel_data.film.pass_diffuse_indirect; - if (pass_offset != PASS_UNUSED) { - contribution *= INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); - } - } - else if (path_flag & PATH_RAY_VOLUME_PASS) { - /* Indirectly visible through volume. */ - pass_offset = (INTEGRATOR_STATE(state, path, bounce) == 1) ? - kernel_data.film.pass_volume_direct : - kernel_data.film.pass_volume_indirect; - } - - /* Single write call for GPU coherence. */ - if (pass_offset != PASS_UNUSED) { - kernel_write_pass_float3(buffer + pass_offset, contribution); - } -#endif /* __PASSES__ */ -} - -/* Write light contribution to render buffer. */ -ccl_device_inline void kernel_accum_light(KernelGlobals kg, - ConstIntegratorShadowState state, - ccl_global float *ccl_restrict render_buffer) -{ - /* The throughput for shadow paths already contains the light shader evaluation. */ - float3 contribution = INTEGRATOR_STATE(state, shadow_path, throughput); - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce)); - - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, shadow_path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset; - - const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag); - const int sample = INTEGRATOR_STATE(state, shadow_path, sample); - - /* Ambient occlusion. */ - if (path_flag & PATH_RAY_SHADOW_FOR_AO) { - if ((kernel_data.kernel_features & KERNEL_FEATURE_AO_PASS) && (path_flag & PATH_RAY_CAMERA)) { - kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, contribution); - } - if (kernel_data.kernel_features & KERNEL_FEATURE_AO_ADDITIVE) { - const float3 ao_weight = INTEGRATOR_STATE(state, shadow_path, unshadowed_throughput); - kernel_accum_combined_pass(kg, path_flag, sample, contribution * ao_weight, buffer); - } - return; - } - - /* Direct light shadow. */ - kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer); - -#ifdef __PASSES__ - if (kernel_data.film.light_pass_flag & PASS_ANY) { - const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag); - int pass_offset = PASS_UNUSED; - - if (path_flag & (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS)) { - /* Indirectly visible through reflection. */ - const int glossy_pass_offset = (path_flag & PATH_RAY_REFLECT_PASS) ? - ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? - kernel_data.film.pass_glossy_direct : - kernel_data.film.pass_glossy_indirect) : - ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? 
- kernel_data.film.pass_transmission_direct : - kernel_data.film.pass_transmission_indirect); - - if (glossy_pass_offset != PASS_UNUSED) { - /* Glossy is a subset of the throughput, reconstruct it here using the - * diffuse-glossy ratio. */ - const float3 ratio = INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio); - const float3 glossy_contribution = (one_float3() - ratio) * contribution; - kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_contribution); - } - - /* Reconstruct diffuse subset of throughput. */ - pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? - kernel_data.film.pass_diffuse_direct : - kernel_data.film.pass_diffuse_indirect; - if (pass_offset != PASS_UNUSED) { - contribution *= INTEGRATOR_STATE(state, shadow_path, diffuse_glossy_ratio); - } - } - else if (path_flag & PATH_RAY_VOLUME_PASS) { - /* Indirectly visible through volume. */ - pass_offset = (INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ? - kernel_data.film.pass_volume_direct : - kernel_data.film.pass_volume_indirect; - } - - /* Single write call for GPU coherence. */ - if (pass_offset != PASS_UNUSED) { - kernel_write_pass_float3(buffer + pass_offset, contribution); - } - - /* Write shadow pass. */ - if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) && - (path_flag & PATH_RAY_CAMERA)) { - const float3 unshadowed_throughput = INTEGRATOR_STATE( - state, shadow_path, unshadowed_throughput); - const float3 shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput); - const float3 shadow = safe_divide_float3_float3(shadowed_throughput, unshadowed_throughput) * - kernel_data.film.pass_shadow_scale; - kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow); - } - } -#endif -} - -/* Write transparency to render buffer. - * - * Note that we accumulate transparency = 1 - alpha in the render buffer. - * Otherwise we'd have to write alpha on path termination, which happens - * in many places. */ -ccl_device_inline void kernel_accum_transparent(KernelGlobals kg, - ConstIntegratorState state, - const uint32_t path_flag, - const float transparent, - ccl_global float *ccl_restrict buffer) -{ - if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) { - kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent); - } - - kernel_accum_shadow_catcher_transparent_only(kg, path_flag, transparent, buffer); -} - -/* Write holdout to render buffer. */ -ccl_device_inline void kernel_accum_holdout(KernelGlobals kg, - ConstIntegratorState state, - const uint32_t path_flag, - const float transparent, - ccl_global float *ccl_restrict render_buffer) -{ - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); - kernel_accum_transparent(kg, state, path_flag, transparent, buffer); -} - -/* Write background contribution to render buffer. - * - * Includes transparency, matching kernel_accum_transparent. 
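As the comment above notes, the buffer accumulates transparency rather than alpha so that every path-termination site can simply add its remaining transparency, and alpha is recovered once at read time as a clamped 1 - transparency. A tiny sketch of that accumulate/resolve pair, with plain variables standing in for render-buffer passes and both function names being illustrative.

#include <algorithm>

// Accumulation side: each terminated path adds how much of it passed straight
// through (its remaining transparency); averaging happens later at read time.
static void accumulate_transparency(float &transparent_sum, float path_transparency)
{
  transparent_sum += path_transparency;
}

// Read side: convert the averaged transparency back to alpha and clamp, since
// Russian roulette can push the value slightly outside the [0, 1] range.
static float resolve_alpha(float transparent_sum, float num_samples)
{
  const float transparency = transparent_sum / num_samples;
  return std::min(std::max(1.0f - transparency, 0.0f), 1.0f);
}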
*/ -ccl_device_inline void kernel_accum_background(KernelGlobals kg, - ConstIntegratorState state, - const float3 L, - const float transparent, - const bool is_transparent_background_ray, - ccl_global float *ccl_restrict render_buffer) -{ - float3 contribution = INTEGRATOR_STATE(state, path, throughput) * L; - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); - - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - if (is_transparent_background_ray) { - kernel_accum_transparent(kg, state, path_flag, transparent, buffer); - } - else { - const int sample = INTEGRATOR_STATE(state, path, sample); - kernel_accum_combined_transparent_pass( - kg, path_flag, sample, contribution, transparent, buffer); - } - kernel_accum_emission_or_background_pass( - kg, state, contribution, buffer, kernel_data.film.pass_background); -} - -/* Write emission to render buffer. */ -ccl_device_inline void kernel_accum_emission(KernelGlobals kg, - ConstIntegratorState state, - const float3 throughput, - const float3 L, - ccl_global float *ccl_restrict render_buffer) -{ - float3 contribution = throughput * L; - kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1); - - ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer); - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - const int sample = INTEGRATOR_STATE(state, path, sample); - - kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer); - kernel_accum_emission_or_background_pass( - kg, state, contribution, buffer, kernel_data.film.pass_emission); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/film_adaptive_sampling.h b/intern/cycles/kernel/film/film_adaptive_sampling.h deleted file mode 100644 index c78b5f6b707..00000000000 --- a/intern/cycles/kernel/film/film_adaptive_sampling.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright 2019 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/film/film_write_passes.h" - -CCL_NAMESPACE_BEGIN - -/* Check whether the pixel has converged and should not be sampled anymore. */ - -ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg, - ConstIntegratorState state, - ccl_global float *render_buffer) -{ - if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) { - return true; - } - - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset; - - const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; - return buffer[aux_w_offset] == 0.0f; -} - -/* Determines whether to continue sampling a given pixel or if it has sufficiently converged. 
*/ - -ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg, - ccl_global float *render_buffer, - int x, - int y, - float threshold, - bool reset, - int offset, - int stride) -{ - kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); - kernel_assert(kernel_data.film.pass_sample_count != PASS_UNUSED); - - const int render_pixel_index = offset + x + y * stride; - ccl_global float *buffer = render_buffer + - (uint64_t)render_pixel_index * kernel_data.film.pass_stride; - - /* TODO(Stefan): Is this better in linear, sRGB or something else? */ - - const float4 A = kernel_read_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer); - if (!reset && A.w != 0.0f) { - /* If the pixel was considered converged, its state will not change in this kernel. Early - * output before doing any math. - * - * TODO(sergey): On a GPU it might be better to keep thread alive for better coherency? */ - return true; - } - - const float4 I = kernel_read_pass_float4(buffer + kernel_data.film.pass_combined); - - const float sample = __float_as_uint(buffer[kernel_data.film.pass_sample_count]); - const float inv_sample = 1.0f / sample; - - /* The per pixel error as seen in section 2.1 of - * "A hierarchical automatic stopping condition for Monte Carlo global illumination" */ - const float error_difference = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) * - inv_sample; - const float error_normalize = sqrtf((I.x + I.y + I.z) * inv_sample); - /* A small epsilon is added to the divisor to prevent division by zero. */ - const float error = error_difference / (0.0001f + error_normalize); - const bool did_converge = (error < threshold); - - const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; - buffer[aux_w_offset] = did_converge; - - return did_converge; -} - -/* This is a simple box filter in two passes. - * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. 
*/ - -ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg, - ccl_global float *render_buffer, - int y, - int start_x, - int width, - int offset, - int stride) -{ - kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); - - bool prev = false; - for (int x = start_x; x < start_x + width; ++x) { - int index = offset + x + y * stride; - ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride; - const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; - - if (buffer[aux_w_offset] == 0.0f) { - if (x > start_x && !prev) { - index = index - 1; - buffer = render_buffer + index * kernel_data.film.pass_stride; - buffer[aux_w_offset] = 0.0f; - } - prev = true; - } - else { - if (prev) { - buffer[aux_w_offset] = 0.0f; - } - prev = false; - } - } -} - -ccl_device void kernel_adaptive_sampling_filter_y(KernelGlobals kg, - ccl_global float *render_buffer, - int x, - int start_y, - int height, - int offset, - int stride) -{ - kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED); - - bool prev = false; - for (int y = start_y; y < start_y + height; ++y) { - int index = offset + x + y * stride; - ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride; - const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3; - - if (buffer[aux_w_offset] == 0.0f) { - if (y > start_y && !prev) { - index = index - stride; - buffer = render_buffer + index * kernel_data.film.pass_stride; - buffer[aux_w_offset] = 0.0f; - } - prev = true; - } - else { - if (prev) { - buffer[aux_w_offset] = 0.0f; - } - prev = false; - } - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/film_id_passes.h b/intern/cycles/kernel/film/film_id_passes.h deleted file mode 100644 index d5b8c90a828..00000000000 --- a/intern/cycles/kernel/film/film_id_passes.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright 2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Element of ID pass stored in the render buffers. - * It is `float2` semantically, but it must be unaligned since the offset of ID passes in the - * render buffers might not meet expected by compiler alignment. */ -typedef struct IDPassBufferElement { - float x; - float y; -} IDPassBufferElement; - -ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, - int num_slots, - float id, - float weight) -{ - kernel_assert(id != ID_NONE); - if (weight == 0.0f) { - return; - } - - for (int slot = 0; slot < num_slots; slot++) { - ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer; -#ifdef __ATOMIC_PASS_WRITE__ - /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ - if (id_buffer[slot].x == ID_NONE) { - /* Use an atomic to claim this slot. - * If a different thread got here first, try again from this slot on. 
*/ - float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id); - if (old_id != ID_NONE && old_id != id) { - continue; - } - atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight); - break; - } - /* If there already is a slot for that ID, add the weight. - * If no slot was found, add it to the last. */ - else if (id_buffer[slot].x == id || slot == num_slots - 1) { - atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight); - break; - } -#else /* __ATOMIC_PASS_WRITE__ */ - /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ - if (id_buffer[slot].x == ID_NONE) { - id_buffer[slot].x = id; - id_buffer[slot].y = weight; - break; - } - /* If there already is a slot for that ID, add the weight. - * If no slot was found, add it to the last. */ - else if (id_buffer[slot].x == id || slot == num_slots - 1) { - id_buffer[slot].y += weight; - break; - } -#endif /* __ATOMIC_PASS_WRITE__ */ - } -} - -ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots) -{ - ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer; - for (int slot = 1; slot < num_slots; ++slot) { - if (id_buffer[slot].x == ID_NONE) { - return; - } - /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */ - int i = slot; - while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) { - const IDPassBufferElement swap = id_buffer[i]; - id_buffer[i] = id_buffer[i - 1]; - id_buffer[i - 1] = swap; - --i; - } - } -} - -/* post-sorting for Cryptomatte */ -ccl_device_inline void kernel_cryptomatte_post(KernelGlobals kg, - ccl_global float *render_buffer, - int pixel_index) -{ - const int pass_stride = kernel_data.film.pass_stride; - const uint64_t render_buffer_offset = (uint64_t)pixel_index * pass_stride; - ccl_global float *cryptomatte_buffer = render_buffer + render_buffer_offset + - kernel_data.film.pass_cryptomatte; - kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/film_passes.h b/intern/cycles/kernel/film/film_passes.h deleted file mode 100644 index 6c124247f89..00000000000 --- a/intern/cycles/kernel/film/film_passes.h +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/geom/geom.h" - -#include "kernel/film/film_id_passes.h" -#include "kernel/film/film_write_passes.h" - -CCL_NAMESPACE_BEGIN - -/* Get pointer to pixel in render buffer. 
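The ID-pass helpers above keep a small fixed array of (id, weight) slots per pixel: a write either adds its weight to the slot already holding that ID, claims the first empty slot, or spills into the last slot, and a post pass insertion-sorts the slots by weight for Cryptomatte output. A non-atomic sketch of the same bookkeeping follows; IdSlot, EMPTY_ID and the function names are illustrative stand-ins for the kernel's types.

#include <utility>

struct IdSlot { float id; float weight; };

static const float EMPTY_ID = -1.0f;  // Stand-in for the kernel's ID_NONE.

// Add weight for one ID: reuse its slot, claim the first empty one, or spill
// into the last slot when all slots are taken (single-threaded branch only).
static void write_id_slot(IdSlot *slots, int num_slots, float id, float weight)
{
  for (int s = 0; s < num_slots; s++) {
    if (slots[s].id == EMPTY_ID) {
      slots[s].id = id;
      slots[s].weight = weight;
      return;
    }
    if (slots[s].id == id || s == num_slots - 1) {
      slots[s].weight += weight;
      return;
    }
  }
}

// Insertion sort by weight, descending; fine for the handful of slots used.
static void sort_id_slots(IdSlot *slots, int num_slots)
{
  for (int s = 1; s < num_slots; ++s) {
    if (slots[s].id == EMPTY_ID) {
      return;
    }
    for (int i = s; i > 0 && slots[i].weight > slots[i - 1].weight; --i) {
      std::swap(slots[i], slots[i - 1]);
    }
  }
}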
*/ -ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer( - KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) -{ - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - return render_buffer + render_buffer_offset; -} - -#ifdef __DENOISING_FEATURES__ - -ccl_device_forceinline void kernel_write_denoising_features_surface( - KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *sd, - ccl_global float *ccl_restrict render_buffer) -{ - if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES)) { - return; - } - - /* Skip implicitly transparent surfaces. */ - if (sd->flag & SD_HAS_ONLY_VOLUME) { - return; - } - - ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); - - if (kernel_data.film.pass_denoising_depth != PASS_UNUSED) { - const float3 denoising_feature_throughput = INTEGRATOR_STATE( - state, path, denoising_feature_throughput); - const float denoising_depth = ensure_finite(average(denoising_feature_throughput) * - sd->ray_length); - kernel_write_pass_float(buffer + kernel_data.film.pass_denoising_depth, denoising_depth); - } - - float3 normal = zero_float3(); - float3 diffuse_albedo = zero_float3(); - float3 specular_albedo = zero_float3(); - float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - continue; - } - - /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ - normal += sc->N * sc->sample_weight; - sum_weight += sc->sample_weight; - - float3 closure_albedo = sc->weight; - /* Closures that include a Fresnel term typically have weights close to 1 even though their - * actual contribution is significantly lower. - * To account for this, we scale their weight by the average fresnel factor (the same is also - * done for the sample weight in the BSDF setup, so we don't need to scale that here). */ - if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) { - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc; - closure_albedo *= bsdf->extra->fresnel_color; - } - else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) { - ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)sc; - closure_albedo *= bsdf->avg_value; - } - else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) { - closure_albedo *= bsdf_principled_hair_albedo(sc); - } - - if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { - diffuse_albedo += closure_albedo; - sum_nonspecular_weight += sc->sample_weight; - } - else { - specular_albedo += closure_albedo; - } - } - - /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */ - if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) { - if (sum_weight != 0.0f) { - normal /= sum_weight; - } - - if (kernel_data.film.pass_denoising_normal != PASS_UNUSED) { - /* Transform normal into camera space. 
*/ - const Transform worldtocamera = kernel_data.cam.worldtocamera; - normal = transform_direction(&worldtocamera, normal); - - const float3 denoising_normal = ensure_finite3(normal); - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); - } - - if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { - const float3 denoising_feature_throughput = INTEGRATOR_STATE( - state, path, denoising_feature_throughput); - const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput * - diffuse_albedo); - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); - } - - INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; - } - else { - INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) *= specular_albedo; - } -} - -ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals kg, - IntegratorState state, - const float3 albedo, - const bool scatter, - ccl_global float *ccl_restrict - render_buffer) -{ - ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); - const float3 denoising_feature_throughput = INTEGRATOR_STATE( - state, path, denoising_feature_throughput); - - if (scatter && kernel_data.film.pass_denoising_normal != PASS_UNUSED) { - /* Assume scatter is sufficiently diffuse to stop writing denoising features. */ - INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; - - /* Write view direction as normal. */ - const float3 denoising_normal = make_float3(0.0f, 0.0f, -1.0f); - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); - } - - if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { - /* Write albedo. */ - const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput * albedo); - kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); - } -} -#endif /* __DENOISING_FEATURES__ */ - -#ifdef __SHADOW_CATCHER__ - -/* Write shadow catcher passes on a bounce from the shadow catcher object. */ -ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data( - KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *sd, - ccl_global float *ccl_restrict render_buffer) -{ - if (!kernel_data.integrator.has_shadow_catcher) { - return; - } - - kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); - kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - - if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) { - return; - } - - ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); - - /* Count sample for the shadow catcher object. */ - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); - - /* Since the split is done, the sample does not contribute to the matte, so accumulate it as - * transparency to the matte. 
*/ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, - average(throughput)); -} - -#endif /* __SHADOW_CATCHER__ */ - -ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer, - size_t depth, - float id, - float matte_weight) -{ - kernel_write_id_slots(buffer, depth * 2, id, matte_weight); - return depth * 4; -} - -ccl_device_inline void kernel_write_data_passes(KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *sd, - ccl_global float *ccl_restrict render_buffer) -{ -#ifdef __PASSES__ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - if (!(path_flag & PATH_RAY_CAMERA)) { - return; - } - - const int flag = kernel_data.film.pass_flag; - - if (!(flag & PASS_ANY)) { - return; - } - - ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); - - if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { - if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || - average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { - if (INTEGRATOR_STATE(state, path, sample) == 0) { - if (flag & PASSMASK(DEPTH)) { - const float depth = camera_z_depth(kg, sd->P); - kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth); - } - if (flag & PASSMASK(OBJECT_ID)) { - const float id = object_pass_id(kg, sd->object); - kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id); - } - if (flag & PASSMASK(MATERIAL_ID)) { - const float id = shader_pass_id(kg, sd); - kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id); - } - if (flag & PASSMASK(POSITION)) { - const float3 position = sd->P; - kernel_write_pass_float3(buffer + kernel_data.film.pass_position, position); - } - } - - if (flag & PASSMASK(NORMAL)) { - const float3 normal = shader_bsdf_average_normal(kg, sd); - kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); - } - if (flag & PASSMASK(ROUGHNESS)) { - const float roughness = shader_bsdf_average_roughness(sd); - kernel_write_pass_float(buffer + kernel_data.film.pass_roughness, roughness); - } - if (flag & PASSMASK(UV)) { - const float3 uv = primitive_uv(kg, sd); - kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv); - } - if (flag & PASSMASK(MOTION)) { - const float4 speed = primitive_motion_vector(kg, sd); - kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed); - kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); - } - - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SINGLE_PASS_DONE; - } - } - - if (kernel_data.film.cryptomatte_passes) { - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - const float matte_weight = average(throughput) * - (1.0f - average(shader_bsdf_transparency(kg, sd))); - if (matte_weight > 0.0f) { - ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; - if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { - const float id = object_cryptomatte_id(kg, sd->object); - cryptomatte_buffer += kernel_write_id_pass( - cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); - } - if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { - const float id = shader_cryptomatte_id(kg, sd->shader); - cryptomatte_buffer += kernel_write_id_pass( - cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); - } - if 
(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { - const float id = object_cryptomatte_asset_id(kg, sd->object); - cryptomatte_buffer += kernel_write_id_pass( - cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight); - } - } - } - - if (flag & PASSMASK(DIFFUSE_COLOR)) { - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, - shader_bsdf_diffuse(kg, sd) * throughput); - } - if (flag & PASSMASK(GLOSSY_COLOR)) { - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, - shader_bsdf_glossy(kg, sd) * throughput); - } - if (flag & PASSMASK(TRANSMISSION_COLOR)) { - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, - shader_bsdf_transmission(kg, sd) * throughput); - } - if (flag & PASSMASK(MIST)) { - /* Bring depth into 0..1 range. */ - const float mist_start = kernel_data.film.mist_start; - const float mist_inv_depth = kernel_data.film.mist_inv_depth; - - const float depth = camera_distance(kg, sd->P); - float mist = saturate((depth - mist_start) * mist_inv_depth); - - /* Falloff */ - const float mist_falloff = kernel_data.film.mist_falloff; - - if (mist_falloff == 1.0f) - ; - else if (mist_falloff == 2.0f) - mist = mist * mist; - else if (mist_falloff == 0.5f) - mist = sqrtf(mist); - else - mist = powf(mist, mist_falloff); - - /* Modulate by transparency */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - const float3 alpha = shader_bsdf_alpha(kg, sd); - const float mist_output = (1.0f - mist) * average(throughput * alpha); - - /* Note that the final value in the render buffer we want is 1 - mist_output, - * to avoid having to tracking this in the Integrator state we do the negation - * after rendering. */ - kernel_write_pass_float(buffer + kernel_data.film.pass_mist, mist_output); - } -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/film_read.h b/intern/cycles/kernel/film/film_read.h deleted file mode 100644 index a87eff3832e..00000000000 --- a/intern/cycles/kernel/film/film_read.h +++ /dev/null @@ -1,532 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* -------------------------------------------------------------------- - * Common utilities. - */ - -/* The input buffer contains transparency = 1 - alpha, this converts it to - * alpha. Also clamp since alpha might end up outside of 0..1 due to Russian - * roulette. 
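The mist pass above maps camera distance into [0, 1] between the mist start and depth, applies a falloff exponent with fast paths for 1, 2 and 0.5, modulates by throughput and transparency, and stores 1 - mist so the final negation can happen after rendering. A standalone sketch of just the distance-to-mist mapping; mist_factor and its parameters are illustrative names.

#include <algorithm>
#include <cmath>

// Map a camera-space distance to a mist factor in [0, 1], then apply the
// falloff exponent. The special cases mirror the common exponents 1, 2 and 0.5.
static float mist_factor(float depth, float mist_start, float mist_inv_depth,
                         float falloff)
{
  float mist = std::min(std::max((depth - mist_start) * mist_inv_depth, 0.0f), 1.0f);
  if (falloff == 1.0f) {
    /* Linear falloff: nothing to do. */
  }
  else if (falloff == 2.0f) {
    mist = mist * mist;
  }
  else if (falloff == 0.5f) {
    mist = std::sqrt(mist);
  }
  else {
    mist = std::pow(mist, falloff);
  }
  return mist;  // The kernel writes (1 - mist) * visibility to the pass.
}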
*/ -ccl_device_forceinline float film_transparency_to_alpha(float transparency) -{ - return saturate(1.0f - transparency); -} - -ccl_device_inline float film_get_scale(ccl_global const KernelFilmConvert *ccl_restrict - kfilm_convert, - ccl_global const float *ccl_restrict buffer) -{ - if (kfilm_convert->pass_sample_count == PASS_UNUSED) { - return kfilm_convert->scale; - } - - if (kfilm_convert->pass_use_filter) { - const uint sample_count = *( - (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count)); - return 1.0f / sample_count; - } - - return 1.0f; -} - -ccl_device_inline float film_get_scale_exposure(ccl_global const KernelFilmConvert *ccl_restrict - kfilm_convert, - ccl_global const float *ccl_restrict buffer) -{ - if (kfilm_convert->pass_sample_count == PASS_UNUSED) { - return kfilm_convert->scale_exposure; - } - - const float scale = film_get_scale(kfilm_convert, buffer); - - if (kfilm_convert->pass_use_exposure) { - return scale * kfilm_convert->exposure; - } - - return scale; -} - -ccl_device_inline bool film_get_scale_and_scale_exposure( - ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict scale, - ccl_private float *ccl_restrict scale_exposure) -{ - if (kfilm_convert->pass_sample_count == PASS_UNUSED) { - *scale = kfilm_convert->scale; - *scale_exposure = kfilm_convert->scale_exposure; - return true; - } - - const uint sample_count = *( - (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count)); - if (!sample_count) { - *scale = 0.0f; - *scale_exposure = 0.0f; - return false; - } - - if (kfilm_convert->pass_use_filter) { - *scale = 1.0f / sample_count; - } - else { - *scale = 1.0f; - } - - if (kfilm_convert->pass_use_exposure) { - *scale_exposure = *scale * kfilm_convert->exposure; - } - else { - *scale_exposure = *scale; - } - - return true; -} - -/* -------------------------------------------------------------------- - * Float (scalar) passes. - */ - -ccl_device_inline void film_get_pass_pixel_depth(ccl_global const KernelFilmConvert *ccl_restrict - kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - kernel_assert(kfilm_convert->num_components >= 1); - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - - const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); - - ccl_global const float *in = buffer + kfilm_convert->pass_offset; - const float f = *in; - - pixel[0] = (f == 0.0f) ? 1e10f : f * scale_exposure; -} - -ccl_device_inline void film_get_pass_pixel_mist(ccl_global const KernelFilmConvert *ccl_restrict - kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - kernel_assert(kfilm_convert->num_components >= 1); - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - - const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); - - ccl_global const float *in = buffer + kfilm_convert->pass_offset; - const float f = *in; - - /* Note that we accumulate 1 - mist in the kernel to avoid having to - * track the mist values in the integrator state. 
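Reading a pass back thus needs two factors: a sample scale, which is either a precomputed value or 1 / sample_count when a per-pixel count pass exists, and a scale_exposure that additionally folds in the film exposure for light-carrying passes. A simplified sketch of how the two relate; PassScales and pass_scales are illustrative names, and the filtering and zero-sample cases handled above are left out.

// Per-pixel factors used when converting accumulated pass values for display:
// 'scale' averages over the contributing samples, 'scale_exposure' adds the
// film exposure on top for passes that carry light.
struct PassScales { float scale; float scale_exposure; };

static PassScales pass_scales(bool has_sample_count_pass, unsigned sample_count,
                              float fallback_scale, float exposure,
                              bool pass_uses_exposure)
{
  PassScales out;
  if (has_sample_count_pass && sample_count > 0) {
    out.scale = 1.0f / (float)sample_count;
  }
  else {
    out.scale = fallback_scale;  // Precomputed 1/num_samples for the whole buffer.
  }
  out.scale_exposure = pass_uses_exposure ? out.scale * exposure : out.scale;
  return out;
}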
*/ - pixel[0] = saturate(1.0f - f * scale_exposure); -} - -ccl_device_inline void film_get_pass_pixel_sample_count( - ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - /* TODO(sergey): Consider normalizing into the [0..1] range, so that it is possible to see - * meaningful value when adaptive sampler stopped rendering image way before the maximum - * number of samples was reached (for examples when number of samples is set to 0 in - * viewport). */ - - kernel_assert(kfilm_convert->num_components >= 1); - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - - ccl_global const float *in = buffer + kfilm_convert->pass_offset; - const float f = *in; - - pixel[0] = __float_as_uint(f) * kfilm_convert->scale; -} - -ccl_device_inline void film_get_pass_pixel_float(ccl_global const KernelFilmConvert *ccl_restrict - kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - kernel_assert(kfilm_convert->num_components >= 1); - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - - const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); - - ccl_global const float *in = buffer + kfilm_convert->pass_offset; - const float f = *in; - - pixel[0] = f * scale_exposure; -} - -/* -------------------------------------------------------------------- - * Float 3 passes. - */ - -ccl_device_inline void film_get_pass_pixel_light_path( - ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - kernel_assert(kfilm_convert->num_components >= 3); - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - - /* Read light pass. */ - ccl_global const float *in = buffer + kfilm_convert->pass_offset; - float3 f = make_float3(in[0], in[1], in[2]); - - /* Optionally add indirect light pass. */ - if (kfilm_convert->pass_indirect != PASS_UNUSED) { - ccl_global const float *in_indirect = buffer + kfilm_convert->pass_indirect; - const float3 f_indirect = make_float3(in_indirect[0], in_indirect[1], in_indirect[2]); - f += f_indirect; - } - - /* Optionally divide out color. */ - if (kfilm_convert->pass_divide != PASS_UNUSED) { - ccl_global const float *in_divide = buffer + kfilm_convert->pass_divide; - const float3 f_divide = make_float3(in_divide[0], in_divide[1], in_divide[2]); - f = safe_divide_even_color(f, f_divide); - - /* Exposure only, sample scale cancels out. */ - f *= kfilm_convert->exposure; - } - else { - /* Sample scale and exposure. */ - f *= film_get_scale_exposure(kfilm_convert, buffer); - } - - pixel[0] = f.x; - pixel[1] = f.y; - pixel[2] = f.z; -} - -ccl_device_inline void film_get_pass_pixel_float3(ccl_global const KernelFilmConvert *ccl_restrict - kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - kernel_assert(kfilm_convert->num_components >= 3); - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - - const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); - - ccl_global const float *in = buffer + kfilm_convert->pass_offset; - - const float3 f = make_float3(in[0], in[1], in[2]) * scale_exposure; - - pixel[0] = f.x; - pixel[1] = f.y; - pixel[2] = f.z; -} - -/* -------------------------------------------------------------------- - * Float4 passes. 
- */ - -ccl_device_inline void film_get_pass_pixel_motion(ccl_global const KernelFilmConvert *ccl_restrict - kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - kernel_assert(kfilm_convert->num_components == 4); - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - kernel_assert(kfilm_convert->pass_motion_weight != PASS_UNUSED); - - ccl_global const float *in = buffer + kfilm_convert->pass_offset; - ccl_global const float *in_weight = buffer + kfilm_convert->pass_motion_weight; - - const float weight = in_weight[0]; - const float weight_inv = (weight > 0.0f) ? 1.0f / weight : 0.0f; - - const float4 motion = make_float4(in[0], in[1], in[2], in[3]) * weight_inv; - - pixel[0] = motion.x; - pixel[1] = motion.y; - pixel[2] = motion.z; - pixel[3] = motion.w; -} - -ccl_device_inline void film_get_pass_pixel_cryptomatte( - ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - kernel_assert(kfilm_convert->num_components == 4); - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - - const float scale = film_get_scale(kfilm_convert, buffer); - - ccl_global const float *in = buffer + kfilm_convert->pass_offset; - - const float4 f = make_float4(in[0], in[1], in[2], in[3]); - - /* x and z contain integer IDs, don't rescale them. - * y and w contain matte weights, they get scaled. */ - pixel[0] = f.x; - pixel[1] = f.y * scale; - pixel[2] = f.z; - pixel[3] = f.w * scale; -} - -ccl_device_inline void film_get_pass_pixel_float4(ccl_global const KernelFilmConvert *ccl_restrict - kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - kernel_assert(kfilm_convert->num_components == 4); - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - - float scale, scale_exposure; - film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure); - - ccl_global const float *in = buffer + kfilm_convert->pass_offset; - - const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure; - const float alpha = in[3] * scale; - - pixel[0] = color.x; - pixel[1] = color.y; - pixel[2] = color.z; - pixel[3] = alpha; -} - -ccl_device_inline void film_get_pass_pixel_combined( - ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - kernel_assert(kfilm_convert->num_components == 4); - - /* 3rd channel contains transparency = 1 - alpha for the combined pass. */ - - kernel_assert(kfilm_convert->num_components == 4); - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - - float scale, scale_exposure; - if (!film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure)) { - pixel[0] = 0.0f; - pixel[1] = 0.0f; - pixel[2] = 0.0f; - pixel[3] = 0.0f; - return; - } - - ccl_global const float *in = buffer + kfilm_convert->pass_offset; - - const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure; - const float alpha = in[3] * scale; - - pixel[0] = color.x; - pixel[1] = color.y; - pixel[2] = color.z; - pixel[3] = film_transparency_to_alpha(alpha); -} - -/* -------------------------------------------------------------------- - * Shadow catcher. 
- */ - -ccl_device_inline float3 film_calculate_shadow_catcher_denoised( - ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, - ccl_global const float *ccl_restrict buffer) -{ - kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED); - - float scale, scale_exposure; - film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure); - - ccl_global const float *in_catcher = buffer + kfilm_convert->pass_shadow_catcher; - - const float3 pixel = make_float3(in_catcher[0], in_catcher[1], in_catcher[2]) * scale_exposure; - - return pixel; -} - -ccl_device_inline float3 safe_divide_shadow_catcher(float3 a, float3 b) -{ - float x, y, z; - - x = (b.x != 0.0f) ? a.x / b.x : 1.0f; - y = (b.y != 0.0f) ? a.y / b.y : 1.0f; - z = (b.z != 0.0f) ? a.z / b.z : 1.0f; - - return make_float3(x, y, z); -} - -ccl_device_inline float3 -film_calculate_shadow_catcher(ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, - ccl_global const float *ccl_restrict buffer) -{ - /* For the shadow catcher pass we divide combined pass by the shadow catcher. - * Note that denoised shadow catcher pass contains value which only needs ot be scaled (but not - * to be calculated as division). */ - - if (kfilm_convert->is_denoised) { - return film_calculate_shadow_catcher_denoised(kfilm_convert, buffer); - } - - kernel_assert(kfilm_convert->pass_shadow_catcher_sample_count != PASS_UNUSED); - - /* If there is no shadow catcher object in this pixel, there is no modification of the light - * needed, so return one. */ - ccl_global const float *in_catcher_sample_count = - buffer + kfilm_convert->pass_shadow_catcher_sample_count; - const float num_samples = in_catcher_sample_count[0]; - if (num_samples == 0.0f) { - return one_float3(); - } - - kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED); - ccl_global const float *in_catcher = buffer + kfilm_convert->pass_shadow_catcher; - - /* NOTE: It is possible that the Shadow Catcher pass is requested as an output without actual - * shadow catcher objects in the scene. In this case there will be no auxiliary passes required - * for the decision (to save up memory). So delay the asserts to this point so that the number of - * samples check handles such configuration. */ - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - kernel_assert(kfilm_convert->pass_combined != PASS_UNUSED); - kernel_assert(kfilm_convert->pass_shadow_catcher_matte != PASS_UNUSED); - - ccl_global const float *in_combined = buffer + kfilm_convert->pass_combined; - ccl_global const float *in_matte = buffer + kfilm_convert->pass_shadow_catcher_matte; - - /* No scaling needed. The integration works in way that number of samples in the combined and - * shadow catcher passes are the same, and exposure is canceled during the division. */ - const float3 color_catcher = make_float3(in_catcher[0], in_catcher[1], in_catcher[2]); - const float3 color_combined = make_float3(in_combined[0], in_combined[1], in_combined[2]); - const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]); - - /* Need to ignore contribution of the matte object when doing division (otherwise there will be - * artifacts caused by anti-aliasing). Since combined pass is used for adaptive sampling and need - * to contain matte objects, we subtract matte objects contribution here. This is the same as if - * the matte objects were not accumulated to the combined pass. 
*/ - const float3 combined_no_matte = color_combined - color_matte; - - const float3 shadow_catcher = safe_divide_shadow_catcher(combined_no_matte, color_catcher); - - const float scale = film_get_scale(kfilm_convert, buffer); - const float transparency = in_combined[3] * scale; - const float alpha = film_transparency_to_alpha(transparency); - - /* Alpha-over on white using transparency of the combined pass. This allows to eliminate - * artifacts which are happening on an edge of a shadow catcher when using transparent film. - * Note that we treat shadow catcher as straight alpha here because alpha got canceled out - * during the division. */ - const float3 pixel = (1.0f - alpha) * one_float3() + alpha * shadow_catcher; - - return pixel; -} - -ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow( - ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, - ccl_global const float *ccl_restrict buffer) -{ - /* The approximation of the shadow is 1 - average(shadow_catcher_pass). A better approximation - * is possible. - * - * The matte is alpha-overed onto the shadow (which is kind of alpha-overing shadow onto footage, - * and then alpha-overing synthetic objects on top). */ - - kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); - kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED); - kernel_assert(kfilm_convert->pass_shadow_catcher_matte != PASS_UNUSED); - - float scale, scale_exposure; - if (!film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure)) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - ccl_global const float *in_matte = buffer + kfilm_convert->pass_shadow_catcher_matte; - - const float3 shadow_catcher = film_calculate_shadow_catcher(kfilm_convert, buffer); - const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]) * scale_exposure; - - const float transparency = in_matte[3] * scale; - const float alpha = saturate(1.0f - transparency); - - const float alpha_matte = (1.0f - alpha) * (1.0f - average(shadow_catcher)) + alpha; - - if (kfilm_convert->use_approximate_shadow_catcher_background) { - kernel_assert(kfilm_convert->pass_background != PASS_UNUSED); - - ccl_global const float *in_background = buffer + kfilm_convert->pass_background; - const float3 color_background = make_float3( - in_background[0], in_background[1], in_background[2]) * - scale_exposure; - const float3 alpha_over = color_matte + color_background * (1.0f - alpha_matte); - return make_float4(alpha_over.x, alpha_over.y, alpha_over.z, 1.0f); - } - - return make_float4(color_matte.x, color_matte.y, color_matte.z, alpha_matte); -} - -ccl_device_inline void film_get_pass_pixel_shadow_catcher( - ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - kernel_assert(kfilm_convert->num_components >= 3); - - const float3 pixel_value = film_calculate_shadow_catcher(kfilm_convert, buffer); - - pixel[0] = pixel_value.x; - pixel[1] = pixel_value.y; - pixel[2] = pixel_value.z; -} - -ccl_device_inline void film_get_pass_pixel_shadow_catcher_matte_with_shadow( - ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - kernel_assert(kfilm_convert->num_components == 3 || kfilm_convert->num_components == 4); - - const float4 pixel_value = film_calculate_shadow_catcher_matte_with_shadow(kfilm_convert, - buffer); - - pixel[0] = 
pixel_value.x; - pixel[1] = pixel_value.y; - pixel[2] = pixel_value.z; - if (kfilm_convert->num_components == 4) { - pixel[3] = pixel_value.w; - } -} - -/* -------------------------------------------------------------------- - * Compositing and overlays. - */ - -ccl_device_inline void film_apply_pass_pixel_overlays_rgba( - ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, - ccl_global const float *ccl_restrict buffer, - ccl_private float *ccl_restrict pixel) -{ - if (kfilm_convert->show_active_pixels && - kfilm_convert->pass_adaptive_aux_buffer != PASS_UNUSED) { - if (buffer[kfilm_convert->pass_adaptive_aux_buffer + 3] == 0.0f) { - const float3 active_rgb = make_float3(1.0f, 0.0f, 0.0f); - const float3 mix_rgb = interp(make_float3(pixel[0], pixel[1], pixel[2]), active_rgb, 0.5f); - pixel[0] = mix_rgb.x; - pixel[1] = mix_rgb.y; - pixel[2] = mix_rgb.z; - } - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/film_write_passes.h b/intern/cycles/kernel/film/film_write_passes.h deleted file mode 100644 index 9d379495629..00000000000 --- a/intern/cycles/kernel/film/film_write_passes.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#ifdef __KERNEL_GPU__ -# define __ATOMIC_PASS_WRITE__ -#endif - -CCL_NAMESPACE_BEGIN - -ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict buffer, float value) -{ -#ifdef __ATOMIC_PASS_WRITE__ - atomic_add_and_fetch_float(buffer, value); -#else - *buffer += value; -#endif -} - -ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict buffer, - float3 value) -{ -#ifdef __ATOMIC_PASS_WRITE__ - ccl_global float *buf_x = buffer + 0; - ccl_global float *buf_y = buffer + 1; - ccl_global float *buf_z = buffer + 2; - - atomic_add_and_fetch_float(buf_x, value.x); - atomic_add_and_fetch_float(buf_y, value.y); - atomic_add_and_fetch_float(buf_z, value.z); -#else - buffer[0] += value.x; - buffer[1] += value.y; - buffer[2] += value.z; -#endif -} - -ccl_device_inline void kernel_write_pass_float4(ccl_global float *ccl_restrict buffer, - float4 value) -{ -#ifdef __ATOMIC_PASS_WRITE__ - ccl_global float *buf_x = buffer + 0; - ccl_global float *buf_y = buffer + 1; - ccl_global float *buf_z = buffer + 2; - ccl_global float *buf_w = buffer + 3; - - atomic_add_and_fetch_float(buf_x, value.x); - atomic_add_and_fetch_float(buf_y, value.y); - atomic_add_and_fetch_float(buf_z, value.z); - atomic_add_and_fetch_float(buf_w, value.w); -#else - buffer[0] += value.x; - buffer[1] += value.y; - buffer[2] += value.z; - buffer[3] += value.w; -#endif -} - -ccl_device_inline float kernel_read_pass_float(ccl_global float *ccl_restrict buffer) -{ - return *buffer; -} - -ccl_device_inline float3 kernel_read_pass_float3(ccl_global float *ccl_restrict buffer) -{ - return make_float3(buffer[0], buffer[1], buffer[2]); -} - -ccl_device_inline float4 kernel_read_pass_float4(ccl_global float *ccl_restrict buffer) -{ - return 
make_float4(buffer[0], buffer[1], buffer[2], buffer[3]);
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/film/id_passes.h b/intern/cycles/kernel/film/id_passes.h
new file mode 100644
index 00000000000..d5b8c90a828
--- /dev/null
+++ b/intern/cycles/kernel/film/id_passes.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+CCL_NAMESPACE_BEGIN
+
+/* Element of ID pass stored in the render buffers.
+ * It is `float2` semantically, but it must be unaligned since the offset of ID passes in the
+ * render buffers might not match the alignment expected by the compiler. */
+typedef struct IDPassBufferElement {
+ float x;
+ float y;
+} IDPassBufferElement;
+
+ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
+ int num_slots,
+ float id,
+ float weight)
+{
+ kernel_assert(id != ID_NONE);
+ if (weight == 0.0f) {
+ return;
+ }
+
+ for (int slot = 0; slot < num_slots; slot++) {
+ ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
+#ifdef __ATOMIC_PASS_WRITE__
+ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
+ if (id_buffer[slot].x == ID_NONE) {
+ /* Use an atomic to claim this slot.
+ * If a different thread got here first, try again from this slot on. */
+ float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id);
+ if (old_id != ID_NONE && old_id != id) {
+ continue;
+ }
+ atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
+ break;
+ }
+ /* If there already is a slot for that ID, add the weight.
+ * If no slot was found, add it to the last slot. */
+ else if (id_buffer[slot].x == id || slot == num_slots - 1) {
+ atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight);
+ break;
+ }
+#else /* __ATOMIC_PASS_WRITE__ */
+ /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
+ if (id_buffer[slot].x == ID_NONE) {
+ id_buffer[slot].x = id;
+ id_buffer[slot].y = weight;
+ break;
+ }
+ /* If there already is a slot for that ID, add the weight.
+ * If no slot was found, add it to the last slot. */
+ else if (id_buffer[slot].x == id || slot == num_slots - 1) {
+ id_buffer[slot].y += weight;
+ break;
+ }
+#endif /* __ATOMIC_PASS_WRITE__ */
+ }
+}
+
+ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots)
+{
+ ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
+ for (int slot = 1; slot < num_slots; ++slot) {
+ if (id_buffer[slot].x == ID_NONE) {
+ return;
+ }
+ /* Since we're dealing with a tiny number of elements, insertion sort should be fine.
*/ + int i = slot; + while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) { + const IDPassBufferElement swap = id_buffer[i]; + id_buffer[i] = id_buffer[i - 1]; + id_buffer[i - 1] = swap; + --i; + } + } +} + +/* post-sorting for Cryptomatte */ +ccl_device_inline void kernel_cryptomatte_post(KernelGlobals kg, + ccl_global float *render_buffer, + int pixel_index) +{ + const int pass_stride = kernel_data.film.pass_stride; + const uint64_t render_buffer_offset = (uint64_t)pixel_index * pass_stride; + ccl_global float *cryptomatte_buffer = render_buffer + render_buffer_offset + + kernel_data.film.pass_cryptomatte; + kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/passes.h b/intern/cycles/kernel/film/passes.h new file mode 100644 index 00000000000..3a91d1653fe --- /dev/null +++ b/intern/cycles/kernel/film/passes.h @@ -0,0 +1,342 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/geom/geom.h" + +#include "kernel/film/id_passes.h" +#include "kernel/film/write_passes.h" + +CCL_NAMESPACE_BEGIN + +/* Get pointer to pixel in render buffer. */ +ccl_device_forceinline ccl_global float *kernel_pass_pixel_render_buffer( + KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + return render_buffer + render_buffer_offset; +} + +#ifdef __DENOISING_FEATURES__ + +ccl_device_forceinline void kernel_write_denoising_features_surface( + KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict render_buffer) +{ + if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES)) { + return; + } + + /* Skip implicitly transparent surfaces. */ + if (sd->flag & SD_HAS_ONLY_VOLUME) { + return; + } + + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); + + if (kernel_data.film.pass_denoising_depth != PASS_UNUSED) { + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + const float denoising_depth = ensure_finite(average(denoising_feature_throughput) * + sd->ray_length); + kernel_write_pass_float(buffer + kernel_data.film.pass_denoising_depth, denoising_depth); + } + + float3 normal = zero_float3(); + float3 diffuse_albedo = zero_float3(); + float3 specular_albedo = zero_float3(); + float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + continue; + } + + /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. 
*/ + normal += sc->N * sc->sample_weight; + sum_weight += sc->sample_weight; + + float3 closure_albedo = sc->weight; + /* Closures that include a Fresnel term typically have weights close to 1 even though their + * actual contribution is significantly lower. + * To account for this, we scale their weight by the average fresnel factor (the same is also + * done for the sample weight in the BSDF setup, so we don't need to scale that here). */ + if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) { + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc; + closure_albedo *= bsdf->extra->fresnel_color; + } + else if (sc->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_ID) { + ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)sc; + closure_albedo *= bsdf->avg_value; + } + else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) { + closure_albedo *= bsdf_principled_hair_albedo(sc); + } + + if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { + diffuse_albedo += closure_albedo; + sum_nonspecular_weight += sc->sample_weight; + } + else { + specular_albedo += closure_albedo; + } + } + + /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */ + if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) { + if (sum_weight != 0.0f) { + normal /= sum_weight; + } + + if (kernel_data.film.pass_denoising_normal != PASS_UNUSED) { + /* Transform normal into camera space. */ + const Transform worldtocamera = kernel_data.cam.worldtocamera; + normal = transform_direction(&worldtocamera, normal); + + const float3 denoising_normal = ensure_finite3(normal); + kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); + } + + if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput * + diffuse_albedo); + kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + } + + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; + } + else { + INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) *= specular_albedo; + } +} + +ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals kg, + IntegratorState state, + const float3 albedo, + const bool scatter, + ccl_global float *ccl_restrict + render_buffer) +{ + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); + const float3 denoising_feature_throughput = INTEGRATOR_STATE( + state, path, denoising_feature_throughput); + + if (scatter && kernel_data.film.pass_denoising_normal != PASS_UNUSED) { + /* Assume scatter is sufficiently diffuse to stop writing denoising features. */ + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES; + + /* Write view direction as normal. */ + const float3 denoising_normal = make_float3(0.0f, 0.0f, -1.0f); + kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal); + } + + if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) { + /* Write albedo. 
*/ + const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput * albedo); + kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo); + } +} +#endif /* __DENOISING_FEATURES__ */ + +#ifdef __SHADOW_CATCHER__ + +/* Write shadow catcher passes on a bounce from the shadow catcher object. */ +ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data( + KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict render_buffer) +{ + if (!kernel_data.integrator.has_shadow_catcher) { + return; + } + + kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); + + if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) { + return; + } + + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); + + /* Count sample for the shadow catcher object. */ + kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); + + /* Since the split is done, the sample does not contribute to the matte, so accumulate it as + * transparency to the matte. */ + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, + average(throughput)); +} + +#endif /* __SHADOW_CATCHER__ */ + +ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer, + size_t depth, + float id, + float matte_weight) +{ + kernel_write_id_slots(buffer, depth * 2, id, matte_weight); + return depth * 4; +} + +ccl_device_inline void kernel_write_data_passes(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict render_buffer) +{ +#ifdef __PASSES__ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if (!(path_flag & PATH_RAY_CAMERA)) { + return; + } + + const int flag = kernel_data.film.pass_flag; + + if (!(flag & PASS_ANY)) { + return; + } + + ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); + + if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { + if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || + average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { + if (INTEGRATOR_STATE(state, path, sample) == 0) { + if (flag & PASSMASK(DEPTH)) { + const float depth = camera_z_depth(kg, sd->P); + kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth); + } + if (flag & PASSMASK(OBJECT_ID)) { + const float id = object_pass_id(kg, sd->object); + kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id); + } + if (flag & PASSMASK(MATERIAL_ID)) { + const float id = shader_pass_id(kg, sd); + kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id); + } + if (flag & PASSMASK(POSITION)) { + const float3 position = sd->P; + kernel_write_pass_float3(buffer + kernel_data.film.pass_position, position); + } + } + + if (flag & PASSMASK(NORMAL)) { + const float3 normal = shader_bsdf_average_normal(kg, sd); + kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); + } + if (flag & PASSMASK(ROUGHNESS)) { + const float roughness = shader_bsdf_average_roughness(sd); + kernel_write_pass_float(buffer + kernel_data.film.pass_roughness, roughness); + } + if (flag & PASSMASK(UV)) { + const float3 uv = primitive_uv(kg, sd); + kernel_write_pass_float3(buffer + 
kernel_data.film.pass_uv, uv);
+ }
+ if (flag & PASSMASK(MOTION)) {
+ const float4 speed = primitive_motion_vector(kg, sd);
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f);
+ }
+
+ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SINGLE_PASS_DONE;
+ }
+ }
+
+ if (kernel_data.film.cryptomatte_passes) {
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ const float matte_weight = average(throughput) *
+ (1.0f - average(shader_bsdf_transparency(kg, sd)));
+ if (matte_weight > 0.0f) {
+ ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+ if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
+ const float id = object_cryptomatte_id(kg, sd->object);
+ cryptomatte_buffer += kernel_write_id_pass(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight);
+ }
+ if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
+ const float id = shader_cryptomatte_id(kg, sd->shader);
+ cryptomatte_buffer += kernel_write_id_pass(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight);
+ }
+ if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
+ const float id = object_cryptomatte_asset_id(kg, sd->object);
+ cryptomatte_buffer += kernel_write_id_pass(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight);
+ }
+ }
+ }
+
+ if (flag & PASSMASK(DIFFUSE_COLOR)) {
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color,
+ shader_bsdf_diffuse(kg, sd) * throughput);
+ }
+ if (flag & PASSMASK(GLOSSY_COLOR)) {
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color,
+ shader_bsdf_glossy(kg, sd) * throughput);
+ }
+ if (flag & PASSMASK(TRANSMISSION_COLOR)) {
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color,
+ shader_bsdf_transmission(kg, sd) * throughput);
+ }
+ if (flag & PASSMASK(MIST)) {
+ /* Bring depth into 0..1 range. */
+ const float mist_start = kernel_data.film.mist_start;
+ const float mist_inv_depth = kernel_data.film.mist_inv_depth;
+
+ const float depth = camera_distance(kg, sd->P);
+ float mist = saturate((depth - mist_start) * mist_inv_depth);
+
+ /* Falloff */
+ const float mist_falloff = kernel_data.film.mist_falloff;
+
+ if (mist_falloff == 1.0f)
+ ;
+ else if (mist_falloff == 2.0f)
+ mist = mist * mist;
+ else if (mist_falloff == 0.5f)
+ mist = sqrtf(mist);
+ else
+ mist = powf(mist, mist_falloff);
+
+ /* Modulate by transparency */
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ const float3 alpha = shader_bsdf_alpha(kg, sd);
+ const float mist_output = (1.0f - mist) * average(throughput * alpha);
+
+ /* Note that the final value in the render buffer we want is 1 - mist_output,
+ * to avoid having to track this in the integrator state, we do the negation
+ * after rendering.
*/ + kernel_write_pass_float(buffer + kernel_data.film.pass_mist, mist_output); + } +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/read.h b/intern/cycles/kernel/film/read.h new file mode 100644 index 00000000000..a87eff3832e --- /dev/null +++ b/intern/cycles/kernel/film/read.h @@ -0,0 +1,532 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* -------------------------------------------------------------------- + * Common utilities. + */ + +/* The input buffer contains transparency = 1 - alpha, this converts it to + * alpha. Also clamp since alpha might end up outside of 0..1 due to Russian + * roulette. */ +ccl_device_forceinline float film_transparency_to_alpha(float transparency) +{ + return saturate(1.0f - transparency); +} + +ccl_device_inline float film_get_scale(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer) +{ + if (kfilm_convert->pass_sample_count == PASS_UNUSED) { + return kfilm_convert->scale; + } + + if (kfilm_convert->pass_use_filter) { + const uint sample_count = *( + (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count)); + return 1.0f / sample_count; + } + + return 1.0f; +} + +ccl_device_inline float film_get_scale_exposure(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer) +{ + if (kfilm_convert->pass_sample_count == PASS_UNUSED) { + return kfilm_convert->scale_exposure; + } + + const float scale = film_get_scale(kfilm_convert, buffer); + + if (kfilm_convert->pass_use_exposure) { + return scale * kfilm_convert->exposure; + } + + return scale; +} + +ccl_device_inline bool film_get_scale_and_scale_exposure( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict scale, + ccl_private float *ccl_restrict scale_exposure) +{ + if (kfilm_convert->pass_sample_count == PASS_UNUSED) { + *scale = kfilm_convert->scale; + *scale_exposure = kfilm_convert->scale_exposure; + return true; + } + + const uint sample_count = *( + (ccl_global const uint *)(buffer + kfilm_convert->pass_sample_count)); + if (!sample_count) { + *scale = 0.0f; + *scale_exposure = 0.0f; + return false; + } + + if (kfilm_convert->pass_use_filter) { + *scale = 1.0f / sample_count; + } + else { + *scale = 1.0f; + } + + if (kfilm_convert->pass_use_exposure) { + *scale_exposure = *scale * kfilm_convert->exposure; + } + else { + *scale_exposure = *scale; + } + + return true; +} + +/* -------------------------------------------------------------------- + * Float (scalar) passes. 
+ */ + +ccl_device_inline void film_get_pass_pixel_depth(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 1); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + const float f = *in; + + pixel[0] = (f == 0.0f) ? 1e10f : f * scale_exposure; +} + +ccl_device_inline void film_get_pass_pixel_mist(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 1); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + const float f = *in; + + /* Note that we accumulate 1 - mist in the kernel to avoid having to + * track the mist values in the integrator state. */ + pixel[0] = saturate(1.0f - f * scale_exposure); +} + +ccl_device_inline void film_get_pass_pixel_sample_count( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + /* TODO(sergey): Consider normalizing into the [0..1] range, so that it is possible to see + * meaningful value when adaptive sampler stopped rendering image way before the maximum + * number of samples was reached (for examples when number of samples is set to 0 in + * viewport). */ + + kernel_assert(kfilm_convert->num_components >= 1); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + const float f = *in; + + pixel[0] = __float_as_uint(f) * kfilm_convert->scale; +} + +ccl_device_inline void film_get_pass_pixel_float(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 1); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + const float f = *in; + + pixel[0] = f * scale_exposure; +} + +/* -------------------------------------------------------------------- + * Float 3 passes. + */ + +ccl_device_inline void film_get_pass_pixel_light_path( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 3); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + /* Read light pass. */ + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + float3 f = make_float3(in[0], in[1], in[2]); + + /* Optionally add indirect light pass. */ + if (kfilm_convert->pass_indirect != PASS_UNUSED) { + ccl_global const float *in_indirect = buffer + kfilm_convert->pass_indirect; + const float3 f_indirect = make_float3(in_indirect[0], in_indirect[1], in_indirect[2]); + f += f_indirect; + } + + /* Optionally divide out color. 
*/ + if (kfilm_convert->pass_divide != PASS_UNUSED) { + ccl_global const float *in_divide = buffer + kfilm_convert->pass_divide; + const float3 f_divide = make_float3(in_divide[0], in_divide[1], in_divide[2]); + f = safe_divide_even_color(f, f_divide); + + /* Exposure only, sample scale cancels out. */ + f *= kfilm_convert->exposure; + } + else { + /* Sample scale and exposure. */ + f *= film_get_scale_exposure(kfilm_convert, buffer); + } + + pixel[0] = f.x; + pixel[1] = f.y; + pixel[2] = f.z; +} + +ccl_device_inline void film_get_pass_pixel_float3(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 3); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + const float scale_exposure = film_get_scale_exposure(kfilm_convert, buffer); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + + const float3 f = make_float3(in[0], in[1], in[2]) * scale_exposure; + + pixel[0] = f.x; + pixel[1] = f.y; + pixel[2] = f.z; +} + +/* -------------------------------------------------------------------- + * Float4 passes. + */ + +ccl_device_inline void film_get_pass_pixel_motion(ccl_global const KernelFilmConvert *ccl_restrict + kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components == 4); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + kernel_assert(kfilm_convert->pass_motion_weight != PASS_UNUSED); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + ccl_global const float *in_weight = buffer + kfilm_convert->pass_motion_weight; + + const float weight = in_weight[0]; + const float weight_inv = (weight > 0.0f) ? 1.0f / weight : 0.0f; + + const float4 motion = make_float4(in[0], in[1], in[2], in[3]) * weight_inv; + + pixel[0] = motion.x; + pixel[1] = motion.y; + pixel[2] = motion.z; + pixel[3] = motion.w; +} + +ccl_device_inline void film_get_pass_pixel_cryptomatte( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components == 4); + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + + const float scale = film_get_scale(kfilm_convert, buffer); + + ccl_global const float *in = buffer + kfilm_convert->pass_offset; + + const float4 f = make_float4(in[0], in[1], in[2], in[3]); + + /* x and z contain integer IDs, don't rescale them. + * y and w contain matte weights, they get scaled. 
*/
+ pixel[0] = f.x;
+ pixel[1] = f.y * scale;
+ pixel[2] = f.z;
+ pixel[3] = f.w * scale;
+}
+
+ccl_device_inline void film_get_pass_pixel_float4(ccl_global const KernelFilmConvert *ccl_restrict
+ kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components == 4);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ float scale, scale_exposure;
+ film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure);
+
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+
+ const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure;
+ const float alpha = in[3] * scale;
+
+ pixel[0] = color.x;
+ pixel[1] = color.y;
+ pixel[2] = color.z;
+ pixel[3] = alpha;
+}
+
+ccl_device_inline void film_get_pass_pixel_combined(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer,
+ ccl_private float *ccl_restrict pixel)
+{
+ kernel_assert(kfilm_convert->num_components == 4);
+
+ /* 3rd channel contains transparency = 1 - alpha for the combined pass. */
+
+ kernel_assert(kfilm_convert->num_components == 4);
+ kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED);
+
+ float scale, scale_exposure;
+ if (!film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure)) {
+ pixel[0] = 0.0f;
+ pixel[1] = 0.0f;
+ pixel[2] = 0.0f;
+ pixel[3] = 0.0f;
+ return;
+ }
+
+ ccl_global const float *in = buffer + kfilm_convert->pass_offset;
+
+ const float3 color = make_float3(in[0], in[1], in[2]) * scale_exposure;
+ const float alpha = in[3] * scale;
+
+ pixel[0] = color.x;
+ pixel[1] = color.y;
+ pixel[2] = color.z;
+ pixel[3] = film_transparency_to_alpha(alpha);
+}
+
+/* --------------------------------------------------------------------
+ * Shadow catcher.
+ */
+
+ccl_device_inline float3 film_calculate_shadow_catcher_denoised(
+ ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
+{
+ kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED);
+
+ float scale, scale_exposure;
+ film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure);
+
+ ccl_global const float *in_catcher = buffer + kfilm_convert->pass_shadow_catcher;
+
+ const float3 pixel = make_float3(in_catcher[0], in_catcher[1], in_catcher[2]) * scale_exposure;
+
+ return pixel;
+}
+
+ccl_device_inline float3 safe_divide_shadow_catcher(float3 a, float3 b)
+{
+ float x, y, z;
+
+ x = (b.x != 0.0f) ? a.x / b.x : 1.0f;
+ y = (b.y != 0.0f) ? a.y / b.y : 1.0f;
+ z = (b.z != 0.0f) ? a.z / b.z : 1.0f;
+
+ return make_float3(x, y, z);
+}
+
+ccl_device_inline float3
+film_calculate_shadow_catcher(ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert,
+ ccl_global const float *ccl_restrict buffer)
+{
+ /* For the shadow catcher pass we divide the combined pass by the shadow catcher.
+ * Note that the denoised shadow catcher pass contains a value which only needs to be scaled
+ * (but not to be calculated as a division). */
+
+ if (kfilm_convert->is_denoised) {
+ return film_calculate_shadow_catcher_denoised(kfilm_convert, buffer);
+ }
+
+ kernel_assert(kfilm_convert->pass_shadow_catcher_sample_count != PASS_UNUSED);
+
+ /* If there is no shadow catcher object in this pixel, there is no modification of the light
+ * needed, so return one.
*/ + ccl_global const float *in_catcher_sample_count = + buffer + kfilm_convert->pass_shadow_catcher_sample_count; + const float num_samples = in_catcher_sample_count[0]; + if (num_samples == 0.0f) { + return one_float3(); + } + + kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED); + ccl_global const float *in_catcher = buffer + kfilm_convert->pass_shadow_catcher; + + /* NOTE: It is possible that the Shadow Catcher pass is requested as an output without actual + * shadow catcher objects in the scene. In this case there will be no auxiliary passes required + * for the decision (to save up memory). So delay the asserts to this point so that the number of + * samples check handles such configuration. */ + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + kernel_assert(kfilm_convert->pass_combined != PASS_UNUSED); + kernel_assert(kfilm_convert->pass_shadow_catcher_matte != PASS_UNUSED); + + ccl_global const float *in_combined = buffer + kfilm_convert->pass_combined; + ccl_global const float *in_matte = buffer + kfilm_convert->pass_shadow_catcher_matte; + + /* No scaling needed. The integration works in way that number of samples in the combined and + * shadow catcher passes are the same, and exposure is canceled during the division. */ + const float3 color_catcher = make_float3(in_catcher[0], in_catcher[1], in_catcher[2]); + const float3 color_combined = make_float3(in_combined[0], in_combined[1], in_combined[2]); + const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]); + + /* Need to ignore contribution of the matte object when doing division (otherwise there will be + * artifacts caused by anti-aliasing). Since combined pass is used for adaptive sampling and need + * to contain matte objects, we subtract matte objects contribution here. This is the same as if + * the matte objects were not accumulated to the combined pass. */ + const float3 combined_no_matte = color_combined - color_matte; + + const float3 shadow_catcher = safe_divide_shadow_catcher(combined_no_matte, color_catcher); + + const float scale = film_get_scale(kfilm_convert, buffer); + const float transparency = in_combined[3] * scale; + const float alpha = film_transparency_to_alpha(transparency); + + /* Alpha-over on white using transparency of the combined pass. This allows to eliminate + * artifacts which are happening on an edge of a shadow catcher when using transparent film. + * Note that we treat shadow catcher as straight alpha here because alpha got canceled out + * during the division. */ + const float3 pixel = (1.0f - alpha) * one_float3() + alpha * shadow_catcher; + + return pixel; +} + +ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer) +{ + /* The approximation of the shadow is 1 - average(shadow_catcher_pass). A better approximation + * is possible. + * + * The matte is alpha-overed onto the shadow (which is kind of alpha-overing shadow onto footage, + * and then alpha-overing synthetic objects on top). 
*/ + + kernel_assert(kfilm_convert->pass_offset != PASS_UNUSED); + kernel_assert(kfilm_convert->pass_shadow_catcher != PASS_UNUSED); + kernel_assert(kfilm_convert->pass_shadow_catcher_matte != PASS_UNUSED); + + float scale, scale_exposure; + if (!film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure)) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + ccl_global const float *in_matte = buffer + kfilm_convert->pass_shadow_catcher_matte; + + const float3 shadow_catcher = film_calculate_shadow_catcher(kfilm_convert, buffer); + const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]) * scale_exposure; + + const float transparency = in_matte[3] * scale; + const float alpha = saturate(1.0f - transparency); + + const float alpha_matte = (1.0f - alpha) * (1.0f - average(shadow_catcher)) + alpha; + + if (kfilm_convert->use_approximate_shadow_catcher_background) { + kernel_assert(kfilm_convert->pass_background != PASS_UNUSED); + + ccl_global const float *in_background = buffer + kfilm_convert->pass_background; + const float3 color_background = make_float3( + in_background[0], in_background[1], in_background[2]) * + scale_exposure; + const float3 alpha_over = color_matte + color_background * (1.0f - alpha_matte); + return make_float4(alpha_over.x, alpha_over.y, alpha_over.z, 1.0f); + } + + return make_float4(color_matte.x, color_matte.y, color_matte.z, alpha_matte); +} + +ccl_device_inline void film_get_pass_pixel_shadow_catcher( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components >= 3); + + const float3 pixel_value = film_calculate_shadow_catcher(kfilm_convert, buffer); + + pixel[0] = pixel_value.x; + pixel[1] = pixel_value.y; + pixel[2] = pixel_value.z; +} + +ccl_device_inline void film_get_pass_pixel_shadow_catcher_matte_with_shadow( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + kernel_assert(kfilm_convert->num_components == 3 || kfilm_convert->num_components == 4); + + const float4 pixel_value = film_calculate_shadow_catcher_matte_with_shadow(kfilm_convert, + buffer); + + pixel[0] = pixel_value.x; + pixel[1] = pixel_value.y; + pixel[2] = pixel_value.z; + if (kfilm_convert->num_components == 4) { + pixel[3] = pixel_value.w; + } +} + +/* -------------------------------------------------------------------- + * Compositing and overlays. 
+ */ + +ccl_device_inline void film_apply_pass_pixel_overlays_rgba( + ccl_global const KernelFilmConvert *ccl_restrict kfilm_convert, + ccl_global const float *ccl_restrict buffer, + ccl_private float *ccl_restrict pixel) +{ + if (kfilm_convert->show_active_pixels && + kfilm_convert->pass_adaptive_aux_buffer != PASS_UNUSED) { + if (buffer[kfilm_convert->pass_adaptive_aux_buffer + 3] == 0.0f) { + const float3 active_rgb = make_float3(1.0f, 0.0f, 0.0f); + const float3 mix_rgb = interp(make_float3(pixel[0], pixel[1], pixel[2]), active_rgb, 0.5f); + pixel[0] = mix_rgb.x; + pixel[1] = mix_rgb.y; + pixel[2] = mix_rgb.z; + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/film/write_passes.h b/intern/cycles/kernel/film/write_passes.h new file mode 100644 index 00000000000..9d379495629 --- /dev/null +++ b/intern/cycles/kernel/film/write_passes.h @@ -0,0 +1,88 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifdef __KERNEL_GPU__ +# define __ATOMIC_PASS_WRITE__ +#endif + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict buffer, float value) +{ +#ifdef __ATOMIC_PASS_WRITE__ + atomic_add_and_fetch_float(buffer, value); +#else + *buffer += value; +#endif +} + +ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict buffer, + float3 value) +{ +#ifdef __ATOMIC_PASS_WRITE__ + ccl_global float *buf_x = buffer + 0; + ccl_global float *buf_y = buffer + 1; + ccl_global float *buf_z = buffer + 2; + + atomic_add_and_fetch_float(buf_x, value.x); + atomic_add_and_fetch_float(buf_y, value.y); + atomic_add_and_fetch_float(buf_z, value.z); +#else + buffer[0] += value.x; + buffer[1] += value.y; + buffer[2] += value.z; +#endif +} + +ccl_device_inline void kernel_write_pass_float4(ccl_global float *ccl_restrict buffer, + float4 value) +{ +#ifdef __ATOMIC_PASS_WRITE__ + ccl_global float *buf_x = buffer + 0; + ccl_global float *buf_y = buffer + 1; + ccl_global float *buf_z = buffer + 2; + ccl_global float *buf_w = buffer + 3; + + atomic_add_and_fetch_float(buf_x, value.x); + atomic_add_and_fetch_float(buf_y, value.y); + atomic_add_and_fetch_float(buf_z, value.z); + atomic_add_and_fetch_float(buf_w, value.w); +#else + buffer[0] += value.x; + buffer[1] += value.y; + buffer[2] += value.z; + buffer[3] += value.w; +#endif +} + +ccl_device_inline float kernel_read_pass_float(ccl_global float *ccl_restrict buffer) +{ + return *buffer; +} + +ccl_device_inline float3 kernel_read_pass_float3(ccl_global float *ccl_restrict buffer) +{ + return make_float3(buffer[0], buffer[1], buffer[2]); +} + +ccl_device_inline float4 kernel_read_pass_float4(ccl_global float *ccl_restrict buffer) +{ + return make_float4(buffer[0], buffer[1], buffer[2], buffer[3]); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/attribute.h b/intern/cycles/kernel/geom/attribute.h new file mode 100644 index 00000000000..848e0430caa --- /dev/null +++ 
b/intern/cycles/kernel/geom/attribute.h @@ -0,0 +1,116 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Attributes + * + * We support an arbitrary number of attributes on various mesh elements. + * On vertices, triangles, curve keys, curves, meshes and volume grids. + * Most of the code for attribute reading is in the primitive files. + * + * Lookup of attributes is different between OSL and SVM, as OSL is ustring + * based while for SVM we use integer ids. */ + +ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd); + +ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + if ((sd->type & PRIMITIVE_ALL_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) { + return ATTR_PRIM_SUBD; + } + else { + return ATTR_PRIM_GEOMETRY; + } +} + +ccl_device_inline AttributeDescriptor attribute_not_found() +{ + const AttributeDescriptor desc = { + ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND}; + return desc; +} + +/* Find attribute based on ID */ + +ccl_device_inline uint object_attribute_map_offset(KernelGlobals kg, int object) +{ + return kernel_tex_fetch(__objects, object).attribute_map_offset; +} + +ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg, + ccl_private const ShaderData *sd, + uint id) +{ + if (sd->object == OBJECT_NONE) { + return attribute_not_found(); + } + + /* for SVM, find attribute by unique id */ + uint attr_offset = object_attribute_map_offset(kg, sd->object); + attr_offset += attribute_primitive_type(kg, sd); + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + + while (attr_map.x != id) { + if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) { + if (UNLIKELY(attr_map.y == 0)) { + return attribute_not_found(); + } + else { + /* Chain jump to a different part of the table. */ + attr_offset = attr_map.z; + } + } + else { + attr_offset += ATTR_PRIM_TYPES; + } + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } + + AttributeDescriptor desc; + desc.element = (AttributeElement)attr_map.y; + + if (sd->prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH && + desc.element != ATTR_ELEMENT_VOXEL && desc.element != ATTR_ELEMENT_OBJECT) { + return attribute_not_found(); + } + + /* return result */ + desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + desc.type = (NodeAttributeType)(attr_map.w & 0xff); + desc.flags = (AttributeFlag)(attr_map.w >> 8); + + return desc; +} + +/* Transform matrix attribute on meshes */ + +ccl_device Transform primitive_attribute_matrix(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc) +{ + Transform tfm; + + tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0); + tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1); + tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2); + + return tfm; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/curve.h b/intern/cycles/kernel/geom/curve.h new file mode 100644 index 00000000000..7271193eef8 --- /dev/null +++ b/intern/cycles/kernel/geom/curve.h @@ -0,0 +1,328 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Curve Primitive + * + * Curve primitive for rendering hair and fur. These can be render as flat + * ribbons or curves with actual thickness. The curve can also be rendered as + * line segments rather than curves for better performance. + */ + +#ifdef __HAIR__ + +/* Reading attributes on various curve elements */ + +ccl_device float curve_attribute_float(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float *dx, + ccl_private float *dy) +{ + if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * (f1 - f0); + if (dy) + *dy = 0.0f; +# endif + + return (1.0f - sd->u) * f0 + sd->u * f1; + } + else { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; +# endif + + if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { + const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? 
desc.offset + sd->prim : + desc.offset; + return kernel_tex_fetch(__attributes_float, offset); + } + else { + return 0.0f; + } + } +} + +ccl_device float2 curve_attribute_float2(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float2 *dx, + ccl_private float2 *dy) +{ + if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0); + float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + k1); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * (f1 - f0); + if (dy) + *dy = make_float2(0.0f, 0.0f); +# endif + + return (1.0f - sd->u) * f0 + sd->u * f1; + } + else { + /* idea: we can't derive any useful differentials here, but for tiled + * mipmap image caching it would be useful to avoid reading the highest + * detail level always. maybe a derivative based on the hair density + * could be computed somehow? */ +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); +# endif + + if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { + const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim : + desc.offset; + return kernel_tex_fetch(__attributes_float2, offset); + } + else { + return make_float2(0.0f, 0.0f); + } + } +} + +ccl_device float3 curve_attribute_float3(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float3 *dx, + ccl_private float3 *dy) +{ + if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1)); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * (f1 - f0); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); +# endif + + return (1.0f - sd->u) * f0 + sd->u * f1; + } + else { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); +# endif + + if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { + const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? 
desc.offset + sd->prim : + desc.offset; + return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset)); + } + else { + return make_float3(0.0f, 0.0f, 0.0f); + } + } +} + +ccl_device float4 curve_attribute_float4(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float4 *dx, + ccl_private float4 *dy) +{ + if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0); + float4 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + k1); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * (f1 - f0); + if (dy) + *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +# endif + + return (1.0f - sd->u) * f0 + sd->u * f1; + } + else { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +# endif + + if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { + const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim : + desc.offset; + return kernel_tex_fetch(__attributes_float3, offset); + } + else { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + } +} + +/* Curve thickness */ + +ccl_device float curve_thickness(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float r = 0.0f; + + if (sd->type & PRIMITIVE_ALL_CURVE) { + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float4 P_curve[2]; + + if (!(sd->type & PRIMITIVE_ALL_MOTION)) { + P_curve[0] = kernel_tex_fetch(__curve_keys, k0); + P_curve[1] = kernel_tex_fetch(__curve_keys, k1); + } + else { + motion_curve_keys_linear(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); + } + + r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w; + } + + return r * 2.0f; +} + +/* Curve location for motion pass, linear interpolation between keys and + * ignoring radius because we do the same for the motion keys */ + +ccl_device float3 curve_motion_center_location(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float4 P_curve[2]; + + P_curve[0] = kernel_tex_fetch(__curve_keys, k0); + P_curve[1] = kernel_tex_fetch(__curve_keys, k1); + + return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); +} + +/* Curve tangent normal */ + +ccl_device float3 curve_tangent_normal(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 tgN = make_float3(0.0f, 0.0f, 0.0f); + + if (sd->type & PRIMITIVE_ALL_CURVE) { + + tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu, -sd->I) / len_squared(sd->dPdu))); + tgN = normalize(tgN); + + /* need to find suitable scaled gd for corrected normal */ +# if 0 + tgN = normalize(tgN - gd * sd->dPdu); +# endif + } + + return tgN; +} + +/* Curve bounds utility function */ + +ccl_device_inline void curvebounds(ccl_private float *lower, + ccl_private float *upper, + ccl_private float *extremta, + ccl_private float *extrema, + ccl_private float *extremtb, + ccl_private float *extremb, + float p0, + float p1, + float p2, + float p3) +{ + float halfdiscroot = (p2 * p2 - 3 * p3 * p1); + float ta = -1.0f; + float tb = -1.0f; + + 
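All of the per-key attribute readers above share the same interpolation: fetch the two keys of the intersected segment and blend them with the segment parameter u; for the radius stored in each key's w component, curve_thickness() doubles the interpolated value to return a diameter. A small standalone sketch of that blend, using a plain C++ struct instead of the kernel fetch macros:

#include <cassert>

struct Key { float x, y, z, radius; };

/* Linear interpolation of a per-key scalar along one curve segment,
 * following the (1 - u) * f0 + u * f1 form used by curve_attribute_float(). */
static float segment_lerp(float f0, float f1, float u)
{
  return (1.0f - u) * f0 + u * f1;
}

/* The keys carry a radius; the thickness reported to shading is a diameter. */
static float thickness(const Key &k0, const Key &k1, float u)
{
  return 2.0f * segment_lerp(k0.radius, k1.radius, u);
}

int main()
{
  const Key k0 = {0.0f, 0.0f, 0.0f, 0.25f};
  const Key k1 = {1.0f, 0.0f, 0.0f, 0.75f};
  assert(thickness(k0, k1, 0.5f) == 1.0f); /* radius 0.5 -> diameter 1.0 */
  return 0;
}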
*extremta = -1.0f; + *extremtb = -1.0f; + *upper = p0; + *lower = (p0 + p1) + (p2 + p3); + *extrema = *upper; + *extremb = *lower; + + if (*lower >= *upper) { + *upper = *lower; + *lower = p0; + } + + if (halfdiscroot >= 0) { + float inv3p3 = (1.0f / 3.0f) / p3; + halfdiscroot = sqrtf(halfdiscroot); + ta = (-p2 - halfdiscroot) * inv3p3; + tb = (-p2 + halfdiscroot) * inv3p3; + } + + float t2; + float t3; + + if (ta > 0.0f && ta < 1.0f) { + t2 = ta * ta; + t3 = t2 * ta; + *extremta = ta; + *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0; + + *upper = fmaxf(*extrema, *upper); + *lower = fminf(*extrema, *lower); + } + + if (tb > 0.0f && tb < 1.0f) { + t2 = tb * tb; + t3 = t2 * tb; + *extremtb = tb; + *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0; + + *upper = fmaxf(*extremb, *upper); + *lower = fminf(*extremb, *lower); + } +} + +#endif /* __HAIR__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/curve_intersect.h b/intern/cycles/kernel/geom/curve_intersect.h new file mode 100644 index 00000000000..fb0b80b281f --- /dev/null +++ b/intern/cycles/kernel/geom/curve_intersect.h @@ -0,0 +1,771 @@ +/* + * Copyright 2009-2020 Intel Corporation. Adapted from Embree with + * with modifications. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Curve primitive intersection functions. + * + * The code here was adapted from curve_intersector_sweep.h in Embree, to get + * an exact match between Embree CPU ray-tracing and our GPU ray-tracing. */ + +#define CURVE_NUM_BEZIER_SUBDIVISIONS 3 +#define CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE (CURVE_NUM_BEZIER_SUBDIVISIONS + 1) +#define CURVE_NUM_BEZIER_STEPS 2 +#define CURVE_NUM_JACOBIAN_ITERATIONS 5 + +#ifdef __HAIR__ + +/* Catmull-rom curve evaluation. 
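curvebounds() bounds the cubic p3*t^3 + p2*t^2 + p1*t + p0 over [0, 1] by starting from the endpoint values and widening by any interior extremum, i.e. a root of the derivative 3*p3*t^2 + 2*p2*t + p1, which is where the p2*p2 - 3*p3*p1 half-discriminant comes from. A simplified standalone sketch of the same idea, skipping the degenerate p3 == 0 case and the extra outputs the kernel returns:

#include <algorithm>
#include <cassert>
#include <cmath>
#include <initializer_list>

/* Bounds of p(t) = p3*t^3 + p2*t^2 + p1*t + p0 for t in [0, 1]: endpoint
 * values, widened by any interior root of the derivative. */
static void cubic_bounds_01(float p0, float p1, float p2, float p3,
                            float *lower, float *upper)
{
  const float at0 = p0;                /* p(0) */
  const float at1 = p0 + p1 + p2 + p3; /* p(1) */
  *lower = std::min(at0, at1);
  *upper = std::max(at0, at1);

  const float half_disc = p2 * p2 - 3.0f * p3 * p1;
  if (half_disc < 0.0f || p3 == 0.0f) {
    return; /* No interior extrema handled by this simplified sketch. */
  }
  const float root = std::sqrt(half_disc);
  const float inv3p3 = (1.0f / 3.0f) / p3;
  for (float t : {(-p2 - root) * inv3p3, (-p2 + root) * inv3p3}) {
    if (t > 0.0f && t < 1.0f) {
      const float v = ((p3 * t + p2) * t + p1) * t + p0;
      *lower = std::min(*lower, v);
      *upper = std::max(*upper, v);
    }
  }
}

int main()
{
  float lo, hi;
  /* p(t) = t^3 - 0.75*t: endpoints 0 and 0.25, interior minimum -0.25 at t = 0.5. */
  cubic_bounds_01(0.0f, -0.75f, 0.0f, 1.0f, &lo, &hi);
  assert(std::fabs(lo + 0.25f) < 1e-6f && std::fabs(hi - 0.25f) < 1e-6f);
  return 0;
}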
*/ + +ccl_device_inline float4 catmull_rom_basis_eval(const float4 curve[4], float u) +{ + const float t = u; + const float s = 1.0f - u; + const float n0 = -t * s * s; + const float n1 = 2.0f + t * t * (3.0f * t - 5.0f); + const float n2 = 2.0f + s * s * (3.0f * s - 5.0f); + const float n3 = -s * t * t; + return 0.5f * (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3); +} + +ccl_device_inline float4 catmull_rom_basis_derivative(const float4 curve[4], float u) +{ + const float t = u; + const float s = 1.0f - u; + const float n0 = -s * s + 2.0f * s * t; + const float n1 = 2.0f * t * (3.0f * t - 5.0f) + 3.0f * t * t; + const float n2 = 2.0f * s * (3.0f * t + 2.0f) - 3.0f * s * s; + const float n3 = -2.0f * s * t + t * t; + return 0.5f * (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3); +} + +ccl_device_inline float4 catmull_rom_basis_derivative2(const float4 curve[4], float u) +{ + + const float t = u; + const float n0 = -3.0f * t + 2.0f; + const float n1 = 9.0f * t - 5.0f; + const float n2 = -9.0f * t + 4.0f; + const float n3 = 3.0f * t - 1.0f; + return (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3); +} + +/* Thick Curve */ + +ccl_device_inline float3 dnormalize(const float3 p, const float3 dp) +{ + const float pp = dot(p, p); + const float pdp = dot(p, dp); + return (pp * dp - pdp * p) / (pp * sqrtf(pp)); +} + +ccl_device_inline float sqr_point_to_line_distance(const float3 PmQ0, const float3 Q1mQ0) +{ + const float3 N = cross(PmQ0, Q1mQ0); + const float3 D = Q1mQ0; + return dot(N, N) / dot(D, D); +} + +ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, + const float3 cylinder_end, + const float cylinder_radius, + const float3 ray_dir, + ccl_private float2 *t_o, + ccl_private float *u0_o, + ccl_private float3 *Ng0_o, + ccl_private float *u1_o, + ccl_private float3 *Ng1_o) +{ + /* Calculate quadratic equation to solve. */ + const float rl = 1.0f / len(cylinder_end - cylinder_start); + const float3 P0 = cylinder_start, dP = (cylinder_end - cylinder_start) * rl; + const float3 O = -P0, dO = ray_dir; + + const float dOdO = dot(dO, dO); + const float OdO = dot(dO, O); + const float OO = dot(O, O); + const float dOz = dot(dP, dO); + const float Oz = dot(dP, O); + + const float A = dOdO - sqr(dOz); + const float B = 2.0f * (OdO - dOz * Oz); + const float C = OO - sqr(Oz) - sqr(cylinder_radius); + + /* We miss the cylinder if determinant is smaller than zero. */ + const float D = B * B - 4.0f * A * C; + if (!(D >= 0.0f)) { + *t_o = make_float2(FLT_MAX, -FLT_MAX); + return false; + } + + /* Special case for rays that are parallel to the cylinder. */ + const float eps = 16.0f * FLT_EPSILON * max(fabsf(dOdO), fabsf(sqr(dOz))); + if (fabsf(A) < eps) { + if (C <= 0.0f) { + *t_o = make_float2(-FLT_MAX, FLT_MAX); + return true; + } + else { + *t_o = make_float2(-FLT_MAX, FLT_MAX); + return false; + } + } + + /* Standard case for rays that are not parallel to the cylinder. */ + const float Q = sqrtf(D); + const float rcp_2A = 1.0f / (2.0f * A); + const float t0 = (-B - Q) * rcp_2A; + const float t1 = (-B + Q) * rcp_2A; + + /* Calculates u and Ng for near hit. */ + { + *u0_o = (t0 * dOz + Oz) * rl; + const float3 Pr = t0 * ray_dir; + const float3 Pl = (*u0_o) * (cylinder_end - cylinder_start) + cylinder_start; + *Ng0_o = Pr - Pl; + } + + /* Calculates u and Ng for far hit. 
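The basis above is the uniform Catmull-Rom basis written in terms of t and s = 1 - t; the segment passes through the two middle control points, so evaluation at u = 0 and u = 1 returns curve[1] and curve[2]. A scalar sketch of the same weights, outside the kernel's float4 math, just to make that property concrete:

#include <cassert>

/* Scalar version of the catmull_rom_basis_eval() weights: eval(0) == p[1]
 * and eval(1) == p[2], so the segment interpolates the middle points. */
static float catmull_rom_eval(const float p[4], float u)
{
  const float t = u;
  const float s = 1.0f - u;
  const float n0 = -t * s * s;
  const float n1 = 2.0f + t * t * (3.0f * t - 5.0f);
  const float n2 = 2.0f + s * s * (3.0f * s - 5.0f);
  const float n3 = -s * t * t;
  return 0.5f * (p[0] * n0 + p[1] * n1 + p[2] * n2 + p[3] * n3);
}

int main()
{
  const float p[4] = {1.0f, 2.0f, 4.0f, 8.0f};
  assert(catmull_rom_eval(p, 0.0f) == 2.0f);
  assert(catmull_rom_eval(p, 1.0f) == 4.0f);
  return 0;
}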
*/ + { + *u1_o = (t1 * dOz + Oz) * rl; + const float3 Pr = t1 * ray_dir; + const float3 Pl = (*u1_o) * (cylinder_end - cylinder_start) + cylinder_start; + *Ng1_o = Pr - Pl; + } + + *t_o = make_float2(t0, t1); + + return true; +} + +ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_dir) +{ + const float3 O = -P; + const float3 D = ray_dir; + const float ON = dot(O, N); + const float DN = dot(D, N); + const float min_rcp_input = 1e-18f; + const bool eps = fabsf(DN) < min_rcp_input; + const float t = -ON / DN; + const float lower = (eps || DN < 0.0f) ? -FLT_MAX : t; + const float upper = (eps || DN > 0.0f) ? FLT_MAX : t; + return make_float2(lower, upper); +} + +ccl_device bool curve_intersect_iterative(const float3 ray_dir, + ccl_private float *ray_tfar, + const float dt, + const float4 curve[4], + float u, + float t, + const bool use_backfacing, + ccl_private Intersection *isect) +{ + const float length_ray_dir = len(ray_dir); + + /* Error of curve evaluations is proportional to largest coordinate. */ + const float4 box_min = min(min(curve[0], curve[1]), min(curve[2], curve[3])); + const float4 box_max = max(min(curve[0], curve[1]), max(curve[2], curve[3])); + const float4 box_abs = max(fabs(box_min), fabs(box_max)); + const float P_err = 16.0f * FLT_EPSILON * + max(box_abs.x, max(box_abs.y, max(box_abs.z, box_abs.w))); + const float radius_max = box_max.w; + + for (int i = 0; i < CURVE_NUM_JACOBIAN_ITERATIONS; i++) { + const float3 Q = ray_dir * t; + const float3 dQdt = ray_dir; + const float Q_err = 16.0f * FLT_EPSILON * length_ray_dir * t; + + const float4 P4 = catmull_rom_basis_eval(curve, u); + const float4 dPdu4 = catmull_rom_basis_derivative(curve, u); + + const float3 P = float4_to_float3(P4); + const float3 dPdu = float4_to_float3(dPdu4); + const float radius = P4.w; + const float dradiusdu = dPdu4.w; + + const float3 ddPdu = float4_to_float3(catmull_rom_basis_derivative2(curve, u)); + + const float3 R = Q - P; + const float len_R = len(R); + const float R_err = max(Q_err, P_err); + const float3 dRdu = -dPdu; + const float3 dRdt = dQdt; + + const float3 T = normalize(dPdu); + const float3 dTdu = dnormalize(dPdu, ddPdu); + const float cos_err = P_err / len(dPdu); + + const float f = dot(R, T); + const float f_err = len_R * P_err + R_err + cos_err * (1.0f + len_R); + const float dfdu = dot(dRdu, T) + dot(R, dTdu); + const float dfdt = dot(dRdt, T); + + const float K = dot(R, R) - sqr(f); + const float dKdu = (dot(R, dRdu) - f * dfdu); + const float dKdt = (dot(R, dRdt) - f * dfdt); + const float rsqrt_K = inversesqrtf(K); + + const float g = sqrtf(K) - radius; + const float g_err = R_err + f_err + 16.0f * FLT_EPSILON * radius_max; + const float dgdu = dKdu * rsqrt_K - dradiusdu; + const float dgdt = dKdt * rsqrt_K; + + const float invdet = 1.0f / (dfdu * dgdt - dgdu * dfdt); + u -= (dgdt * f - dfdt * g) * invdet; + t -= (-dgdu * f + dfdu * g) * invdet; + + if (fabsf(f) < f_err && fabsf(g) < g_err) { + t += dt; + if (!(0.0f <= t && t <= *ray_tfar)) { + return false; /* Rejects NaNs */ + } + if (!(u >= 0.0f && u <= 1.0f)) { + return false; /* Rejects NaNs */ + } + + /* Back-face culling. */ + const float3 R = normalize(Q - P); + const float3 U = dradiusdu * R + dPdu; + const float3 V = cross(dPdu, R); + const float3 Ng = cross(V, U); + if (!use_backfacing && dot(ray_dir, Ng) > 0.0f) { + return false; + } + + /* Record intersection. 
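curve_intersect_iterative() is a fixed-count Newton iteration on two residuals: f, the projection of the current offset onto the curve tangent, and g, the perpendicular distance to the curve minus the local radius; each step inverts the 2x2 Jacobian analytically. The same update formula applied to a toy system with a known root, as a hedged standalone sketch:

#include <cassert>
#include <cmath>

/* A 2x2 Newton iteration of the same shape as in the kernel:
 *   u -= ( dgdt*f - dfdt*g) / det,
 *   t -= (-dgdu*f + dfdu*g) / det,  with det = dfdu*dgdt - dgdu*dfdt.
 * Here the residuals are a circle of radius 2 and the diagonal u == t,
 * whose common root is (sqrt(2), sqrt(2)). */
int main()
{
  double u = 1.0, t = 0.5; /* initial guess */
  for (int i = 0; i < 20; i++) {
    const double f = u * u + t * t - 4.0;
    const double g = u - t;
    const double dfdu = 2.0 * u, dfdt = 2.0 * t;
    const double dgdu = 1.0, dgdt = -1.0;
    const double invdet = 1.0 / (dfdu * dgdt - dgdu * dfdt);
    u -= (dgdt * f - dfdt * g) * invdet;
    t -= (-dgdu * f + dfdu * g) * invdet;
  }
  assert(std::fabs(u - std::sqrt(2.0)) < 1e-9);
  assert(std::fabs(t - std::sqrt(2.0)) < 1e-9);
  return 0;
}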
*/ + *ray_tfar = t; + isect->t = t; + isect->u = u; + isect->v = 0.0f; + + return true; + } + } + return false; +} + +ccl_device bool curve_intersect_recursive(const float3 ray_orig, + const float3 ray_dir, + float ray_tfar, + float4 curve[4], + ccl_private Intersection *isect) +{ + /* Move ray closer to make intersection stable. */ + const float3 center = float4_to_float3(0.25f * (curve[0] + curve[1] + curve[2] + curve[3])); + const float dt = dot(center - ray_orig, ray_dir) / dot(ray_dir, ray_dir); + const float3 ref = ray_orig + ray_dir * dt; + const float4 ref4 = make_float4(ref.x, ref.y, ref.z, 0.0f); + curve[0] -= ref4; + curve[1] -= ref4; + curve[2] -= ref4; + curve[3] -= ref4; + + const bool use_backfacing = false; + const float step_size = 1.0f / (float)(CURVE_NUM_BEZIER_STEPS); + + int depth = 0; + + /* todo: optimize stack for GPU somehow? Possibly some bitflags are enough, and + * u0/u1 can be derived from the depth. */ + struct { + float u0, u1; + int i; + } stack[CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE]; + + bool found = false; + + float u0 = 0.0f; + float u1 = 1.0f; + int i = 0; + + while (1) { + for (; i < CURVE_NUM_BEZIER_STEPS; i++) { + const float step = i * step_size; + + /* Subdivide curve. */ + const float dscale = (u1 - u0) * (1.0f / 3.0f) * step_size; + const float vu0 = mix(u0, u1, step); + const float vu1 = mix(u0, u1, step + step_size); + + const float4 P0 = catmull_rom_basis_eval(curve, vu0); + const float4 dP0du = dscale * catmull_rom_basis_derivative(curve, vu0); + const float4 P3 = catmull_rom_basis_eval(curve, vu1); + const float4 dP3du = dscale * catmull_rom_basis_derivative(curve, vu1); + + const float4 P1 = P0 + dP0du; + const float4 P2 = P3 - dP3du; + + /* Calculate bounding cylinders. */ + const float rr1 = sqr_point_to_line_distance(float4_to_float3(dP0du), + float4_to_float3(P3 - P0)); + const float rr2 = sqr_point_to_line_distance(float4_to_float3(dP3du), + float4_to_float3(P3 - P0)); + const float maxr12 = sqrtf(max(rr1, rr2)); + const float one_plus_ulp = 1.0f + 2.0f * FLT_EPSILON; + const float one_minus_ulp = 1.0f - 2.0f * FLT_EPSILON; + float r_outer = max(max(P0.w, P1.w), max(P2.w, P3.w)) + maxr12; + float r_inner = min(min(P0.w, P1.w), min(P2.w, P3.w)) - maxr12; + r_outer = one_plus_ulp * r_outer; + r_inner = max(0.0f, one_minus_ulp * r_inner); + bool valid = true; + + /* Intersect with outer cylinder. */ + float2 tc_outer; + float u_outer0, u_outer1; + float3 Ng_outer0, Ng_outer1; + valid = cylinder_intersect(float4_to_float3(P0), + float4_to_float3(P3), + r_outer, + ray_dir, + &tc_outer, + &u_outer0, + &Ng_outer0, + &u_outer1, + &Ng_outer1); + if (!valid) { + continue; + } + + /* Intersect with cap-planes. */ + float2 tp = make_float2(-dt, ray_tfar - dt); + tp = make_float2(max(tp.x, tc_outer.x), min(tp.y, tc_outer.y)); + const float2 h0 = half_plane_intersect( + float4_to_float3(P0), float4_to_float3(dP0du), ray_dir); + tp = make_float2(max(tp.x, h0.x), min(tp.y, h0.y)); + const float2 h1 = half_plane_intersect( + float4_to_float3(P3), -float4_to_float3(dP3du), ray_dir); + tp = make_float2(max(tp.x, h1.x), min(tp.y, h1.y)); + valid = tp.x <= tp.y; + if (!valid) { + continue; + } + + /* Clamp and correct u parameter. */ + u_outer0 = clamp(u_outer0, 0.0f, 1.0f); + u_outer1 = clamp(u_outer1, 0.0f, 1.0f); + u_outer0 = mix(u0, u1, (step + u_outer0) * (1.0f / (float)(CURVE_NUM_BEZIER_STEPS + 1))); + u_outer1 = mix(u0, u1, (step + u_outer1) * (1.0f / (float)(CURVE_NUM_BEZIER_STEPS + 1))); + + /* Intersect with inner cylinder. 
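Each sub-interval above is re-expressed as a cubic Bezier segment built purely from endpoint evaluations: P0 and P3 are the curve values at the interval ends, and P1/P2 offset them by the endpoint derivatives scaled by a third of the interval width (the dscale factor). For a true cubic this conversion is exact; a small sketch, assuming a generic cubic C(t) rather than the Catmull-Rom basis:

#include <cassert>
#include <cmath>

/* Bezier control values for the restriction of a cubic to [a, b]:
 *   P0 = C(a), P1 = P0 + C'(a)*(b-a)/3, P2 = P3 - C'(b)*(b-a)/3, P3 = C(b).
 * The sketch checks that the Bezier midpoint matches C((a+b)/2). */
static double cubic(double t) { return t * t * t; }
static double cubic_deriv(double t) { return 3.0 * t * t; }

int main()
{
  const double a = 0.25, b = 0.75;
  const double scale = (b - a) / 3.0;
  const double P0 = cubic(a);
  const double P3 = cubic(b);
  const double P1 = P0 + cubic_deriv(a) * scale;
  const double P2 = P3 - cubic_deriv(b) * scale;

  /* Cubic Bezier evaluated at its parameter midpoint s = 0.5. */
  const double mid = (P0 + 3.0 * P1 + 3.0 * P2 + P3) / 8.0;
  assert(std::fabs(mid - cubic(0.5 * (a + b))) < 1e-12);
  return 0;
}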
*/ + float2 tc_inner; + float u_inner0, u_inner1; + float3 Ng_inner0, Ng_inner1; + const bool valid_inner = cylinder_intersect(float4_to_float3(P0), + float4_to_float3(P3), + r_inner, + ray_dir, + &tc_inner, + &u_inner0, + &Ng_inner0, + &u_inner1, + &Ng_inner1); + + /* At the unstable area we subdivide deeper. */ +# if 0 + const bool unstable0 = (!valid_inner) | + (fabsf(dot(normalize(ray_dir), normalize(Ng_inner0))) < 0.3f); + const bool unstable1 = (!valid_inner) | + (fabsf(dot(normalize(ray_dir), normalize(Ng_inner1))) < 0.3f); +# else + /* On the GPU appears to be a little faster if always enabled. */ + (void)valid_inner; + + const bool unstable0 = true; + const bool unstable1 = true; +# endif + + /* Subtract the inner interval from the current hit interval. */ + float2 tp0 = make_float2(tp.x, min(tp.y, tc_inner.x)); + float2 tp1 = make_float2(max(tp.x, tc_inner.y), tp.y); + bool valid0 = valid && (tp0.x <= tp0.y); + bool valid1 = valid && (tp1.x <= tp1.y); + if (!(valid0 || valid1)) { + continue; + } + + /* Process one or two hits. */ + bool recurse = false; + if (valid0) { + const int termDepth = unstable0 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE : + CURVE_NUM_BEZIER_SUBDIVISIONS; + if (depth >= termDepth) { + found |= curve_intersect_iterative( + ray_dir, &ray_tfar, dt, curve, u_outer0, tp0.x, use_backfacing, isect); + } + else { + recurse = true; + } + } + + if (valid1 && (tp1.x + dt <= ray_tfar)) { + const int termDepth = unstable1 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE : + CURVE_NUM_BEZIER_SUBDIVISIONS; + if (depth >= termDepth) { + found |= curve_intersect_iterative( + ray_dir, &ray_tfar, dt, curve, u_outer1, tp1.y, use_backfacing, isect); + } + else { + recurse = true; + } + } + + if (recurse) { + stack[depth].u0 = u0; + stack[depth].u1 = u1; + stack[depth].i = i + 1; + depth++; + + u0 = vu0; + u1 = vu1; + i = -1; + } + } + + if (depth > 0) { + depth--; + u0 = stack[depth].u0; + u1 = stack[depth].u1; + i = stack[depth].i; + } + else { + break; + } + } + + return found; +} + +/* Ribbons */ + +ccl_device_inline bool cylinder_culling_test(const float2 p1, const float2 p2, const float r) +{ + /* Performs culling against a cylinder. */ + const float2 dp = p2 - p1; + const float num = dp.x * p1.y - dp.y * p1.x; + const float den2 = dot(dp, dp); + return num * num <= r * r * den2; +} + +/** + * Intersects a ray with a quad with back-face culling + * enabled. The quad v0,v1,v2,v3 is split into two triangles + * v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two + * triangles gets intersected. + */ +ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar, + const float3 quad_v0, + const float3 quad_v1, + const float3 quad_v2, + const float3 quad_v3, + ccl_private float *u_o, + ccl_private float *v_o, + ccl_private float *t_o) +{ + /* Calculate vertices relative to ray origin? */ + const float3 O = make_float3(0.0f, 0.0f, 0.0f); + const float3 D = make_float3(0.0f, 0.0f, 1.0f); + const float3 va = quad_v0 - O; + const float3 vb = quad_v1 - O; + const float3 vc = quad_v2 - O; + const float3 vd = quad_v3 - O; + + const float3 edb = vb - vd; + const float WW = dot(cross(vd, edb), D); + const float3 v0 = (WW <= 0.0f) ? va : vc; + const float3 v1 = (WW <= 0.0f) ? vb : vd; + const float3 v2 = (WW <= 0.0f) ? vd : vb; + + /* Calculate edges? 
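Rather than recursing, curve_intersect_recursive() keeps a small manual stack of (u0, u1, i) entries and resets i to restart the inner loop inside the child interval, which avoids real recursion in the kernel. The same traversal pattern applied to a plain binary subdivision of [0, 1], as an illustrative sketch with made-up depth constants:

#include <cassert>

/* Depth-limited subdivision with an explicit stack, following the same
 * push/pop bookkeeping as the kernel: descend by saving (u0, u1, i + 1)
 * and setting i back to the start, pop when a level is exhausted. */
#define MAX_DEPTH 4
#define NUM_STEPS 2

int main()
{
  struct {
    float u0, u1;
    int i;
  } stack[MAX_DEPTH];

  int depth = 0;
  int leaves = 0;
  float u0 = 0.0f, u1 = 1.0f;
  int i = 0;

  while (1) {
    for (; i < NUM_STEPS; i++) {
      const float step = 1.0f / NUM_STEPS;
      const float vu0 = u0 + (u1 - u0) * (i * step);
      const float vu1 = u0 + (u1 - u0) * ((i + 1) * step);

      if (depth >= MAX_DEPTH) {
        leaves++; /* Terminal interval [vu0, vu1]. */
      }
      else {
        /* "Recurse": remember the rest of this level, then descend. */
        stack[depth].u0 = u0;
        stack[depth].u1 = u1;
        stack[depth].i = i + 1;
        depth++;
        u0 = vu0;
        u1 = vu1;
        i = -1;
      }
    }
    if (depth > 0) {
      depth--;
      u0 = stack[depth].u0;
      u1 = stack[depth].u1;
      i = stack[depth].i;
    }
    else {
      break;
    }
  }

  /* Binary subdivision down to MAX_DEPTH yields 2^(MAX_DEPTH + 1) leaves. */
  assert(leaves == 32);
  return 0;
}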
*/ + const float3 e0 = v2 - v0; + const float3 e1 = v0 - v1; + + /* perform edge tests */ + const float U = dot(cross(v0, e0), D); + const float V = dot(cross(v1, e1), D); + if (!(max(U, V) <= 0.0f)) { + return false; + } + + /* Calculate geometry normal and denominator? */ + const float3 Ng = cross(e1, e0); + const float den = dot(Ng, D); + const float rcpDen = 1.0f / den; + + /* Perform depth test? */ + const float t = rcpDen * dot(v0, Ng); + if (!(0.0f <= t && t <= ray_tfar)) { + return false; + } + + /* Avoid division by 0? */ + if (!(den != 0.0f)) { + return false; + } + + /* Update hit information? */ + *t_o = t; + *u_o = U * rcpDen; + *v_o = V * rcpDen; + *u_o = (WW <= 0.0f) ? *u_o : 1.0f - *u_o; + *v_o = (WW <= 0.0f) ? *v_o : 1.0f - *v_o; + return true; +} + +ccl_device_inline void ribbon_ray_space(const float3 ray_dir, float3 ray_space[3]) +{ + const float3 dx0 = make_float3(0, ray_dir.z, -ray_dir.y); + const float3 dx1 = make_float3(-ray_dir.z, 0, ray_dir.x); + ray_space[0] = normalize(dot(dx0, dx0) > dot(dx1, dx1) ? dx0 : dx1); + ray_space[1] = normalize(cross(ray_dir, ray_space[0])); + ray_space[2] = ray_dir; +} + +ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3], + const float3 ray_org, + const float4 P4) +{ + float3 P = float4_to_float3(P4) - ray_org; + return make_float4(dot(ray_space[0], P), dot(ray_space[1], P), dot(ray_space[2], P), P4.w); +} + +ccl_device_inline bool ribbon_intersect(const float3 ray_org, + const float3 ray_dir, + float ray_tfar, + const int N, + float4 curve[4], + ccl_private Intersection *isect) +{ + /* Transform control points into ray space. */ + float3 ray_space[3]; + ribbon_ray_space(ray_dir, ray_space); + + curve[0] = ribbon_to_ray_space(ray_space, ray_org, curve[0]); + curve[1] = ribbon_to_ray_space(ray_space, ray_org, curve[1]); + curve[2] = ribbon_to_ray_space(ray_space, ray_org, curve[2]); + curve[3] = ribbon_to_ray_space(ray_space, ray_org, curve[3]); + + const float4 mx = max(max(fabs(curve[0]), fabs(curve[1])), max(fabs(curve[2]), fabs(curve[3]))); + const float eps = 4.0f * FLT_EPSILON * max(max(mx.x, mx.y), max(mx.z, mx.w)); + const float step_size = 1.0f / (float)N; + + /* Evaluate first point and radius scaled normal direction. */ + float4 p0 = catmull_rom_basis_eval(curve, 0.0f); + float3 dp0dt = float4_to_float3(catmull_rom_basis_derivative(curve, 0.0f)); + if (max3(fabs(dp0dt)) < eps) { + const float4 p1 = catmull_rom_basis_eval(curve, step_size); + dp0dt = float4_to_float3(p1 - p0); + } + float3 wn0 = normalize(make_float3(dp0dt.y, -dp0dt.x, 0.0f)) * p0.w; + + /* Evaluate the bezier curve. */ + for (int i = 0; i < N; i++) { + const float u = i * step_size; + const float4 p1 = catmull_rom_basis_eval(curve, u + step_size); + const bool valid = cylinder_culling_test( + make_float2(p0.x, p0.y), make_float2(p1.x, p1.y), max(p0.w, p1.w)); + + /* Evaluate next point. */ + float3 dp1dt = float4_to_float3(catmull_rom_basis_derivative(curve, u + step_size)); + dp1dt = (max3(fabs(dp1dt)) < eps) ? float4_to_float3(p1 - p0) : dp1dt; + const float3 wn1 = normalize(make_float3(dp1dt.y, -dp1dt.x, 0.0f)) * p1.w; + + if (valid) { + /* Construct quad coordinates. */ + const float3 lp0 = float4_to_float3(p0) + wn0; + const float3 lp1 = float4_to_float3(p1) + wn1; + const float3 up0 = float4_to_float3(p0) - wn0; + const float3 up1 = float4_to_float3(p1) - wn1; + + /* Intersect quad. 
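Because the control points were already moved into a frame where the ray starts at the origin and points along +Z, every dot(cross(v, e), D) in ribbon_intersect_quad() reduces to a 2D cross product of xy components (the kernel tests only two edges, relying on the WW-based triangle selection for the shared diagonal). A standalone sketch of that edge-function containment test for a single triangle:

#include <cassert>

struct Vec3 { float x, y, z; };

/* dot(cross(v, e), (0, 0, 1)) collapses to the 2D cross product. */
static float edge_fn(const Vec3 &v, const Vec3 &e)
{
  return v.x * e.y - v.y * e.x;
}

/* A +Z ray from the origin hits the triangle when all three edge
 * functions share a sign, i.e. the origin projects inside the triangle. */
static bool hits_triangle(const Vec3 &v0, const Vec3 &v1, const Vec3 &v2)
{
  const Vec3 e0 = {v2.x - v0.x, v2.y - v0.y, v2.z - v0.z};
  const Vec3 e1 = {v0.x - v1.x, v0.y - v1.y, v0.z - v1.z};
  const Vec3 e2 = {v1.x - v2.x, v1.y - v2.y, v1.z - v2.z};
  const float U = edge_fn(v0, e0);
  const float V = edge_fn(v1, e1);
  const float W = edge_fn(v2, e2);
  return (U <= 0.0f && V <= 0.0f && W <= 0.0f) ||
         (U >= 0.0f && V >= 0.0f && W >= 0.0f);
}

int main()
{
  /* Triangle straddling the Z axis: the ray through the origin hits it. */
  const Vec3 a = {-1.0f, -1.0f, 5.0f}, b = {1.0f, -1.0f, 5.0f}, c = {0.0f, 2.0f, 5.0f};
  assert(hits_triangle(a, b, c));
  /* Shifted off the axis: the same ray misses. */
  const Vec3 a2 = {4.0f, -1.0f, 5.0f}, b2 = {6.0f, -1.0f, 5.0f}, c2 = {5.0f, 2.0f, 5.0f};
  assert(!hits_triangle(a2, b2, c2));
  return 0;
}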
*/ + float vu, vv, vt; + bool valid0 = ribbon_intersect_quad(ray_tfar, lp0, lp1, up1, up0, &vu, &vv, &vt); + + if (valid0) { + /* ignore self intersections */ + const float avoidance_factor = 2.0f; + if (avoidance_factor != 0.0f) { + float r = mix(p0.w, p1.w, vu); + valid0 = vt > avoidance_factor * r; + } + + if (valid0) { + vv = 2.0f * vv - 1.0f; + + /* Record intersection. */ + ray_tfar = vt; + isect->t = vt; + isect->u = u + vu * step_size; + isect->v = vv; + return true; + } + } + } + + /* Store point for next step. */ + p0 = p1; + wn0 = wn1; + } + return false; +} + +ccl_device_forceinline bool curve_intersect(KernelGlobals kg, + ccl_private Intersection *isect, + const float3 P, + const float3 dir, + const float tmax, + int object, + int prim, + float time, + int type) +{ + const bool is_motion = (type & PRIMITIVE_ALL_MOTION); + + KernelCurve kcurve = kernel_tex_fetch(__curves, prim); + + int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(type); + int k1 = k0 + 1; + int ka = max(k0 - 1, kcurve.first_key); + int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); + + float4 curve[4]; + if (!is_motion) { + curve[0] = kernel_tex_fetch(__curve_keys, ka); + curve[1] = kernel_tex_fetch(__curve_keys, k0); + curve[2] = kernel_tex_fetch(__curve_keys, k1); + curve[3] = kernel_tex_fetch(__curve_keys, kb); + } + else { + motion_curve_keys(kg, object, prim, time, ka, k0, k1, kb, curve); + } + + if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { + /* todo: adaptive number of subdivisions could help performance here. */ + const int subdivisions = kernel_data.bvh.curve_subdivisions; + if (ribbon_intersect(P, dir, tmax, subdivisions, curve, isect)) { + isect->prim = prim; + isect->object = object; + isect->type = type; + return true; + } + + return false; + } + else { + if (curve_intersect_recursive(P, dir, tmax, curve, isect)) { + isect->prim = prim; + isect->object = object; + isect->type = type; + return true; + } + + return false; + } +} + +ccl_device_inline void curve_shader_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + float3 P, + float3 D, + float t, + const int isect_object, + const int isect_prim) +{ + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform tfm = object_get_inverse_transform(kg, sd); + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D * t); + D = safe_normalize_len(D, &t); + } + + KernelCurve kcurve = kernel_tex_fetch(__curves, isect_prim); + + int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + int ka = max(k0 - 1, kcurve.first_key); + int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); + + float4 P_curve[4]; + + if (!(sd->type & PRIMITIVE_ALL_MOTION)) { + P_curve[0] = kernel_tex_fetch(__curve_keys, ka); + P_curve[1] = kernel_tex_fetch(__curve_keys, k0); + P_curve[2] = kernel_tex_fetch(__curve_keys, k1); + P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + } + else { + motion_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); + } + + P = P + D * t; + + const float4 dPdu4 = catmull_rom_basis_derivative(P_curve, sd->u); + const float3 dPdu = float4_to_float3(dPdu4); + + if (sd->type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { + /* Rounded smooth normals for ribbons, to approximate thick curve shape. 
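curve_intersect() and curve_shader_setup() gather the four Catmull-Rom control keys the same way: the segment's own keys k0/k1 plus one neighbour on each side, clamped so the first and last key of the curve are simply repeated at the ends. A small sketch of that index selection with hypothetical key numbers:

#include <algorithm>
#include <cassert>

struct ControlKeys { int ka, k0, k1, kb; };

/* Control keys for one segment of a curve described by its first key
 * index and key count, clamping the outer neighbours to the curve. */
static ControlKeys segment_control_keys(int first_key, int num_keys, int segment)
{
  ControlKeys c;
  c.k0 = first_key + segment;
  c.k1 = c.k0 + 1;
  c.ka = std::max(c.k0 - 1, first_key);
  c.kb = std::min(c.k1 + 1, first_key + num_keys - 1);
  return c;
}

int main()
{
  /* A curve with 4 keys (indices 10..13) has 3 segments. */
  const ControlKeys first = segment_control_keys(10, 4, 0);
  assert(first.ka == 10 && first.k0 == 10 && first.k1 == 11 && first.kb == 12);
  const ControlKeys last = segment_control_keys(10, 4, 2);
  assert(last.ka == 11 && last.k0 == 12 && last.k1 == 13 && last.kb == 13);
  return 0;
}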
*/ + const float3 tangent = normalize(dPdu); + const float3 bitangent = normalize(cross(tangent, -D)); + const float sine = sd->v; + const float cosine = safe_sqrtf(1.0f - sine * sine); + + sd->N = normalize(sine * bitangent - cosine * normalize(cross(tangent, bitangent))); + sd->Ng = -D; + +# if 0 + /* This approximates the position and geometric normal of a thick curve too, + * but gives too many issues with wrong self intersections. */ + const float dPdu_radius = dPdu4.w; + sd->Ng = sd->N; + P += sd->N * dPdu_radius; +# endif + } + else { + /* Thick curves, compute normal using direction from inside the curve. + * This could be optimized by recording the normal in the intersection, + * however for Optix this would go beyond the size of the payload. */ + /* NOTE: It is possible that P will be the same as P_inside (precision issues, or very small + * radius). In this case use the view direction to approximate the normal. */ + const float3 P_inside = float4_to_float3(catmull_rom_basis_eval(P_curve, sd->u)); + const float3 Ng = (!isequal_float3(P, P_inside)) ? normalize(P - P_inside) : -sd->I; + + sd->N = Ng; + sd->Ng = Ng; + sd->v = 0.0f; + } + +# ifdef __DPDU__ + /* dPdu/dPdv */ + sd->dPdu = dPdu; + sd->dPdv = cross(dPdu, sd->Ng); +# endif + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform tfm = object_get_transform(kg, sd); + P = transform_point(&tfm, P); + } + + sd->P = P; + sd->shader = kernel_tex_fetch(__curves, sd->prim).shader_id; +} + +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h index 4de824cc277..9d023375a35 100644 --- a/intern/cycles/kernel/geom/geom.h +++ b/intern/cycles/kernel/geom/geom.h @@ -17,21 +17,21 @@ #pragma once // clang-format off -#include "kernel/geom/geom_attribute.h" -#include "kernel/geom/geom_object.h" +#include "kernel/geom/attribute.h" +#include "kernel/geom/object.h" #ifdef __PATCH_EVAL__ -# include "kernel/geom/geom_patch.h" +# include "kernel/geom/patch.h" #endif -#include "kernel/geom/geom_triangle.h" -#include "kernel/geom/geom_subd_triangle.h" -#include "kernel/geom/geom_triangle_intersect.h" -#include "kernel/geom/geom_motion_triangle.h" -#include "kernel/geom/geom_motion_triangle_intersect.h" -#include "kernel/geom/geom_motion_triangle_shader.h" -#include "kernel/geom/geom_motion_curve.h" -#include "kernel/geom/geom_curve.h" -#include "kernel/geom/geom_curve_intersect.h" -#include "kernel/geom/geom_volume.h" -#include "kernel/geom/geom_primitive.h" -#include "kernel/geom/geom_shader_data.h" +#include "kernel/geom/triangle.h" +#include "kernel/geom/subd_triangle.h" +#include "kernel/geom/triangle_intersect.h" +#include "kernel/geom/motion_triangle.h" +#include "kernel/geom/motion_triangle_intersect.h" +#include "kernel/geom/motion_triangle_shader.h" +#include "kernel/geom/motion_curve.h" +#include "kernel/geom/curve.h" +#include "kernel/geom/curve_intersect.h" +#include "kernel/geom/volume.h" +#include "kernel/geom/primitive.h" +#include "kernel/geom/shader_data.h" // clang-format on diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h deleted file mode 100644 index 848e0430caa..00000000000 --- a/intern/cycles/kernel/geom/geom_attribute.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Attributes - * - * We support an arbitrary number of attributes on various mesh elements. - * On vertices, triangles, curve keys, curves, meshes and volume grids. - * Most of the code for attribute reading is in the primitive files. - * - * Lookup of attributes is different between OSL and SVM, as OSL is ustring - * based while for SVM we use integer ids. */ - -ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd); - -ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - if ((sd->type & PRIMITIVE_ALL_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) { - return ATTR_PRIM_SUBD; - } - else { - return ATTR_PRIM_GEOMETRY; - } -} - -ccl_device_inline AttributeDescriptor attribute_not_found() -{ - const AttributeDescriptor desc = { - ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND}; - return desc; -} - -/* Find attribute based on ID */ - -ccl_device_inline uint object_attribute_map_offset(KernelGlobals kg, int object) -{ - return kernel_tex_fetch(__objects, object).attribute_map_offset; -} - -ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg, - ccl_private const ShaderData *sd, - uint id) -{ - if (sd->object == OBJECT_NONE) { - return attribute_not_found(); - } - - /* for SVM, find attribute by unique id */ - uint attr_offset = object_attribute_map_offset(kg, sd->object); - attr_offset += attribute_primitive_type(kg, sd); - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - - while (attr_map.x != id) { - if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) { - if (UNLIKELY(attr_map.y == 0)) { - return attribute_not_found(); - } - else { - /* Chain jump to a different part of the table. */ - attr_offset = attr_map.z; - } - } - else { - attr_offset += ATTR_PRIM_TYPES; - } - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - } - - AttributeDescriptor desc; - desc.element = (AttributeElement)attr_map.y; - - if (sd->prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH && - desc.element != ATTR_ELEMENT_VOXEL && desc.element != ATTR_ELEMENT_OBJECT) { - return attribute_not_found(); - } - - /* return result */ - desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : (int)attr_map.z; - desc.type = (NodeAttributeType)(attr_map.w & 0xff); - desc.flags = (AttributeFlag)(attr_map.w >> 8); - - return desc; -} - -/* Transform matrix attribute on meshes */ - -ccl_device Transform primitive_attribute_matrix(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc) -{ - Transform tfm; - - tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0); - tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1); - tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2); - - return tfm; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h deleted file mode 100644 index 7271193eef8..00000000000 --- a/intern/cycles/kernel/geom/geom_curve.h +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Curve Primitive - * - * Curve primitive for rendering hair and fur. These can be render as flat - * ribbons or curves with actual thickness. The curve can also be rendered as - * line segments rather than curves for better performance. - */ - -#ifdef __HAIR__ - -/* Reading attributes on various curve elements */ - -ccl_device float curve_attribute_float(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float *dx, - ccl_private float *dy) -{ - if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); - int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); - float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1); - -# ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * (f1 - f0); - if (dy) - *dy = 0.0f; -# endif - - return (1.0f - sd->u) * f0 + sd->u * f1; - } - else { -# ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = 0.0f; - if (dy) - *dy = 0.0f; -# endif - - if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? 
desc.offset + sd->prim : - desc.offset; - return kernel_tex_fetch(__attributes_float, offset); - } - else { - return 0.0f; - } - } -} - -ccl_device float2 curve_attribute_float2(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float2 *dx, - ccl_private float2 *dy) -{ - if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); - int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0); - float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + k1); - -# ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * (f1 - f0); - if (dy) - *dy = make_float2(0.0f, 0.0f); -# endif - - return (1.0f - sd->u) * f0 + sd->u * f1; - } - else { - /* idea: we can't derive any useful differentials here, but for tiled - * mipmap image caching it would be useful to avoid reading the highest - * detail level always. maybe a derivative based on the hair density - * could be computed somehow? */ -# ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = make_float2(0.0f, 0.0f); - if (dy) - *dy = make_float2(0.0f, 0.0f); -# endif - - if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim : - desc.offset; - return kernel_tex_fetch(__attributes_float2, offset); - } - else { - return make_float2(0.0f, 0.0f); - } - } -} - -ccl_device float3 curve_attribute_float3(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float3 *dx, - ccl_private float3 *dy) -{ - if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); - int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1)); - -# ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * (f1 - f0); - if (dy) - *dy = make_float3(0.0f, 0.0f, 0.0f); -# endif - - return (1.0f - sd->u) * f0 + sd->u * f1; - } - else { -# ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = make_float3(0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float3(0.0f, 0.0f, 0.0f); -# endif - - if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? 
desc.offset + sd->prim : - desc.offset; - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset)); - } - else { - return make_float3(0.0f, 0.0f, 0.0f); - } - } -} - -ccl_device float4 curve_attribute_float4(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float4 *dx, - ccl_private float4 *dy) -{ - if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); - int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0); - float4 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + k1); - -# ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * (f1 - f0); - if (dy) - *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); -# endif - - return (1.0f - sd->u) * f0 + sd->u * f1; - } - else { -# ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); -# endif - - if (desc.element & (ATTR_ELEMENT_CURVE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - const int offset = (desc.element == ATTR_ELEMENT_CURVE) ? desc.offset + sd->prim : - desc.offset; - return kernel_tex_fetch(__attributes_float3, offset); - } - else { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } -} - -/* Curve thickness */ - -ccl_device float curve_thickness(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float r = 0.0f; - - if (sd->type & PRIMITIVE_ALL_CURVE) { - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); - int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float4 P_curve[2]; - - if (!(sd->type & PRIMITIVE_ALL_MOTION)) { - P_curve[0] = kernel_tex_fetch(__curve_keys, k0); - P_curve[1] = kernel_tex_fetch(__curve_keys, k1); - } - else { - motion_curve_keys_linear(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); - } - - r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w; - } - - return r * 2.0f; -} - -/* Curve location for motion pass, linear interpolation between keys and - * ignoring radius because we do the same for the motion keys */ - -ccl_device float3 curve_motion_center_location(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); - int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float4 P_curve[2]; - - P_curve[0] = kernel_tex_fetch(__curve_keys, k0); - P_curve[1] = kernel_tex_fetch(__curve_keys, k1); - - return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); -} - -/* Curve tangent normal */ - -ccl_device float3 curve_tangent_normal(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 tgN = make_float3(0.0f, 0.0f, 0.0f); - - if (sd->type & PRIMITIVE_ALL_CURVE) { - - tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu, -sd->I) / len_squared(sd->dPdu))); - tgN = normalize(tgN); - - /* need to find suitable scaled gd for corrected normal */ -# if 0 - tgN = normalize(tgN - gd * sd->dPdu); -# endif - } - - return tgN; -} - -/* Curve bounds utility function */ - -ccl_device_inline void curvebounds(ccl_private float *lower, - ccl_private float *upper, - ccl_private float *extremta, - ccl_private float *extrema, - ccl_private float *extremtb, - ccl_private float *extremb, - float p0, - float p1, - float p2, - float p3) -{ - float halfdiscroot = (p2 * p2 - 3 * p3 * p1); - float ta = -1.0f; - float tb = -1.0f; - - 
*extremta = -1.0f; - *extremtb = -1.0f; - *upper = p0; - *lower = (p0 + p1) + (p2 + p3); - *extrema = *upper; - *extremb = *lower; - - if (*lower >= *upper) { - *upper = *lower; - *lower = p0; - } - - if (halfdiscroot >= 0) { - float inv3p3 = (1.0f / 3.0f) / p3; - halfdiscroot = sqrtf(halfdiscroot); - ta = (-p2 - halfdiscroot) * inv3p3; - tb = (-p2 + halfdiscroot) * inv3p3; - } - - float t2; - float t3; - - if (ta > 0.0f && ta < 1.0f) { - t2 = ta * ta; - t3 = t2 * ta; - *extremta = ta; - *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0; - - *upper = fmaxf(*extrema, *upper); - *lower = fminf(*extrema, *lower); - } - - if (tb > 0.0f && tb < 1.0f) { - t2 = tb * tb; - t3 = t2 * tb; - *extremtb = tb; - *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0; - - *upper = fmaxf(*extremb, *upper); - *lower = fminf(*extremb, *lower); - } -} - -#endif /* __HAIR__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h deleted file mode 100644 index fb0b80b281f..00000000000 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ /dev/null @@ -1,771 +0,0 @@ -/* - * Copyright 2009-2020 Intel Corporation. Adapted from Embree with - * with modifications. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Curve primitive intersection functions. - * - * The code here was adapted from curve_intersector_sweep.h in Embree, to get - * an exact match between Embree CPU ray-tracing and our GPU ray-tracing. */ - -#define CURVE_NUM_BEZIER_SUBDIVISIONS 3 -#define CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE (CURVE_NUM_BEZIER_SUBDIVISIONS + 1) -#define CURVE_NUM_BEZIER_STEPS 2 -#define CURVE_NUM_JACOBIAN_ITERATIONS 5 - -#ifdef __HAIR__ - -/* Catmull-rom curve evaluation. 
*/ - -ccl_device_inline float4 catmull_rom_basis_eval(const float4 curve[4], float u) -{ - const float t = u; - const float s = 1.0f - u; - const float n0 = -t * s * s; - const float n1 = 2.0f + t * t * (3.0f * t - 5.0f); - const float n2 = 2.0f + s * s * (3.0f * s - 5.0f); - const float n3 = -s * t * t; - return 0.5f * (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3); -} - -ccl_device_inline float4 catmull_rom_basis_derivative(const float4 curve[4], float u) -{ - const float t = u; - const float s = 1.0f - u; - const float n0 = -s * s + 2.0f * s * t; - const float n1 = 2.0f * t * (3.0f * t - 5.0f) + 3.0f * t * t; - const float n2 = 2.0f * s * (3.0f * t + 2.0f) - 3.0f * s * s; - const float n3 = -2.0f * s * t + t * t; - return 0.5f * (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3); -} - -ccl_device_inline float4 catmull_rom_basis_derivative2(const float4 curve[4], float u) -{ - - const float t = u; - const float n0 = -3.0f * t + 2.0f; - const float n1 = 9.0f * t - 5.0f; - const float n2 = -9.0f * t + 4.0f; - const float n3 = 3.0f * t - 1.0f; - return (curve[0] * n0 + curve[1] * n1 + curve[2] * n2 + curve[3] * n3); -} - -/* Thick Curve */ - -ccl_device_inline float3 dnormalize(const float3 p, const float3 dp) -{ - const float pp = dot(p, p); - const float pdp = dot(p, dp); - return (pp * dp - pdp * p) / (pp * sqrtf(pp)); -} - -ccl_device_inline float sqr_point_to_line_distance(const float3 PmQ0, const float3 Q1mQ0) -{ - const float3 N = cross(PmQ0, Q1mQ0); - const float3 D = Q1mQ0; - return dot(N, N) / dot(D, D); -} - -ccl_device_inline bool cylinder_intersect(const float3 cylinder_start, - const float3 cylinder_end, - const float cylinder_radius, - const float3 ray_dir, - ccl_private float2 *t_o, - ccl_private float *u0_o, - ccl_private float3 *Ng0_o, - ccl_private float *u1_o, - ccl_private float3 *Ng1_o) -{ - /* Calculate quadratic equation to solve. */ - const float rl = 1.0f / len(cylinder_end - cylinder_start); - const float3 P0 = cylinder_start, dP = (cylinder_end - cylinder_start) * rl; - const float3 O = -P0, dO = ray_dir; - - const float dOdO = dot(dO, dO); - const float OdO = dot(dO, O); - const float OO = dot(O, O); - const float dOz = dot(dP, dO); - const float Oz = dot(dP, O); - - const float A = dOdO - sqr(dOz); - const float B = 2.0f * (OdO - dOz * Oz); - const float C = OO - sqr(Oz) - sqr(cylinder_radius); - - /* We miss the cylinder if determinant is smaller than zero. */ - const float D = B * B - 4.0f * A * C; - if (!(D >= 0.0f)) { - *t_o = make_float2(FLT_MAX, -FLT_MAX); - return false; - } - - /* Special case for rays that are parallel to the cylinder. */ - const float eps = 16.0f * FLT_EPSILON * max(fabsf(dOdO), fabsf(sqr(dOz))); - if (fabsf(A) < eps) { - if (C <= 0.0f) { - *t_o = make_float2(-FLT_MAX, FLT_MAX); - return true; - } - else { - *t_o = make_float2(-FLT_MAX, FLT_MAX); - return false; - } - } - - /* Standard case for rays that are not parallel to the cylinder. */ - const float Q = sqrtf(D); - const float rcp_2A = 1.0f / (2.0f * A); - const float t0 = (-B - Q) * rcp_2A; - const float t1 = (-B + Q) * rcp_2A; - - /* Calculates u and Ng for near hit. */ - { - *u0_o = (t0 * dOz + Oz) * rl; - const float3 Pr = t0 * ray_dir; - const float3 Pl = (*u0_o) * (cylinder_end - cylinder_start) + cylinder_start; - *Ng0_o = Pr - Pl; - } - - /* Calculates u and Ng for far hit. 
*/ - { - *u1_o = (t1 * dOz + Oz) * rl; - const float3 Pr = t1 * ray_dir; - const float3 Pl = (*u1_o) * (cylinder_end - cylinder_start) + cylinder_start; - *Ng1_o = Pr - Pl; - } - - *t_o = make_float2(t0, t1); - - return true; -} - -ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_dir) -{ - const float3 O = -P; - const float3 D = ray_dir; - const float ON = dot(O, N); - const float DN = dot(D, N); - const float min_rcp_input = 1e-18f; - const bool eps = fabsf(DN) < min_rcp_input; - const float t = -ON / DN; - const float lower = (eps || DN < 0.0f) ? -FLT_MAX : t; - const float upper = (eps || DN > 0.0f) ? FLT_MAX : t; - return make_float2(lower, upper); -} - -ccl_device bool curve_intersect_iterative(const float3 ray_dir, - ccl_private float *ray_tfar, - const float dt, - const float4 curve[4], - float u, - float t, - const bool use_backfacing, - ccl_private Intersection *isect) -{ - const float length_ray_dir = len(ray_dir); - - /* Error of curve evaluations is proportional to largest coordinate. */ - const float4 box_min = min(min(curve[0], curve[1]), min(curve[2], curve[3])); - const float4 box_max = max(min(curve[0], curve[1]), max(curve[2], curve[3])); - const float4 box_abs = max(fabs(box_min), fabs(box_max)); - const float P_err = 16.0f * FLT_EPSILON * - max(box_abs.x, max(box_abs.y, max(box_abs.z, box_abs.w))); - const float radius_max = box_max.w; - - for (int i = 0; i < CURVE_NUM_JACOBIAN_ITERATIONS; i++) { - const float3 Q = ray_dir * t; - const float3 dQdt = ray_dir; - const float Q_err = 16.0f * FLT_EPSILON * length_ray_dir * t; - - const float4 P4 = catmull_rom_basis_eval(curve, u); - const float4 dPdu4 = catmull_rom_basis_derivative(curve, u); - - const float3 P = float4_to_float3(P4); - const float3 dPdu = float4_to_float3(dPdu4); - const float radius = P4.w; - const float dradiusdu = dPdu4.w; - - const float3 ddPdu = float4_to_float3(catmull_rom_basis_derivative2(curve, u)); - - const float3 R = Q - P; - const float len_R = len(R); - const float R_err = max(Q_err, P_err); - const float3 dRdu = -dPdu; - const float3 dRdt = dQdt; - - const float3 T = normalize(dPdu); - const float3 dTdu = dnormalize(dPdu, ddPdu); - const float cos_err = P_err / len(dPdu); - - const float f = dot(R, T); - const float f_err = len_R * P_err + R_err + cos_err * (1.0f + len_R); - const float dfdu = dot(dRdu, T) + dot(R, dTdu); - const float dfdt = dot(dRdt, T); - - const float K = dot(R, R) - sqr(f); - const float dKdu = (dot(R, dRdu) - f * dfdu); - const float dKdt = (dot(R, dRdt) - f * dfdt); - const float rsqrt_K = inversesqrtf(K); - - const float g = sqrtf(K) - radius; - const float g_err = R_err + f_err + 16.0f * FLT_EPSILON * radius_max; - const float dgdu = dKdu * rsqrt_K - dradiusdu; - const float dgdt = dKdt * rsqrt_K; - - const float invdet = 1.0f / (dfdu * dgdt - dgdu * dfdt); - u -= (dgdt * f - dfdt * g) * invdet; - t -= (-dgdu * f + dfdu * g) * invdet; - - if (fabsf(f) < f_err && fabsf(g) < g_err) { - t += dt; - if (!(0.0f <= t && t <= *ray_tfar)) { - return false; /* Rejects NaNs */ - } - if (!(u >= 0.0f && u <= 1.0f)) { - return false; /* Rejects NaNs */ - } - - /* Back-face culling. */ - const float3 R = normalize(Q - P); - const float3 U = dradiusdu * R + dPdu; - const float3 V = cross(dPdu, R); - const float3 Ng = cross(V, U); - if (!use_backfacing && dot(ray_dir, Ng) > 0.0f) { - return false; - } - - /* Record intersection. 
*/ - *ray_tfar = t; - isect->t = t; - isect->u = u; - isect->v = 0.0f; - - return true; - } - } - return false; -} - -ccl_device bool curve_intersect_recursive(const float3 ray_orig, - const float3 ray_dir, - float ray_tfar, - float4 curve[4], - ccl_private Intersection *isect) -{ - /* Move ray closer to make intersection stable. */ - const float3 center = float4_to_float3(0.25f * (curve[0] + curve[1] + curve[2] + curve[3])); - const float dt = dot(center - ray_orig, ray_dir) / dot(ray_dir, ray_dir); - const float3 ref = ray_orig + ray_dir * dt; - const float4 ref4 = make_float4(ref.x, ref.y, ref.z, 0.0f); - curve[0] -= ref4; - curve[1] -= ref4; - curve[2] -= ref4; - curve[3] -= ref4; - - const bool use_backfacing = false; - const float step_size = 1.0f / (float)(CURVE_NUM_BEZIER_STEPS); - - int depth = 0; - - /* todo: optimize stack for GPU somehow? Possibly some bitflags are enough, and - * u0/u1 can be derived from the depth. */ - struct { - float u0, u1; - int i; - } stack[CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE]; - - bool found = false; - - float u0 = 0.0f; - float u1 = 1.0f; - int i = 0; - - while (1) { - for (; i < CURVE_NUM_BEZIER_STEPS; i++) { - const float step = i * step_size; - - /* Subdivide curve. */ - const float dscale = (u1 - u0) * (1.0f / 3.0f) * step_size; - const float vu0 = mix(u0, u1, step); - const float vu1 = mix(u0, u1, step + step_size); - - const float4 P0 = catmull_rom_basis_eval(curve, vu0); - const float4 dP0du = dscale * catmull_rom_basis_derivative(curve, vu0); - const float4 P3 = catmull_rom_basis_eval(curve, vu1); - const float4 dP3du = dscale * catmull_rom_basis_derivative(curve, vu1); - - const float4 P1 = P0 + dP0du; - const float4 P2 = P3 - dP3du; - - /* Calculate bounding cylinders. */ - const float rr1 = sqr_point_to_line_distance(float4_to_float3(dP0du), - float4_to_float3(P3 - P0)); - const float rr2 = sqr_point_to_line_distance(float4_to_float3(dP3du), - float4_to_float3(P3 - P0)); - const float maxr12 = sqrtf(max(rr1, rr2)); - const float one_plus_ulp = 1.0f + 2.0f * FLT_EPSILON; - const float one_minus_ulp = 1.0f - 2.0f * FLT_EPSILON; - float r_outer = max(max(P0.w, P1.w), max(P2.w, P3.w)) + maxr12; - float r_inner = min(min(P0.w, P1.w), min(P2.w, P3.w)) - maxr12; - r_outer = one_plus_ulp * r_outer; - r_inner = max(0.0f, one_minus_ulp * r_inner); - bool valid = true; - - /* Intersect with outer cylinder. */ - float2 tc_outer; - float u_outer0, u_outer1; - float3 Ng_outer0, Ng_outer1; - valid = cylinder_intersect(float4_to_float3(P0), - float4_to_float3(P3), - r_outer, - ray_dir, - &tc_outer, - &u_outer0, - &Ng_outer0, - &u_outer1, - &Ng_outer1); - if (!valid) { - continue; - } - - /* Intersect with cap-planes. */ - float2 tp = make_float2(-dt, ray_tfar - dt); - tp = make_float2(max(tp.x, tc_outer.x), min(tp.y, tc_outer.y)); - const float2 h0 = half_plane_intersect( - float4_to_float3(P0), float4_to_float3(dP0du), ray_dir); - tp = make_float2(max(tp.x, h0.x), min(tp.y, h0.y)); - const float2 h1 = half_plane_intersect( - float4_to_float3(P3), -float4_to_float3(dP3du), ray_dir); - tp = make_float2(max(tp.x, h1.x), min(tp.y, h1.y)); - valid = tp.x <= tp.y; - if (!valid) { - continue; - } - - /* Clamp and correct u parameter. */ - u_outer0 = clamp(u_outer0, 0.0f, 1.0f); - u_outer1 = clamp(u_outer1, 0.0f, 1.0f); - u_outer0 = mix(u0, u1, (step + u_outer0) * (1.0f / (float)(CURVE_NUM_BEZIER_STEPS + 1))); - u_outer1 = mix(u0, u1, (step + u_outer1) * (1.0f / (float)(CURVE_NUM_BEZIER_STEPS + 1))); - - /* Intersect with inner cylinder. 
*/ - float2 tc_inner; - float u_inner0, u_inner1; - float3 Ng_inner0, Ng_inner1; - const bool valid_inner = cylinder_intersect(float4_to_float3(P0), - float4_to_float3(P3), - r_inner, - ray_dir, - &tc_inner, - &u_inner0, - &Ng_inner0, - &u_inner1, - &Ng_inner1); - - /* At the unstable area we subdivide deeper. */ -# if 0 - const bool unstable0 = (!valid_inner) | - (fabsf(dot(normalize(ray_dir), normalize(Ng_inner0))) < 0.3f); - const bool unstable1 = (!valid_inner) | - (fabsf(dot(normalize(ray_dir), normalize(Ng_inner1))) < 0.3f); -# else - /* On the GPU appears to be a little faster if always enabled. */ - (void)valid_inner; - - const bool unstable0 = true; - const bool unstable1 = true; -# endif - - /* Subtract the inner interval from the current hit interval. */ - float2 tp0 = make_float2(tp.x, min(tp.y, tc_inner.x)); - float2 tp1 = make_float2(max(tp.x, tc_inner.y), tp.y); - bool valid0 = valid && (tp0.x <= tp0.y); - bool valid1 = valid && (tp1.x <= tp1.y); - if (!(valid0 || valid1)) { - continue; - } - - /* Process one or two hits. */ - bool recurse = false; - if (valid0) { - const int termDepth = unstable0 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE : - CURVE_NUM_BEZIER_SUBDIVISIONS; - if (depth >= termDepth) { - found |= curve_intersect_iterative( - ray_dir, &ray_tfar, dt, curve, u_outer0, tp0.x, use_backfacing, isect); - } - else { - recurse = true; - } - } - - if (valid1 && (tp1.x + dt <= ray_tfar)) { - const int termDepth = unstable1 ? CURVE_NUM_BEZIER_SUBDIVISIONS_UNSTABLE : - CURVE_NUM_BEZIER_SUBDIVISIONS; - if (depth >= termDepth) { - found |= curve_intersect_iterative( - ray_dir, &ray_tfar, dt, curve, u_outer1, tp1.y, use_backfacing, isect); - } - else { - recurse = true; - } - } - - if (recurse) { - stack[depth].u0 = u0; - stack[depth].u1 = u1; - stack[depth].i = i + 1; - depth++; - - u0 = vu0; - u1 = vu1; - i = -1; - } - } - - if (depth > 0) { - depth--; - u0 = stack[depth].u0; - u1 = stack[depth].u1; - i = stack[depth].i; - } - else { - break; - } - } - - return found; -} - -/* Ribbons */ - -ccl_device_inline bool cylinder_culling_test(const float2 p1, const float2 p2, const float r) -{ - /* Performs culling against a cylinder. */ - const float2 dp = p2 - p1; - const float num = dp.x * p1.y - dp.y * p1.x; - const float den2 = dot(dp, dp); - return num * num <= r * r * den2; -} - -/** - * Intersects a ray with a quad with back-face culling - * enabled. The quad v0,v1,v2,v3 is split into two triangles - * v0,v1,v3 and v2,v3,v1. The edge v1,v2 decides which of the two - * triangles gets intersected. - */ -ccl_device_inline bool ribbon_intersect_quad(const float ray_tfar, - const float3 quad_v0, - const float3 quad_v1, - const float3 quad_v2, - const float3 quad_v3, - ccl_private float *u_o, - ccl_private float *v_o, - ccl_private float *t_o) -{ - /* Calculate vertices relative to ray origin? */ - const float3 O = make_float3(0.0f, 0.0f, 0.0f); - const float3 D = make_float3(0.0f, 0.0f, 1.0f); - const float3 va = quad_v0 - O; - const float3 vb = quad_v1 - O; - const float3 vc = quad_v2 - O; - const float3 vd = quad_v3 - O; - - const float3 edb = vb - vd; - const float WW = dot(cross(vd, edb), D); - const float3 v0 = (WW <= 0.0f) ? va : vc; - const float3 v1 = (WW <= 0.0f) ? vb : vd; - const float3 v2 = (WW <= 0.0f) ? vd : vb; - - /* Calculate edges? 
*/ - const float3 e0 = v2 - v0; - const float3 e1 = v0 - v1; - - /* perform edge tests */ - const float U = dot(cross(v0, e0), D); - const float V = dot(cross(v1, e1), D); - if (!(max(U, V) <= 0.0f)) { - return false; - } - - /* Calculate geometry normal and denominator? */ - const float3 Ng = cross(e1, e0); - const float den = dot(Ng, D); - const float rcpDen = 1.0f / den; - - /* Perform depth test? */ - const float t = rcpDen * dot(v0, Ng); - if (!(0.0f <= t && t <= ray_tfar)) { - return false; - } - - /* Avoid division by 0? */ - if (!(den != 0.0f)) { - return false; - } - - /* Update hit information? */ - *t_o = t; - *u_o = U * rcpDen; - *v_o = V * rcpDen; - *u_o = (WW <= 0.0f) ? *u_o : 1.0f - *u_o; - *v_o = (WW <= 0.0f) ? *v_o : 1.0f - *v_o; - return true; -} - -ccl_device_inline void ribbon_ray_space(const float3 ray_dir, float3 ray_space[3]) -{ - const float3 dx0 = make_float3(0, ray_dir.z, -ray_dir.y); - const float3 dx1 = make_float3(-ray_dir.z, 0, ray_dir.x); - ray_space[0] = normalize(dot(dx0, dx0) > dot(dx1, dx1) ? dx0 : dx1); - ray_space[1] = normalize(cross(ray_dir, ray_space[0])); - ray_space[2] = ray_dir; -} - -ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3], - const float3 ray_org, - const float4 P4) -{ - float3 P = float4_to_float3(P4) - ray_org; - return make_float4(dot(ray_space[0], P), dot(ray_space[1], P), dot(ray_space[2], P), P4.w); -} - -ccl_device_inline bool ribbon_intersect(const float3 ray_org, - const float3 ray_dir, - float ray_tfar, - const int N, - float4 curve[4], - ccl_private Intersection *isect) -{ - /* Transform control points into ray space. */ - float3 ray_space[3]; - ribbon_ray_space(ray_dir, ray_space); - - curve[0] = ribbon_to_ray_space(ray_space, ray_org, curve[0]); - curve[1] = ribbon_to_ray_space(ray_space, ray_org, curve[1]); - curve[2] = ribbon_to_ray_space(ray_space, ray_org, curve[2]); - curve[3] = ribbon_to_ray_space(ray_space, ray_org, curve[3]); - - const float4 mx = max(max(fabs(curve[0]), fabs(curve[1])), max(fabs(curve[2]), fabs(curve[3]))); - const float eps = 4.0f * FLT_EPSILON * max(max(mx.x, mx.y), max(mx.z, mx.w)); - const float step_size = 1.0f / (float)N; - - /* Evaluate first point and radius scaled normal direction. */ - float4 p0 = catmull_rom_basis_eval(curve, 0.0f); - float3 dp0dt = float4_to_float3(catmull_rom_basis_derivative(curve, 0.0f)); - if (max3(fabs(dp0dt)) < eps) { - const float4 p1 = catmull_rom_basis_eval(curve, step_size); - dp0dt = float4_to_float3(p1 - p0); - } - float3 wn0 = normalize(make_float3(dp0dt.y, -dp0dt.x, 0.0f)) * p0.w; - - /* Evaluate the bezier curve. */ - for (int i = 0; i < N; i++) { - const float u = i * step_size; - const float4 p1 = catmull_rom_basis_eval(curve, u + step_size); - const bool valid = cylinder_culling_test( - make_float2(p0.x, p0.y), make_float2(p1.x, p1.y), max(p0.w, p1.w)); - - /* Evaluate next point. */ - float3 dp1dt = float4_to_float3(catmull_rom_basis_derivative(curve, u + step_size)); - dp1dt = (max3(fabs(dp1dt)) < eps) ? float4_to_float3(p1 - p0) : dp1dt; - const float3 wn1 = normalize(make_float3(dp1dt.y, -dp1dt.x, 0.0f)) * p1.w; - - if (valid) { - /* Construct quad coordinates. */ - const float3 lp0 = float4_to_float3(p0) + wn0; - const float3 lp1 = float4_to_float3(p1) + wn1; - const float3 up0 = float4_to_float3(p0) - wn0; - const float3 up1 = float4_to_float3(p1) - wn1; - - /* Intersect quad. 
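ribbon_ray_space() above builds a ray-aligned frame whose third axis is the ray direction, so that after ribbon_to_ray_space() the ray passes through the origin along +z; that is why ribbon_intersect_quad() can hard-code O = (0,0,0) and D = (0,0,1). A standalone sketch of the same frame construction, with V3/cross/dot/normalize as plain C++ stand-ins for the Cycles float3 helpers:

    #include <cmath>

    struct V3 { float x, y, z; };
    static V3 cross(V3 a, V3 b) { return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x}; }
    static float dot(V3 a, V3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
    static V3 normalize(V3 a) { const float l = std::sqrt(dot(a, a)); return {a.x / l, a.y / l, a.z / l}; }

    static void ray_space(V3 dir, V3 frame[3])
    {
      /* Pick the larger of two vectors orthogonal to dir to avoid degeneracy. */
      const V3 dx0 = {0.0f, dir.z, -dir.y};
      const V3 dx1 = {-dir.z, 0.0f, dir.x};
      frame[0] = normalize(dot(dx0, dx0) > dot(dx1, dx1) ? dx0 : dx1);
      frame[1] = normalize(cross(dir, frame[0]));
      frame[2] = dir; /* Stored as-is, matching ribbon_ray_space(). */
    }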
*/ - float vu, vv, vt; - bool valid0 = ribbon_intersect_quad(ray_tfar, lp0, lp1, up1, up0, &vu, &vv, &vt); - - if (valid0) { - /* ignore self intersections */ - const float avoidance_factor = 2.0f; - if (avoidance_factor != 0.0f) { - float r = mix(p0.w, p1.w, vu); - valid0 = vt > avoidance_factor * r; - } - - if (valid0) { - vv = 2.0f * vv - 1.0f; - - /* Record intersection. */ - ray_tfar = vt; - isect->t = vt; - isect->u = u + vu * step_size; - isect->v = vv; - return true; - } - } - } - - /* Store point for next step. */ - p0 = p1; - wn0 = wn1; - } - return false; -} - -ccl_device_forceinline bool curve_intersect(KernelGlobals kg, - ccl_private Intersection *isect, - const float3 P, - const float3 dir, - const float tmax, - int object, - int prim, - float time, - int type) -{ - const bool is_motion = (type & PRIMITIVE_ALL_MOTION); - - KernelCurve kcurve = kernel_tex_fetch(__curves, prim); - - int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(type); - int k1 = k0 + 1; - int ka = max(k0 - 1, kcurve.first_key); - int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); - - float4 curve[4]; - if (!is_motion) { - curve[0] = kernel_tex_fetch(__curve_keys, ka); - curve[1] = kernel_tex_fetch(__curve_keys, k0); - curve[2] = kernel_tex_fetch(__curve_keys, k1); - curve[3] = kernel_tex_fetch(__curve_keys, kb); - } - else { - motion_curve_keys(kg, object, prim, time, ka, k0, k1, kb, curve); - } - - if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { - /* todo: adaptive number of subdivisions could help performance here. */ - const int subdivisions = kernel_data.bvh.curve_subdivisions; - if (ribbon_intersect(P, dir, tmax, subdivisions, curve, isect)) { - isect->prim = prim; - isect->object = object; - isect->type = type; - return true; - } - - return false; - } - else { - if (curve_intersect_recursive(P, dir, tmax, curve, isect)) { - isect->prim = prim; - isect->object = object; - isect->type = type; - return true; - } - - return false; - } -} - -ccl_device_inline void curve_shader_setup(KernelGlobals kg, - ccl_private ShaderData *sd, - float3 P, - float3 D, - float t, - const int isect_object, - const int isect_prim) -{ - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_inverse_transform(kg, sd); - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D * t); - D = safe_normalize_len(D, &t); - } - - KernelCurve kcurve = kernel_tex_fetch(__curves, isect_prim); - - int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - int ka = max(k0 - 1, kcurve.first_key); - int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); - - float4 P_curve[4]; - - if (!(sd->type & PRIMITIVE_ALL_MOTION)) { - P_curve[0] = kernel_tex_fetch(__curve_keys, ka); - P_curve[1] = kernel_tex_fetch(__curve_keys, k0); - P_curve[2] = kernel_tex_fetch(__curve_keys, k1); - P_curve[3] = kernel_tex_fetch(__curve_keys, kb); - } - else { - motion_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); - } - - P = P + D * t; - - const float4 dPdu4 = catmull_rom_basis_derivative(P_curve, sd->u); - const float3 dPdu = float4_to_float3(dPdu4); - - if (sd->type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { - /* Rounded smooth normals for ribbons, to approximate thick curve shape. 
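curve_intersect() above gathers four consecutive keys for the Catmull-Rom segment: the segment's own keys k0/k1 plus one neighbour on each side, clamped to the curve's key range so that end segments reuse their boundary key. The same index arithmetic as a tiny standalone helper (SegmentKeys and segment_keys are illustrative names only):

    #include <algorithm>

    struct SegmentKeys { int ka, k0, k1, kb; };

    static SegmentKeys segment_keys(int first_key, int num_keys, int segment)
    {
      SegmentKeys k;
      k.k0 = first_key + segment;
      k.k1 = k.k0 + 1;
      k.ka = std::max(k.k0 - 1, first_key);                /* Clamp at the first key. */
      k.kb = std::min(k.k1 + 1, first_key + num_keys - 1); /* Clamp at the last key. */
      return k;
    }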
*/ - const float3 tangent = normalize(dPdu); - const float3 bitangent = normalize(cross(tangent, -D)); - const float sine = sd->v; - const float cosine = safe_sqrtf(1.0f - sine * sine); - - sd->N = normalize(sine * bitangent - cosine * normalize(cross(tangent, bitangent))); - sd->Ng = -D; - -# if 0 - /* This approximates the position and geometric normal of a thick curve too, - * but gives too many issues with wrong self intersections. */ - const float dPdu_radius = dPdu4.w; - sd->Ng = sd->N; - P += sd->N * dPdu_radius; -# endif - } - else { - /* Thick curves, compute normal using direction from inside the curve. - * This could be optimized by recording the normal in the intersection, - * however for Optix this would go beyond the size of the payload. */ - /* NOTE: It is possible that P will be the same as P_inside (precision issues, or very small - * radius). In this case use the view direction to approximate the normal. */ - const float3 P_inside = float4_to_float3(catmull_rom_basis_eval(P_curve, sd->u)); - const float3 Ng = (!isequal_float3(P, P_inside)) ? normalize(P - P_inside) : -sd->I; - - sd->N = Ng; - sd->Ng = Ng; - sd->v = 0.0f; - } - -# ifdef __DPDU__ - /* dPdu/dPdv */ - sd->dPdu = dPdu; - sd->dPdv = cross(dPdu, sd->Ng); -# endif - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_transform(kg, sd); - P = transform_point(&tfm, P); - } - - sd->P = P; - sd->shader = kernel_tex_fetch(__curves, sd->prim).shader_id; -} - -#endif - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h deleted file mode 100644 index 2dd213d43f6..00000000000 --- a/intern/cycles/kernel/geom/geom_motion_curve.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Motion Curve Primitive - * - * These are stored as regular curves, plus extra positions and radii at times - * other than the frame center. Computing the curve keys at a given ray time is - * a matter of interpolation of the two steps between which the ray time lies. - * - * The extra curve keys are stored as ATTR_STD_MOTION_VERTEX_POSITION. 
- */ - -#ifdef __HAIR__ - -ccl_device_inline void motion_curve_keys_for_step_linear(KernelGlobals kg, - int offset, - int numkeys, - int numsteps, - int step, - int k0, - int k1, - float4 keys[2]) -{ - if (step == numsteps) { - /* center step: regular key location */ - keys[0] = kernel_tex_fetch(__curve_keys, k0); - keys[1] = kernel_tex_fetch(__curve_keys, k1); - } - else { - /* center step is not stored in this array */ - if (step > numsteps) - step--; - - offset += step * numkeys; - - keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); - keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); - } -} - -/* return 2 curve key locations */ -ccl_device_inline void motion_curve_keys_linear( - KernelGlobals kg, int object, int prim, float time, int k0, int k1, float4 keys[2]) -{ - /* get motion info */ - int numsteps, numkeys; - object_motion_info(kg, object, &numsteps, NULL, &numkeys); - - /* figure out which steps we need to fetch and their interpolation factor */ - const int maxstep = numsteps * 2; - const int step = min((int)(time * maxstep), maxstep - 1); - const float t = time * maxstep - step; - - /* find attribute */ - const int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* fetch key coordinates */ - float4 next_keys[2]; - - motion_curve_keys_for_step_linear(kg, offset, numkeys, numsteps, step, k0, k1, keys); - motion_curve_keys_for_step_linear(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys); - - /* interpolate between steps */ - keys[0] = (1.0f - t) * keys[0] + t * next_keys[0]; - keys[1] = (1.0f - t) * keys[1] + t * next_keys[1]; -} - -ccl_device_inline void motion_curve_keys_for_step(KernelGlobals kg, - int offset, - int numkeys, - int numsteps, - int step, - int k0, - int k1, - int k2, - int k3, - float4 keys[4]) -{ - if (step == numsteps) { - /* center step: regular key location */ - keys[0] = kernel_tex_fetch(__curve_keys, k0); - keys[1] = kernel_tex_fetch(__curve_keys, k1); - keys[2] = kernel_tex_fetch(__curve_keys, k2); - keys[3] = kernel_tex_fetch(__curve_keys, k3); - } - else { - /* center step is not stored in this array */ - if (step > numsteps) - step--; - - offset += step * numkeys; - - keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); - keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); - keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2); - keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3); - } -} - -/* return 2 curve key locations */ -ccl_device_inline void motion_curve_keys(KernelGlobals kg, - int object, - int prim, - float time, - int k0, - int k1, - int k2, - int k3, - float4 keys[4]) -{ - /* get motion info */ - int numsteps, numkeys; - object_motion_info(kg, object, &numsteps, NULL, &numkeys); - - /* figure out which steps we need to fetch and their interpolation factor */ - const int maxstep = numsteps * 2; - const int step = min((int)(time * maxstep), maxstep - 1); - const float t = time * maxstep - step; - - /* find attribute */ - const int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* fetch key coordinates */ - float4 next_keys[4]; - - motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys); - motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys); - - /* interpolate between steps */ - keys[0] = (1.0f - t) * keys[0] + t * next_keys[0]; - 
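motion_curve_keys_linear() above turns the ray time into a step index and an interpolation factor. A compact sketch of that computation with a worked example: for numsteps = 2 there are 2*2+1 = 5 stored samples (times 0, 0.25, 0.5, 0.75, 1), and time = 0.6 yields step 2 (the centre sample at 0.5) with factor 0.4 toward step 3 (0.75). motion_step() is an illustrative name, not a kernel function:

    #include <algorithm>

    static void motion_step(float time, int numsteps, int *step, float *t)
    {
      const int maxstep = numsteps * 2;
      *step = std::min((int)(time * maxstep), maxstep - 1); /* Clamp so step + 1 stays valid. */
      *t = time * maxstep - *step;                          /* Fractional part = blend factor. */
    }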
keys[1] = (1.0f - t) * keys[1] + t * next_keys[1]; - keys[2] = (1.0f - t) * keys[2] + t * next_keys[2]; - keys[3] = (1.0f - t) * keys[3] + t * next_keys[3]; -} - -#endif - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h deleted file mode 100644 index 69d15f950ec..00000000000 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Motion Triangle Primitive - * - * These are stored as regular triangles, plus extra positions and normals at - * times other than the frame center. Computing the triangle vertex positions - * or normals at a given ray time is a matter of interpolation of the two steps - * between which the ray time lies. - * - * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION - * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes. - */ - -#pragma once - -#include "kernel/bvh/bvh_util.h" - -CCL_NAMESPACE_BEGIN - -/* Time interpolation of vertex positions and normals */ - -ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals kg, - uint4 tri_vindex, - int offset, - int numverts, - int numsteps, - int step, - float3 verts[3]) -{ - if (step == numsteps) { - /* center step: regular vertex location */ - verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); - } - else { - /* center step not store in this array */ - if (step > numsteps) - step--; - - offset += step * numverts; - - verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); - verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); - verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); - } -} - -ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals kg, - uint4 tri_vindex, - int offset, - int numverts, - int numsteps, - int step, - float3 normals[3]) -{ - if (step == numsteps) { - /* center step: regular vertex location */ - normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); - } - else { - /* center step is not stored in this array */ - if (step > numsteps) - step--; - - offset += step * numverts; - - normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); - normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); - normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); - } -} - -ccl_device_inline void motion_triangle_vertices( - KernelGlobals kg, int object, int 
prim, float time, float3 verts[3]) -{ - /* get motion info */ - int numsteps, numverts; - object_motion_info(kg, object, &numsteps, &numverts, NULL); - - /* figure out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps * 2; - int step = min((int)(time * maxstep), maxstep - 1); - float t = time * maxstep - step; - - /* find attribute */ - int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* fetch vertex coordinates */ - float3 next_verts[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); - - /* interpolate between steps */ - verts[0] = (1.0f - t) * verts[0] + t * next_verts[0]; - verts[1] = (1.0f - t) * verts[1] + t * next_verts[1]; - verts[2] = (1.0f - t) * verts[2] + t * next_verts[2]; -} - -ccl_device_inline float3 motion_triangle_smooth_normal( - KernelGlobals kg, float3 Ng, int object, int prim, float u, float v, float time) -{ - /* get motion info */ - int numsteps, numverts; - object_motion_info(kg, object, &numsteps, &numverts, NULL); - - /* figure out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps * 2; - int step = min((int)(time * maxstep), maxstep - 1); - float t = time * maxstep - step; - - /* find attribute */ - int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* fetch normals */ - float3 normals[3], next_normals[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - - motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); - motion_triangle_normals_for_step( - kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals); - - /* interpolate between steps */ - normals[0] = (1.0f - t) * normals[0] + t * next_normals[0]; - normals[1] = (1.0f - t) * normals[1] + t * next_normals[1]; - normals[2] = (1.0f - t) * normals[2] + t * next_normals[2]; - - /* interpolate between vertices */ - float w = 1.0f - u - v; - float3 N = safe_normalize(u * normals[0] + v * normals[1] + w * normals[2]); - - return is_zero(N) ? Ng : N; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h deleted file mode 100644 index 256e7add21e..00000000000 --- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Motion Triangle Primitive - * - * These are stored as regular triangles, plus extra positions and normals at - * times other than the frame center. 
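motion_triangle_smooth_normal() above first blends each vertex normal between the two motion steps and then combines the three vertex normals barycentrically, falling back to the geometric normal when the blend degenerates. A plain C++ sketch of that final blend (N3 and blend_normals are stand-in names):

    #include <cmath>

    struct N3 { float x, y, z; };

    static N3 blend_normals(N3 n0, N3 n1, N3 n2, float u, float v, N3 Ng)
    {
      const float w = 1.0f - u - v;
      const N3 n = {u * n0.x + v * n1.x + w * n2.x,
                    u * n0.y + v * n1.y + w * n2.y,
                    u * n0.z + v * n1.z + w * n2.z};
      const float len = std::sqrt(n.x * n.x + n.y * n.y + n.z * n.z);
      if (len == 0.0f) {
        return Ng; /* Degenerate blend: keep the geometric normal. */
      }
      return {n.x / len, n.y / len, n.z / len};
    }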
Computing the triangle vertex positions - * or normals at a given ray time is a matter of interpolation of the two steps - * between which the ray time lies. - * - * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION - * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Refine triangle intersection to more precise hit point. For rays that travel - * far the precision is often not so good, this reintersects the primitive from - * a closer distance. - */ - -ccl_device_inline float3 motion_triangle_refine(KernelGlobals kg, - ccl_private ShaderData *sd, - float3 P, - float3 D, - float t, - const int isect_object, - const int isect_prim, - float3 verts[3]) -{ -#ifdef __INTERSECTION_REFINE__ - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - if (UNLIKELY(t == 0.0f)) { - return P; - } - const Transform tfm = object_get_inverse_transform(kg, sd); - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D * t); - D = normalize_len(D, &t); - } - - P = P + D * t; - - /* Compute refined intersection distance. */ - const float3 e1 = verts[0] - verts[2]; - const float3 e2 = verts[1] - verts[2]; - const float3 s1 = cross(D, e2); - - const float invdivisor = 1.0f / dot(s1, e1); - const float3 d = P - verts[2]; - const float3 s2 = cross(d, e1); - float rt = dot(e2, s2) * invdivisor; - - /* Compute refined position. */ - P = P + D * rt; - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_transform(kg, sd); - P = transform_point(&tfm, P); - } - - return P; -#else - return P + D * t; -#endif -} - -/* Same as above, except that t is assumed to be in object space - * for instancing. - */ - -#ifdef __BVH_LOCAL__ -# if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86)) -ccl_device_noinline -# else -ccl_device_inline -# endif - float3 - motion_triangle_refine_local(KernelGlobals kg, - ccl_private ShaderData *sd, - float3 P, - float3 D, - float t, - const int isect_object, - const int isect_prim, - float3 verts[3]) -{ -# ifdef __KERNEL_OPTIX__ - /* t is always in world space with OptiX. */ - return motion_triangle_refine(kg, sd, P, D, t, isect_object, isect_prim, verts); -# else -# ifdef __INTERSECTION_REFINE__ - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_inverse_transform(kg, sd); - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D); - D = normalize(D); - } - - P = P + D * t; - - /* compute refined intersection distance */ - const float3 e1 = verts[0] - verts[2]; - const float3 e2 = verts[1] - verts[2]; - const float3 s1 = cross(D, e2); - - const float invdivisor = 1.0f / dot(s1, e1); - const float3 d = P - verts[2]; - const float3 s2 = cross(d, e1); - float rt = dot(e2, s2) * invdivisor; - - P = P + D * rt; - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_transform(kg, sd); - P = transform_point(&tfm, P); - } - - return P; -# else /* __INTERSECTION_REFINE__ */ - return P + D * t; -# endif /* __INTERSECTION_REFINE__ */ -# endif -} -#endif /* __BVH_LOCAL__ */ - -/* Ray intersection. We simply compute the vertex positions at the given ray - * time and do a ray intersection with the resulting triangle. 
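motion_triangle_refine() above re-evaluates the hit distance from a point already close to the triangle, using the same edge/cross-product terms as the intersection test, and advances the point by that residual. A standalone sketch of the core step (P3 and the helpers are plain stand-ins for the Cycles float3 functions; the object-space transform handling is omitted):

    struct P3 { float x, y, z; };
    static P3 sub(P3 a, P3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
    static P3 crs(P3 a, P3 b) { return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x}; }
    static float dt3(P3 a, P3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

    static P3 refine_hit(P3 P, P3 D, const P3 v[3])
    {
      const P3 e1 = sub(v[0], v[2]);
      const P3 e2 = sub(v[1], v[2]);
      const P3 s1 = crs(D, e2);
      const float invdiv = 1.0f / dt3(s1, e1);
      const P3 d = sub(P, v[2]);
      const P3 s2 = crs(d, e1);
      const float rt = dt3(e2, s2) * invdiv; /* Residual distance along D. */
      return {P.x + D.x * rt, P.y + D.y * rt, P.z + D.z * rt};
    }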
- */ - -ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg, - ccl_private Intersection *isect, - float3 P, - float3 dir, - float tmax, - float time, - uint visibility, - int object, - int prim_addr) -{ - /* Primitive index for vertex location lookup. */ - int prim = kernel_tex_fetch(__prim_index, prim_addr); - int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : object; - /* Get vertex locations for intersection. */ - float3 verts[3]; - motion_triangle_vertices(kg, fobject, prim, time, verts); - /* Ray-triangle intersection, unoptimized. */ - float t, u, v; - if (ray_triangle_intersect(P, - dir, - tmax, -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - (ssef *)verts, -#else - verts[0], - verts[1], - verts[2], -#endif - &u, - &v, - &t)) { -#ifdef __VISIBILITY_FLAG__ - /* Visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags. - */ - if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) -#endif - { - isect->t = t; - isect->u = u; - isect->v = v; - isect->prim = prim; - isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : - object; - isect->type = PRIMITIVE_MOTION_TRIANGLE; - return true; - } - } - return false; -} - -/* Special ray intersection routines for local intersections. In that case we - * only want to intersect with primitives in the same object, and if case of - * multiple hits we pick a single random primitive as the intersection point. - * Returns whether traversal should be stopped. - */ -#ifdef __BVH_LOCAL__ -ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg, - ccl_private LocalIntersection *local_isect, - float3 P, - float3 dir, - float time, - int object, - int local_object, - int prim_addr, - float tmax, - ccl_private uint *lcg_state, - int max_hits) -{ - /* Only intersect with matching object, for instanced objects we - * already know we are only intersecting the right object. */ - if (object == OBJECT_NONE) { - if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { - return false; - } - } - - /* Primitive index for vertex location lookup. */ - int prim = kernel_tex_fetch(__prim_index, prim_addr); - /* Get vertex locations for intersection. */ - float3 verts[3]; - motion_triangle_vertices(kg, local_object, prim, time, verts); - /* Ray-triangle intersection, unoptimized. */ - float t, u, v; - if (!ray_triangle_intersect(P, - dir, - tmax, -# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - (ssef *)verts, -# else - verts[0], - verts[1], - verts[2], -# endif - &u, - &v, - &t)) { - return false; - } - - /* If no actual hit information is requested, just return here. */ - if (max_hits == 0) { - return true; - } - - int hit; - if (lcg_state) { - /* Record up to max_hits intersections. */ - for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { - if (local_isect->hits[i].t == t) { - return false; - } - } - - local_isect->num_hits++; - - if (local_isect->num_hits <= max_hits) { - hit = local_isect->num_hits - 1; - } - else { - /* Reservoir sampling: if we are at the maximum number of - * hits, randomly replace element or skip it. - */ - hit = lcg_step_uint(lcg_state) % local_isect->num_hits; - - if (hit >= max_hits) - return false; - } - } - else { - /* Record closest intersection only. */ - if (local_isect->num_hits && t > local_isect->hits[0].t) { - return false; - } - - hit = 0; - local_isect->num_hits = 1; - } - - /* Record intersection. 
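The local-intersection path above keeps at most max_hits hits and, once full, uses reservoir sampling: the n-th candidate replaces a uniformly chosen slot and is kept only if that slot lies within the reservoir, so each hit survives with probability max_hits / n. A generic sketch of that policy (reservoir_record and rng are illustrative; rng stands in for lcg_step_uint):

    #include <cstdint>

    template<typename Hit, typename Rng>
    static bool reservoir_record(Hit hits[], int max_hits, int *num_hits, const Hit &candidate, Rng &&rng)
    {
      (*num_hits)++;
      if (*num_hits <= max_hits) {
        hits[*num_hits - 1] = candidate; /* Reservoir not full yet: always keep. */
        return true;
      }
      const int slot = (int)(rng() % (uint32_t)*num_hits);
      if (slot >= max_hits) {
        return false; /* Candidate rejected. */
      }
      hits[slot] = candidate; /* Replace a random existing hit. */
      return true;
    }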
*/ - ccl_private Intersection *isect = &local_isect->hits[hit]; - isect->t = t; - isect->u = u; - isect->v = v; - isect->prim = prim; - isect->object = local_object; - isect->type = PRIMITIVE_MOTION_TRIANGLE; - - /* Record geometric normal. */ - local_isect->Ng[hit] = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); - - return false; -} -#endif /* __BVH_LOCAL__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h deleted file mode 100644 index fc7c181882e..00000000000 --- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Motion Triangle Primitive - * - * These are stored as regular triangles, plus extra positions and normals at - * times other than the frame center. Computing the triangle vertex positions - * or normals at a given ray time is a matter of interpolation of the two steps - * between which the ray time lies. - * - * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION - * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Setup of motion triangle specific parts of ShaderData, moved into this one - * function to more easily share computation of interpolated positions and - * normals */ - -/* return 3 triangle vertex normals */ -ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg, - ccl_private ShaderData *sd, - const float3 P, - const float3 D, - const float ray_t, - const int isect_object, - const int isect_prim, - bool is_local) -{ - /* Get shader. */ - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); - /* Get motion info. */ - /* TODO(sergey): This logic is really similar to motion_triangle_vertices(), - * can we de-duplicate something here? - */ - int numsteps, numverts; - object_motion_info(kg, sd->object, &numsteps, &numverts, NULL); - /* Figure out which steps we need to fetch and their interpolation factor. */ - int maxstep = numsteps * 2; - int step = min((int)(sd->time * maxstep), maxstep - 1); - float t = sd->time * maxstep - step; - /* Find attribute. */ - int offset = intersection_find_attribute(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - /* Fetch vertex coordinates. */ - float3 verts[3], next_verts[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); - /* Interpolate between steps. */ - verts[0] = (1.0f - t) * verts[0] + t * next_verts[0]; - verts[1] = (1.0f - t) * verts[1] + t * next_verts[1]; - verts[2] = (1.0f - t) * verts[2] + t * next_verts[2]; - /* Compute refined position. 
*/ -#ifdef __BVH_LOCAL__ - if (is_local) { - sd->P = motion_triangle_refine_local(kg, sd, P, D, ray_t, isect_object, isect_prim, verts); - } - else -#endif /* __BVH_LOCAL__*/ - { - sd->P = motion_triangle_refine(kg, sd, P, D, ray_t, isect_object, isect_prim, verts); - } - /* Compute face normal. */ - float3 Ng; - if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0])); - } - else { - Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); - } - sd->Ng = Ng; - sd->N = Ng; - /* Compute derivatives of P w.r.t. uv. */ -#ifdef __DPDU__ - sd->dPdu = (verts[0] - verts[2]); - sd->dPdv = (verts[1] - verts[2]); -#endif - /* Compute smooth normal. */ - if (sd->shader & SHADER_SMOOTH_NORMAL) { - /* Find attribute. */ - int offset = intersection_find_attribute(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - /* Fetch vertex coordinates. */ - float3 normals[3], next_normals[3]; - motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); - motion_triangle_normals_for_step( - kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals); - /* Interpolate between steps. */ - normals[0] = (1.0f - t) * normals[0] + t * next_normals[0]; - normals[1] = (1.0f - t) * normals[1] + t * next_normals[1]; - normals[2] = (1.0f - t) * normals[2] + t * next_normals[2]; - /* Interpolate between vertices. */ - float u = sd->u; - float v = sd->v; - float w = 1.0f - u - v; - sd->N = (u * normals[0] + v * normals[1] + w * normals[2]); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h deleted file mode 100644 index 34a9d639d9d..00000000000 --- a/intern/cycles/kernel/geom/geom_object.h +++ /dev/null @@ -1,600 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Object Primitive - * - * All mesh and curve primitives are part of an object. The same mesh and curves - * may be instanced multiple times by different objects. - * - * If the mesh is not instanced multiple times, the object will not be explicitly - * stored as a primitive in the BVH, rather the bare triangles are curved are - * directly primitives in the BVH with world space locations applied, and the object - * ID is looked up afterwards. 
*/ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Object attributes, for now a fixed size and contents */ - -enum ObjectTransform { - OBJECT_TRANSFORM = 0, - OBJECT_INVERSE_TRANSFORM = 1, -}; - -enum ObjectVectorTransform { OBJECT_PASS_MOTION_PRE = 0, OBJECT_PASS_MOTION_POST = 1 }; - -/* Object to world space transformation */ - -ccl_device_inline Transform object_fetch_transform(KernelGlobals kg, - int object, - enum ObjectTransform type) -{ - if (type == OBJECT_INVERSE_TRANSFORM) { - return kernel_tex_fetch(__objects, object).itfm; - } - else { - return kernel_tex_fetch(__objects, object).tfm; - } -} - -/* Lamp to world space transformation */ - -ccl_device_inline Transform lamp_fetch_transform(KernelGlobals kg, int lamp, bool inverse) -{ - if (inverse) { - return kernel_tex_fetch(__lights, lamp).itfm; - } - else { - return kernel_tex_fetch(__lights, lamp).tfm; - } -} - -/* Object to world space transformation for motion vectors */ - -ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals kg, - int object, - enum ObjectVectorTransform type) -{ - int offset = object * OBJECT_MOTION_PASS_SIZE + (int)type; - return kernel_tex_fetch(__object_motion_pass, offset); -} - -/* Motion blurred object transformations */ - -#ifdef __OBJECT_MOTION__ -ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals kg, int object, float time) -{ - const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset; - ccl_global const DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset); - const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1; - - Transform tfm; - transform_motion_array_interpolate(&tfm, motion, num_steps, time); - - return tfm; -} - -ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals kg, - int object, - float time, - ccl_private Transform *itfm) -{ - int object_flag = kernel_tex_fetch(__object_flag, object); - if (object_flag & SD_OBJECT_MOTION) { - /* if we do motion blur */ - Transform tfm = object_fetch_transform_motion(kg, object, time); - - if (itfm) - *itfm = transform_quick_inverse(tfm); - - return tfm; - } - else { - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); - if (itfm) - *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - - return tfm; - } -} -#endif - -/* Get transform matrix for shading point. */ - -ccl_device_inline Transform object_get_transform(KernelGlobals kg, - ccl_private const ShaderData *sd) -{ -#ifdef __OBJECT_MOTION__ - return (sd->object_flag & SD_OBJECT_MOTION) ? - sd->ob_tfm_motion : - object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); -#else - return object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); -#endif -} - -ccl_device_inline Transform object_get_inverse_transform(KernelGlobals kg, - ccl_private const ShaderData *sd) -{ -#ifdef __OBJECT_MOTION__ - return (sd->object_flag & SD_OBJECT_MOTION) ? 
- sd->ob_itfm_motion : - object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); -#else - return object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); -#endif -} -/* Transform position from object to world space */ - -ccl_device_inline void object_position_transform(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private float3 *P) -{ -#ifdef __OBJECT_MOTION__ - if (sd->object_flag & SD_OBJECT_MOTION) { - *P = transform_point_auto(&sd->ob_tfm_motion, *P); - return; - } -#endif - - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - *P = transform_point(&tfm, *P); -} - -/* Transform position from world to object space */ - -ccl_device_inline void object_inverse_position_transform(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private float3 *P) -{ -#ifdef __OBJECT_MOTION__ - if (sd->object_flag & SD_OBJECT_MOTION) { - *P = transform_point_auto(&sd->ob_itfm_motion, *P); - return; - } -#endif - - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); - *P = transform_point(&tfm, *P); -} - -/* Transform normal from world to object space */ - -ccl_device_inline void object_inverse_normal_transform(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private float3 *N) -{ -#ifdef __OBJECT_MOTION__ - if (sd->object_flag & SD_OBJECT_MOTION) { - if ((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) { - *N = normalize(transform_direction_transposed_auto(&sd->ob_tfm_motion, *N)); - } - return; - } -#endif - - if (sd->object != OBJECT_NONE) { - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - *N = normalize(transform_direction_transposed(&tfm, *N)); - } - else if (sd->type == PRIMITIVE_LAMP) { - Transform tfm = lamp_fetch_transform(kg, sd->lamp, false); - *N = normalize(transform_direction_transposed(&tfm, *N)); - } -} - -/* Transform normal from object to world space */ - -ccl_device_inline void object_normal_transform(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private float3 *N) -{ -#ifdef __OBJECT_MOTION__ - if (sd->object_flag & SD_OBJECT_MOTION) { - *N = normalize(transform_direction_transposed_auto(&sd->ob_itfm_motion, *N)); - return; - } -#endif - - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); - *N = normalize(transform_direction_transposed(&tfm, *N)); -} - -/* Transform direction vector from object to world space */ - -ccl_device_inline void object_dir_transform(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private float3 *D) -{ -#ifdef __OBJECT_MOTION__ - if (sd->object_flag & SD_OBJECT_MOTION) { - *D = transform_direction_auto(&sd->ob_tfm_motion, *D); - return; - } -#endif - - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - *D = transform_direction(&tfm, *D); -} - -/* Transform direction vector from world to object space */ - -ccl_device_inline void object_inverse_dir_transform(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private float3 *D) -{ -#ifdef __OBJECT_MOTION__ - if (sd->object_flag & SD_OBJECT_MOTION) { - *D = transform_direction_auto(&sd->ob_itfm_motion, *D); - return; - } -#endif - - const Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); - *D = transform_direction(&tfm, *D); -} - -/* Object center position */ - -ccl_device_inline float3 object_location(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - if (sd->object == OBJECT_NONE) - return make_float3(0.0f, 0.0f, 0.0f); - -#ifdef 
__OBJECT_MOTION__ - if (sd->object_flag & SD_OBJECT_MOTION) { - return make_float3(sd->ob_tfm_motion.x.w, sd->ob_tfm_motion.y.w, sd->ob_tfm_motion.z.w); - } -#endif - - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - return make_float3(tfm.x.w, tfm.y.w, tfm.z.w); -} - -/* Color of the object */ - -ccl_device_inline float3 object_color(KernelGlobals kg, int object) -{ - if (object == OBJECT_NONE) - return make_float3(0.0f, 0.0f, 0.0f); - - ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object); - return make_float3(kobject->color[0], kobject->color[1], kobject->color[2]); -} - -/* Pass ID number of object */ - -ccl_device_inline float object_pass_id(KernelGlobals kg, int object) -{ - if (object == OBJECT_NONE) - return 0.0f; - - return kernel_tex_fetch(__objects, object).pass_id; -} - -/* Per lamp random number for shader variation */ - -ccl_device_inline float lamp_random_number(KernelGlobals kg, int lamp) -{ - if (lamp == LAMP_NONE) - return 0.0f; - - return kernel_tex_fetch(__lights, lamp).random; -} - -/* Per object random number for shader variation */ - -ccl_device_inline float object_random_number(KernelGlobals kg, int object) -{ - if (object == OBJECT_NONE) - return 0.0f; - - return kernel_tex_fetch(__objects, object).random_number; -} - -/* Particle ID from which this object was generated */ - -ccl_device_inline int object_particle_id(KernelGlobals kg, int object) -{ - if (object == OBJECT_NONE) - return 0; - - return kernel_tex_fetch(__objects, object).particle_index; -} - -/* Generated texture coordinate on surface from where object was instanced */ - -ccl_device_inline float3 object_dupli_generated(KernelGlobals kg, int object) -{ - if (object == OBJECT_NONE) - return make_float3(0.0f, 0.0f, 0.0f); - - ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object); - return make_float3( - kobject->dupli_generated[0], kobject->dupli_generated[1], kobject->dupli_generated[2]); -} - -/* UV texture coordinate on surface from where object was instanced */ - -ccl_device_inline float3 object_dupli_uv(KernelGlobals kg, int object) -{ - if (object == OBJECT_NONE) - return make_float3(0.0f, 0.0f, 0.0f); - - ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object); - return make_float3(kobject->dupli_uv[0], kobject->dupli_uv[1], 0.0f); -} - -/* Information about mesh for motion blurred triangles and curves */ - -ccl_device_inline void object_motion_info(KernelGlobals kg, - int object, - ccl_private int *numsteps, - ccl_private int *numverts, - ccl_private int *numkeys) -{ - if (numkeys) { - *numkeys = kernel_tex_fetch(__objects, object).numkeys; - } - - if (numsteps) - *numsteps = kernel_tex_fetch(__objects, object).numsteps; - if (numverts) - *numverts = kernel_tex_fetch(__objects, object).numverts; -} - -/* Offset to an objects patch map */ - -ccl_device_inline uint object_patch_map_offset(KernelGlobals kg, int object) -{ - if (object == OBJECT_NONE) - return 0; - - return kernel_tex_fetch(__objects, object).patch_map_offset; -} - -/* Volume step size */ - -ccl_device_inline float object_volume_density(KernelGlobals kg, int object) -{ - if (object == OBJECT_NONE) { - return 1.0f; - } - - return kernel_tex_fetch(__objects, object).volume_density; -} - -ccl_device_inline float object_volume_step_size(KernelGlobals kg, int object) -{ - if (object == OBJECT_NONE) { - return kernel_data.background.volume_step_size; - } - - return kernel_tex_fetch(__object_volume_step, object); -} - -/* Pass ID for shader */ 
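object_motion_info() above returns the step and vertex/key counts that the motion_*_for_step() helpers earlier in this patch rely on: an object with numsteps motion steps has 2*numsteps + 1 time samples, but the centre sample lives in the regular geometry arrays, so the motion attribute array stores only 2*numsteps entries and the centre index is skipped. A small sketch of that index mapping (motion_attribute_step is an illustrative name):

    /* Map a time-sample index to its slot in the motion attribute array,
     * or -1 for the centre step, which is read from the regular arrays. */
    static int motion_attribute_step(int step, int numsteps)
    {
      if (step == numsteps) {
        return -1;
      }
      return (step > numsteps) ? step - 1 : step;
    }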
- -ccl_device int shader_pass_id(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id; -} - -/* Cryptomatte ID */ - -ccl_device_inline float object_cryptomatte_id(KernelGlobals kg, int object) -{ - if (object == OBJECT_NONE) - return 0.0f; - - return kernel_tex_fetch(__objects, object).cryptomatte_object; -} - -ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals kg, int object) -{ - if (object == OBJECT_NONE) - return 0; - - return kernel_tex_fetch(__objects, object).cryptomatte_asset; -} - -/* Particle data from which object was instanced */ - -ccl_device_inline uint particle_index(KernelGlobals kg, int particle) -{ - return kernel_tex_fetch(__particles, particle).index; -} - -ccl_device float particle_age(KernelGlobals kg, int particle) -{ - return kernel_tex_fetch(__particles, particle).age; -} - -ccl_device float particle_lifetime(KernelGlobals kg, int particle) -{ - return kernel_tex_fetch(__particles, particle).lifetime; -} - -ccl_device float particle_size(KernelGlobals kg, int particle) -{ - return kernel_tex_fetch(__particles, particle).size; -} - -ccl_device float4 particle_rotation(KernelGlobals kg, int particle) -{ - return kernel_tex_fetch(__particles, particle).rotation; -} - -ccl_device float3 particle_location(KernelGlobals kg, int particle) -{ - return float4_to_float3(kernel_tex_fetch(__particles, particle).location); -} - -ccl_device float3 particle_velocity(KernelGlobals kg, int particle) -{ - return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity); -} - -ccl_device float3 particle_angular_velocity(KernelGlobals kg, int particle) -{ - return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity); -} - -/* Object intersection in BVH */ - -ccl_device_inline float3 bvh_clamp_direction(float3 dir) -{ - const float ooeps = 8.271806E-25f; - return make_float3((fabsf(dir.x) > ooeps) ? dir.x : copysignf(ooeps, dir.x), - (fabsf(dir.y) > ooeps) ? dir.y : copysignf(ooeps, dir.y), - (fabsf(dir.z) > ooeps) ? dir.z : copysignf(ooeps, dir.z)); -} - -ccl_device_inline float3 bvh_inverse_direction(float3 dir) -{ - return rcp(dir); -} - -/* Transform ray into object space to enter static object in BVH */ - -ccl_device_inline float bvh_instance_push(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir) -{ - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - - *P = transform_point(&tfm, ray->P); - - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); - - return len; -} - -/* Transform ray to exit static object in BVH. 
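bvh_clamp_direction() above nudges near-zero direction components away from zero so that the reciprocal direction used for BVH slab tests stays finite. A per-component sketch (clamp_component is an illustrative name; the constant is the one used in the kernel code above):

    #include <cmath>

    static float clamp_component(float d)
    {
      const float ooeps = 8.271806e-25f; /* Tiny, but 1/ooeps is still a finite float. */
      return (std::fabs(d) > ooeps) ? d : std::copysign(ooeps, d);
    }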
*/ - -ccl_device_inline float bvh_instance_pop(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - float t) -{ - if (t != FLT_MAX) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - t /= len(transform_direction(&tfm, ray->D)); - } - - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); - - return t; -} - -/* Same as above, but returns scale factor to apply to multiple intersection distances */ - -ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - ccl_private float *t_fac) -{ - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - *t_fac = 1.0f / len(transform_direction(&tfm, ray->D)); - - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); -} - -#ifdef __OBJECT_MOTION__ -/* Transform ray into object space to enter motion blurred object in BVH */ - -ccl_device_inline float bvh_instance_motion_push(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - ccl_private Transform *itfm) -{ - object_fetch_transform_motion_test(kg, object, ray->time, itfm); - - *P = transform_point(itfm, ray->P); - - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); - - return len; -} - -/* Transform ray to exit motion blurred object in BVH. */ - -ccl_device_inline float bvh_instance_motion_pop(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - float t, - ccl_private Transform *itfm) -{ - if (t != FLT_MAX) { - t /= len(transform_direction(itfm, ray->D)); - } - - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); - - return t; -} - -/* Same as above, but returns scale factor to apply to multiple intersection distances */ - -ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals kg, - int object, - ccl_private const Ray *ray, - ccl_private float3 *P, - ccl_private float3 *dir, - ccl_private float3 *idir, - ccl_private float *t_fac, - ccl_private Transform *itfm) -{ - *t_fac = 1.0f / len(transform_direction(itfm, ray->D)); - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); -} - -#endif - -/* TODO: This can be removed when we know if no devices will require explicit - * address space qualifiers for this case. */ - -#define object_position_transform_auto object_position_transform -#define object_dir_transform_auto object_dir_transform -#define object_normal_transform_auto object_normal_transform - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h deleted file mode 100644 index bf1a06220aa..00000000000 --- a/intern/cycles/kernel/geom/geom_patch.h +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Based on code from OpenSubdiv released under this license: - * - * Copyright 2013 Pixar - * - * Licensed under the Apache License, Version 2.0 (the "Apache License") - * with the following modification; you may not use this file except in - * compliance with the Apache License and the following modification to it: - * Section 6. Trademarks. 
is deleted and replaced with: - * - * 6. Trademarks. This License does not grant permission to use the trade - * names, trademarks, service marks, or product names of the Licensor - * and its affiliates, except as required to comply with Section 4(c) of - * the License and to reproduce the content of the NOTICE file. - * - * You may obtain a copy of the Apache License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License with the above modification is - * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the Apache License for the specific - * language governing permissions and limitations under the Apache License. - */ - -#pragma once - -#include "util/util_color.h" - -CCL_NAMESPACE_BEGIN - -typedef struct PatchHandle { - int array_index, patch_index, vert_index; -} PatchHandle; - -ccl_device_inline int patch_map_resolve_quadrant(float median, - ccl_private float *u, - ccl_private float *v) -{ - int quadrant = -1; - - if (*u < median) { - if (*v < median) { - quadrant = 0; - } - else { - quadrant = 1; - *v -= median; - } - } - else { - if (*v < median) { - quadrant = 3; - } - else { - quadrant = 2; - *v -= median; - } - *u -= median; - } - - return quadrant; -} - -/* retrieve PatchHandle from patch coords */ - -ccl_device_inline PatchHandle -patch_map_find_patch(KernelGlobals kg, int object, int patch, float u, float v) -{ - PatchHandle handle; - - kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f)); - - int node = (object_patch_map_offset(kg, object) + patch) / 2; - float median = 0.5f; - - for (int depth = 0; depth < 0xff; depth++) { - float delta = median * 0.5f; - - int quadrant = patch_map_resolve_quadrant(median, &u, &v); - kernel_assert(quadrant >= 0); - - uint child = kernel_tex_fetch(__patches, node + quadrant); - - /* is the quadrant a hole? 
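patch_map_find_patch() above walks a quadtree: at every level the running median halves and (u, v) are remapped into the chosen quadrant. For example (u, v) = (0.7, 0.2) picks quadrant 3 at the first level (u >= 0.5, v < 0.5), then continues with (0.2, 0.2) against median 0.25, until a leaf or a hole is reached. A condensed sketch of the descent without the leaf/hole handling (resolve_quadrant and descend are stand-in names):

    static int resolve_quadrant(float median, float *u, float *v)
    {
      int q;
      if (*u < median) {
        q = (*v < median) ? 0 : 1;
      }
      else {
        q = (*v < median) ? 3 : 2;
        *u -= median;
      }
      if (*v >= median) {
        *v -= median;
      }
      return q;
    }

    static void descend(float u, float v, int levels, int path[])
    {
      float median = 0.5f;
      for (int d = 0; d < levels; d++) {
        path[d] = resolve_quadrant(median, &u, &v);
        median *= 0.5f; /* Each level halves the quadrant size. */
      }
    }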
*/ - if (!(child & PATCH_MAP_NODE_IS_SET)) { - handle.array_index = -1; - return handle; - } - - uint index = child & PATCH_MAP_NODE_INDEX_MASK; - - if (child & PATCH_MAP_NODE_IS_LEAF) { - handle.array_index = kernel_tex_fetch(__patches, index + 0); - handle.patch_index = kernel_tex_fetch(__patches, index + 1); - handle.vert_index = kernel_tex_fetch(__patches, index + 2); - - return handle; - } - else { - node = index; - } - - median = delta; - } - - /* no leaf found */ - kernel_assert(0); - - handle.array_index = -1; - return handle; -} - -ccl_device_inline void patch_eval_bspline_weights(float t, - ccl_private float *point, - ccl_private float *deriv) -{ - /* The four uniform cubic B-Spline basis functions evaluated at t */ - float inv_6 = 1.0f / 6.0f; - - float t2 = t * t; - float t3 = t * t2; - - point[0] = inv_6 * (1.0f - 3.0f * (t - t2) - t3); - point[1] = inv_6 * (4.0f - 6.0f * t2 + 3.0f * t3); - point[2] = inv_6 * (1.0f + 3.0f * (t + t2 - t3)); - point[3] = inv_6 * t3; - - /* Derivatives of the above four basis functions at t */ - deriv[0] = -0.5f * t2 + t - 0.5f; - deriv[1] = 1.5f * t2 - 2.0f * t; - deriv[2] = -1.5f * t2 + t + 0.5f; - deriv[3] = 0.5f * t2; -} - -ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits, - ccl_private float *s, - ccl_private float *t) -{ - int boundary = ((bits >> 8) & 0xf); - - if (boundary & 1) { - t[2] -= t[0]; - t[1] += 2 * t[0]; - t[0] = 0; - } - - if (boundary & 2) { - s[1] -= s[3]; - s[2] += 2 * s[3]; - s[3] = 0; - } - - if (boundary & 4) { - t[1] -= t[3]; - t[2] += 2 * t[3]; - t[3] = 0; - } - - if (boundary & 8) { - s[2] -= s[0]; - s[1] += 2 * s[0]; - s[0] = 0; - } -} - -ccl_device_inline int patch_eval_depth(uint patch_bits) -{ - return (patch_bits & 0xf); -} - -ccl_device_inline float patch_eval_param_fraction(uint patch_bits) -{ - bool non_quad_root = (patch_bits >> 4) & 0x1; - int depth = patch_eval_depth(patch_bits); - - if (non_quad_root) { - return 1.0f / (float)(1 << (depth - 1)); - } - else { - return 1.0f / (float)(1 << depth); - } -} - -ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, - ccl_private float *u, - ccl_private float *v) -{ - float frac = patch_eval_param_fraction(patch_bits); - - int iu = (patch_bits >> 22) & 0x3ff; - int iv = (patch_bits >> 12) & 0x3ff; - - /* top left corner */ - float pu = (float)iu * frac; - float pv = (float)iv * frac; - - /* normalize uv coordinates */ - *u = (*u - pu) / frac; - *v = (*v - pv) / frac; -} - -/* retrieve patch control indices */ - -ccl_device_inline int patch_eval_indices(KernelGlobals kg, - ccl_private const PatchHandle *handle, - int channel, - int indices[PATCH_MAX_CONTROL_VERTS]) -{ - int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index; - - /* XXX: regular patches only */ - for (int i = 0; i < 16; i++) { - indices[i] = kernel_tex_fetch(__patches, index_base + i); - } - - return 16; -} - -/* evaluate patch basis functions */ - -ccl_device_inline void patch_eval_basis(KernelGlobals kg, - ccl_private const PatchHandle *handle, - float u, - float v, - float weights[PATCH_MAX_CONTROL_VERTS], - float weights_du[PATCH_MAX_CONTROL_VERTS], - float weights_dv[PATCH_MAX_CONTROL_VERTS]) -{ - uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */ - float d_scale = 1 << patch_eval_depth(patch_bits); - - bool non_quad_root = (patch_bits >> 4) & 0x1; - if (non_quad_root) { - d_scale *= 0.5f; - } - - patch_eval_normalize_coords(patch_bits, &u, &v); - - /* XXX: regular patches only for 
now. */ - - float s[4], t[4], ds[4], dt[4]; - - patch_eval_bspline_weights(u, s, ds); - patch_eval_bspline_weights(v, t, dt); - - patch_eval_adjust_boundary_weights(patch_bits, s, t); - patch_eval_adjust_boundary_weights(patch_bits, ds, dt); - - for (int k = 0; k < 4; k++) { - for (int l = 0; l < 4; l++) { - weights[4 * k + l] = s[l] * t[k]; - weights_du[4 * k + l] = ds[l] * t[k] * d_scale; - weights_dv[4 * k + l] = s[l] * dt[k] * d_scale; - } - } -} - -/* generic function for evaluating indices and weights from patch coords */ - -ccl_device_inline int patch_eval_control_verts(KernelGlobals kg, - int object, - int patch, - float u, - float v, - int channel, - int indices[PATCH_MAX_CONTROL_VERTS], - float weights[PATCH_MAX_CONTROL_VERTS], - float weights_du[PATCH_MAX_CONTROL_VERTS], - float weights_dv[PATCH_MAX_CONTROL_VERTS]) -{ - PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v); - kernel_assert(handle.array_index >= 0); - - int num_control = patch_eval_indices(kg, &handle, channel, indices); - patch_eval_basis(kg, &handle, u, v, weights, weights_du, weights_dv); - - return num_control; -} - -/* functions for evaluating attributes on patches */ - -ccl_device float patch_eval_float(KernelGlobals kg, - ccl_private const ShaderData *sd, - int offset, - int patch, - float u, - float v, - int channel, - ccl_private float *du, - ccl_private float *dv) -{ - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int num_control = patch_eval_control_verts( - kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); - - float val = 0.0f; - if (du) - *du = 0.0f; - if (dv) - *dv = 0.0f; - - for (int i = 0; i < num_control; i++) { - float v = kernel_tex_fetch(__attributes_float, offset + indices[i]); - - val += v * weights[i]; - if (du) - *du += v * weights_du[i]; - if (dv) - *dv += v * weights_dv[i]; - } - - return val; -} - -ccl_device float2 patch_eval_float2(KernelGlobals kg, - ccl_private const ShaderData *sd, - int offset, - int patch, - float u, - float v, - int channel, - ccl_private float2 *du, - ccl_private float2 *dv) -{ - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int num_control = patch_eval_control_verts( - kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); - - float2 val = make_float2(0.0f, 0.0f); - if (du) - *du = make_float2(0.0f, 0.0f); - if (dv) - *dv = make_float2(0.0f, 0.0f); - - for (int i = 0; i < num_control; i++) { - float2 v = kernel_tex_fetch(__attributes_float2, offset + indices[i]); - - val += v * weights[i]; - if (du) - *du += v * weights_du[i]; - if (dv) - *dv += v * weights_dv[i]; - } - - return val; -} - -ccl_device float3 patch_eval_float3(KernelGlobals kg, - ccl_private const ShaderData *sd, - int offset, - int patch, - float u, - float v, - int channel, - ccl_private float3 *du, - ccl_private float3 *dv) -{ - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int num_control = patch_eval_control_verts( - kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); - - float3 val = make_float3(0.0f, 0.0f, 0.0f); - if (du) - *du = make_float3(0.0f, 0.0f, 0.0f); - if (dv) - *dv = make_float3(0.0f, 0.0f, 0.0f); - - 
for (int i = 0; i < num_control; i++) { - float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i])); - - val += v * weights[i]; - if (du) - *du += v * weights_du[i]; - if (dv) - *dv += v * weights_dv[i]; - } - - return val; -} - -ccl_device float4 patch_eval_float4(KernelGlobals kg, - ccl_private const ShaderData *sd, - int offset, - int patch, - float u, - float v, - int channel, - ccl_private float4 *du, - ccl_private float4 *dv) -{ - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int num_control = patch_eval_control_verts( - kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); - - float4 val = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - if (du) - *du = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dv) - *dv = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - for (int i = 0; i < num_control; i++) { - float4 v = kernel_tex_fetch(__attributes_float3, offset + indices[i]); - - val += v * weights[i]; - if (du) - *du += v * weights_du[i]; - if (dv) - *dv += v * weights_dv[i]; - } - - return val; -} - -ccl_device float4 patch_eval_uchar4(KernelGlobals kg, - ccl_private const ShaderData *sd, - int offset, - int patch, - float u, - float v, - int channel, - ccl_private float4 *du, - ccl_private float4 *dv) -{ - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int num_control = patch_eval_control_verts( - kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); - - float4 val = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - if (du) - *du = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dv) - *dv = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - for (int i = 0; i < num_control; i++) { - float4 v = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, offset + indices[i]))); - - val += v * weights[i]; - if (du) - *du += v * weights_du[i]; - if (dv) - *dv += v * weights_dv[i]; - } - - return val; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h deleted file mode 100644 index bc559e3c812..00000000000 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Primitive Utilities - * - * Generic functions to look up mesh, curve and volume primitive attributes for - * shading and render passes. */ - -#pragma once - -#include "kernel/camera/camera_projection.h" - -CCL_NAMESPACE_BEGIN - -/* Surface Attributes - * - * Read geometry attributes for surface shading. This is distinct from volume - * attributes for performance, mainly for GPU performance to avoid bringing in - * heavy volume interpolation code. 
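patch_eval_uchar4 above differs from the float variants only in how each control value is fetched: the byte color is converted to float and run through an sRGB-to-linear transform before being weighted. A minimal decode of one such byte color is sketched below, assuming the standard sRGB transfer function; the kernel's color_srgb_to_linear_v4() may use a faster approximation, and presumably leaves alpha linear as this sketch does.

#include <cmath>
#include <cstdint>
#include <cstdio>

struct Color4 { float x, y, z, w; };

/* Standard sRGB electro-optical transfer function. */
static float srgb_to_linear(float c)
{
  return (c <= 0.04045f) ? c / 12.92f : std::pow((c + 0.055f) / 1.055f, 2.4f);
}

static Color4 decode_byte_color(const uint8_t rgba[4])
{
  /* Only RGB gets the sRGB curve; alpha stays linear. */
  return {srgb_to_linear(rgba[0] / 255.0f),
          srgb_to_linear(rgba[1] / 255.0f),
          srgb_to_linear(rgba[2] / 255.0f),
          rgba[3] / 255.0f};
}

int main()
{
  const uint8_t c[4] = {188, 128, 64, 255};
  const Color4 lin = decode_byte_color(c);
  std::printf("%.4f %.4f %.4f %.4f\n", lin.x, lin.y, lin.z, lin.w);
  return 0;
}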
*/ - -ccl_device_inline float primitive_surface_attribute_float(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float *dx, - ccl_private float *dy) -{ - if (sd->type & PRIMITIVE_ALL_TRIANGLE) { - if (subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float(kg, sd, desc, dx, dy); - } -#ifdef __HAIR__ - else if (sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float(kg, sd, desc, dx, dy); - } -#endif - else { - if (dx) - *dx = 0.0f; - if (dy) - *dy = 0.0f; - return 0.0f; - } -} - -ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float2 *dx, - ccl_private float2 *dy) -{ - if (sd->type & PRIMITIVE_ALL_TRIANGLE) { - if (subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float2(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float2(kg, sd, desc, dx, dy); - } -#ifdef __HAIR__ - else if (sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float2(kg, sd, desc, dx, dy); - } -#endif - else { - if (dx) - *dx = make_float2(0.0f, 0.0f); - if (dy) - *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } -} - -ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float3 *dx, - ccl_private float3 *dy) -{ - if (sd->type & PRIMITIVE_ALL_TRIANGLE) { - if (subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float3(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); - } -#ifdef __HAIR__ - else if (sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float3(kg, sd, desc, dx, dy); - } -#endif - else { - if (dx) - *dx = make_float3(0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float3(0.0f, 0.0f, 0.0f); - return make_float3(0.0f, 0.0f, 0.0f); - } -} - -ccl_device_forceinline float4 primitive_surface_attribute_float4(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float4 *dx, - ccl_private float4 *dy) -{ - if (sd->type & PRIMITIVE_ALL_TRIANGLE) { - if (subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float4(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float4(kg, sd, desc, dx, dy); - } -#ifdef __HAIR__ - else if (sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float4(kg, sd, desc, dx, dy); - } -#endif - else { - if (dx) - *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } -} - -#ifdef __VOLUME__ -/* Volume Attributes - * - * Read geometry attributes for volume shading. This is distinct from surface - * attributes for performance, mainly for GPU performance to avoid bringing in - * heavy volume interpolation code. 
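The lookups above all follow the same shape: dispatch on the primitive type, let the caller pass optional pointers for the screen-space derivatives, and fall back to zeros so nothing is left uninitialized. A stripped-down sketch of that pattern follows; the types and fetch functions are stand-ins, not the kernel's API.

#include <cstdio>

enum PrimType { PRIM_TRIANGLE, PRIM_CURVE, PRIM_OTHER };

static float fetch_triangle_attr(float *dx, float *dy)
{
  if (dx) *dx = 0.25f; /* pretend derivatives */
  if (dy) *dy = -0.1f;
  return 1.0f;
}

static float fetch_curve_attr(float *dx, float *dy)
{
  if (dx) *dx = 0.0f;
  if (dy) *dy = 0.0f;
  return 2.0f;
}

static float surface_attribute_float(PrimType type, float *dx, float *dy)
{
  switch (type) {
    case PRIM_TRIANGLE: return fetch_triangle_attr(dx, dy);
    case PRIM_CURVE: return fetch_curve_attr(dx, dy);
    default:
      /* Unsupported primitive: zero the optional outputs and the value. */
      if (dx) *dx = 0.0f;
      if (dy) *dy = 0.0f;
      return 0.0f;
  }
}

int main()
{
  float dx, dy;
  std::printf("%f\n", surface_attribute_float(PRIM_OTHER, &dx, &dy)); /* 0.0 */
  return 0;
}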
*/ - -ccl_device_inline bool primitive_is_volume_attribute(ccl_private const ShaderData *sd, - const AttributeDescriptor desc) -{ - return sd->type == PRIMITIVE_VOLUME; -} - -ccl_device_inline float primitive_volume_attribute_float(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc) -{ - if (primitive_is_volume_attribute(sd, desc)) { - return volume_attribute_value_to_float(volume_attribute_float4(kg, sd, desc)); - } - else { - return 0.0f; - } -} - -ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc) -{ - if (primitive_is_volume_attribute(sd, desc)) { - return volume_attribute_value_to_float3(volume_attribute_float4(kg, sd, desc)); - } - else { - return make_float3(0.0f, 0.0f, 0.0f); - } -} - -ccl_device_inline float4 primitive_volume_attribute_float4(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc) -{ - if (primitive_is_volume_attribute(sd, desc)) { - return volume_attribute_float4(kg, sd, desc); - } - else { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } -} -#endif - -/* Default UV coordinate */ - -ccl_device_inline float3 primitive_uv(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV); - - if (desc.offset == ATTR_STD_NOT_FOUND) - return make_float3(0.0f, 0.0f, 0.0f); - - float2 uv = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); - return make_float3(uv.x, uv.y, 1.0f); -} - -/* Ptex coordinates */ - -ccl_device bool primitive_ptex(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float2 *uv, - ccl_private int *face_id) -{ - /* storing ptex data as attributes is not memory efficient but simple for tests */ - const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID); - const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV); - - if (desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND) - return false; - - float3 uv3 = primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL); - float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL); - - *uv = make_float2(uv3.x, uv3.y); - *face_id = (int)face_id_f; - - return true; -} - -/* Surface tangent */ - -ccl_device float3 primitive_tangent(KernelGlobals kg, ccl_private ShaderData *sd) -{ -#ifdef __HAIR__ - if (sd->type & PRIMITIVE_ALL_CURVE) -# ifdef __DPDU__ - return normalize(sd->dPdu); -# else - return make_float3(0.0f, 0.0f, 0.0f); -# endif -#endif - - /* try to create spherical tangent from generated coordinates */ - const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED); - - if (desc.offset != ATTR_STD_NOT_FOUND) { - float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); - object_normal_transform(kg, sd, &data); - return cross(sd->N, normalize(cross(data, sd->N))); - } - else { - /* otherwise use surface derivatives */ -#ifdef __DPDU__ - return normalize(sd->dPdu); -#else - return make_float3(0.0f, 0.0f, 0.0f); -#endif - } -} - -/* Motion vector for motion pass */ - -ccl_device_inline float4 primitive_motion_vector(KernelGlobals kg, - ccl_private const ShaderData *sd) -{ - /* center position */ - float3 center; - -#ifdef __HAIR__ - bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE; - if (is_curve_primitive) { - center = 
curve_motion_center_location(kg, sd); - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_position_transform(kg, sd, ¢er); - } - } - else -#endif - center = sd->P; - - float3 motion_pre = center, motion_post = center; - - /* deformation motion */ - AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION); - - if (desc.offset != ATTR_STD_NOT_FOUND) { - /* get motion info */ - int numverts, numkeys; - object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); - - /* lookup attributes */ - motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - - desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE) ? numverts : numkeys; - motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - -#ifdef __HAIR__ - if (is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { - object_position_transform(kg, sd, &motion_pre); - object_position_transform(kg, sd, &motion_post); - } -#endif - } - - /* object motion. note that depending on the mesh having motion vectors, this - * transformation was set match the world/object space of motion_pre/post */ - Transform tfm; - - tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_PRE); - motion_pre = transform_point(&tfm, motion_pre); - - tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_POST); - motion_post = transform_point(&tfm, motion_post); - - float3 motion_center; - - /* camera motion, for perspective/orthographic motion.pre/post will be a - * world-to-raster matrix, for panorama it's world-to-camera */ - if (kernel_data.cam.type != CAMERA_PANORAMA) { - ProjectionTransform projection = kernel_data.cam.worldtoraster; - motion_center = transform_perspective(&projection, center); - - projection = kernel_data.cam.perspective_pre; - motion_pre = transform_perspective(&projection, motion_pre); - - projection = kernel_data.cam.perspective_post; - motion_post = transform_perspective(&projection, motion_post); - } - else { - tfm = kernel_data.cam.worldtocamera; - motion_center = normalize(transform_point(&tfm, center)); - motion_center = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_center)); - motion_center.x *= kernel_data.cam.width; - motion_center.y *= kernel_data.cam.height; - - tfm = kernel_data.cam.motion_pass_pre; - motion_pre = normalize(transform_point(&tfm, motion_pre)); - motion_pre = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_pre)); - motion_pre.x *= kernel_data.cam.width; - motion_pre.y *= kernel_data.cam.height; - - tfm = kernel_data.cam.motion_pass_post; - motion_post = normalize(transform_point(&tfm, motion_post)); - motion_post = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_post)); - motion_post.x *= kernel_data.cam.width; - motion_post.y *= kernel_data.cam.height; - } - - motion_pre = motion_pre - motion_center; - motion_post = motion_center - motion_post; - - return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_shader_data.h b/intern/cycles/kernel/geom/geom_shader_data.h deleted file mode 100644 index 46bda2b656c..00000000000 --- a/intern/cycles/kernel/geom/geom_shader_data.h +++ /dev/null @@ -1,447 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
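For the perspective/orthographic branch of the motion-pass code above, the previous, current and next positions of the shading point are projected through their corresponding world-to-raster matrices, and the raster-space differences are packed into a float4. A standalone sketch, with plain row-major 4x4 matrices standing in for ProjectionTransform:

#include <cstdio>

struct Vec3 { float x, y, z; };

/* Perspective projection to raster space; only x, y are used. */
static Vec3 project(const float m[16], const Vec3 &p)
{
  const float x = m[0] * p.x + m[1] * p.y + m[2] * p.z + m[3];
  const float y = m[4] * p.x + m[5] * p.y + m[6] * p.z + m[7];
  const float w = m[12] * p.x + m[13] * p.y + m[14] * p.z + m[15];
  return {x / w, y / w, 0.0f};
}

/* Emits {pre.x, pre.y, post.x, post.y}, mirroring the float4 returned above. */
static void motion_vector(const float to_raster_pre[16],
                          const float to_raster_now[16],
                          const float to_raster_post[16],
                          const Vec3 &P_pre, const Vec3 &P_now, const Vec3 &P_post,
                          float out[4])
{
  const Vec3 a = project(to_raster_pre, P_pre);
  const Vec3 b = project(to_raster_now, P_now);
  const Vec3 c = project(to_raster_post, P_post);
  out[0] = a.x - b.x; /* motion_pre - motion_center */
  out[1] = a.y - b.y;
  out[2] = b.x - c.x; /* motion_center - motion_post */
  out[3] = b.y - c.y;
}

int main()
{
  /* Identity "projection" just to exercise the code path. */
  const float I[16] = {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1};
  float mv[4];
  motion_vector(I, I, I, {0, 0, 1}, {0.1f, 0, 1}, {0.2f, 0, 1}, mv);
  std::printf("%.2f %.2f %.2f %.2f\n", mv[0], mv[1], mv[2], mv[3]); /* -0.10 0.00 -0.10 0.00 */
  return 0;
}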
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Functions to initialize ShaderData given. - * - * Could be from an incoming ray, intersection or sampled position. */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* ShaderData setup from incoming ray */ - -#ifdef __OBJECT_MOTION__ -ccl_device void shader_setup_object_transforms(KernelGlobals kg, - ccl_private ShaderData *ccl_restrict sd, - float time) -{ - if (sd->object_flag & SD_OBJECT_MOTION) { - sd->ob_tfm_motion = object_fetch_transform_motion(kg, sd->object, time); - sd->ob_itfm_motion = transform_quick_inverse(sd->ob_tfm_motion); - } -} -#endif - -/* TODO: break this up if it helps reduce register pressure to load data from - * global memory as we write it to shader-data. */ -ccl_device_inline void shader_setup_from_ray(KernelGlobals kg, - ccl_private ShaderData *ccl_restrict sd, - ccl_private const Ray *ccl_restrict ray, - ccl_private const Intersection *ccl_restrict isect) -{ - /* Read intersection data into shader globals. - * - * TODO: this is redundant, could potentially remove some of this from - * ShaderData but would need to ensure that it also works for shadow - * shader evaluation. */ - sd->u = isect->u; - sd->v = isect->v; - sd->ray_length = isect->t; - sd->type = isect->type; - sd->object = isect->object; - sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); - sd->prim = isect->prim; - sd->lamp = LAMP_NONE; - sd->flag = 0; - - /* Read matrices and time. */ - sd->time = ray->time; - -#ifdef __OBJECT_MOTION__ - shader_setup_object_transforms(kg, sd, ray->time); -#endif - - /* Read ray data into shader globals. 
*/ - sd->I = -ray->D; - -#ifdef __HAIR__ - if (sd->type & PRIMITIVE_ALL_CURVE) { - /* curve */ - curve_shader_setup(kg, sd, ray->P, ray->D, isect->t, isect->object, isect->prim); - } - else -#endif - if (sd->type & PRIMITIVE_TRIANGLE) { - /* static triangle */ - float3 Ng = triangle_normal(kg, sd); - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); - - /* vectors */ - sd->P = triangle_refine(kg, sd, ray->P, ray->D, isect->t, isect->object, isect->prim); - sd->Ng = Ng; - sd->N = Ng; - - /* smooth normal */ - if (sd->shader & SHADER_SMOOTH_NORMAL) - sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); - -#ifdef __DPDU__ - /* dPdu/dPdv */ - triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); -#endif - } - else { - /* motion triangle */ - motion_triangle_shader_setup( - kg, sd, ray->P, ray->D, isect->t, isect->object, isect->prim, false); - } - - sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - /* instance transform */ - object_normal_transform_auto(kg, sd, &sd->N); - object_normal_transform_auto(kg, sd, &sd->Ng); -#ifdef __DPDU__ - object_dir_transform_auto(kg, sd, &sd->dPdu); - object_dir_transform_auto(kg, sd, &sd->dPdv); -#endif - } - - /* backfacing test */ - bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); - - if (backfacing) { - sd->flag |= SD_BACKFACING; - sd->Ng = -sd->Ng; - sd->N = -sd->N; -#ifdef __DPDU__ - sd->dPdu = -sd->dPdu; - sd->dPdv = -sd->dPdv; -#endif - } - -#ifdef __RAY_DIFFERENTIALS__ - /* differentials */ - differential_transfer_compact(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, sd->ray_length); - differential_incoming_compact(&sd->dI, ray->D, ray->dD); - differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); -#endif -} - -/* ShaderData setup from position sampled on mesh */ - -ccl_device_inline void shader_setup_from_sample(KernelGlobals kg, - ccl_private ShaderData *ccl_restrict sd, - const float3 P, - const float3 Ng, - const float3 I, - int shader, - int object, - int prim, - float u, - float v, - float t, - float time, - bool object_space, - int lamp) -{ - /* vectors */ - sd->P = P; - sd->N = Ng; - sd->Ng = Ng; - sd->I = I; - sd->shader = shader; - if (prim != PRIM_NONE) - sd->type = PRIMITIVE_TRIANGLE; - else if (lamp != LAMP_NONE) - sd->type = PRIMITIVE_LAMP; - else - sd->type = PRIMITIVE_NONE; - - /* primitive */ - sd->object = object; - sd->lamp = LAMP_NONE; - /* Currently no access to bvh prim index for strand sd->prim. 
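The backfacing handling in shader_setup_from_ray is worth spelling out: I points from the hit back toward the ray origin, so a negative dot product with the geometric normal means the ray struck the back side, and all normal-related vectors get negated. A minimal standalone version, with placeholder struct and field names:

#include <cstdio>

struct Vec3 { float x, y, z; };

static float dot(const Vec3 &a, const Vec3 &b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
static Vec3 neg(const Vec3 &v) { return {-v.x, -v.y, -v.z}; }

struct ShadePoint { Vec3 Ng, N, dPdu, dPdv, I; bool backfacing; };

static void apply_backfacing(ShadePoint &sp)
{
  sp.backfacing = dot(sp.Ng, sp.I) < 0.0f;
  if (sp.backfacing) {
    /* Flip geometric and shading normals plus the surface derivatives. */
    sp.Ng = neg(sp.Ng);
    sp.N = neg(sp.N);
    sp.dPdu = neg(sp.dPdu);
    sp.dPdv = neg(sp.dPdv);
  }
}

int main()
{
  ShadePoint sp = {{0, 0, 1}, {0, 0, 1}, {1, 0, 0}, {0, 1, 0}, {0, 0, -1}, false};
  apply_backfacing(sp);
  std::printf("backfacing=%d Ng.z=%.1f\n", sp.backfacing, sp.Ng.z); /* 1, -1.0 */
  return 0;
}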
*/ - sd->prim = prim; - sd->u = u; - sd->v = v; - sd->time = time; - sd->ray_length = t; - - sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; - sd->object_flag = 0; - if (sd->object != OBJECT_NONE) { - sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object); - -#ifdef __OBJECT_MOTION__ - shader_setup_object_transforms(kg, sd, time); -#endif - } - else if (lamp != LAMP_NONE) { - sd->lamp = lamp; - } - - /* transform into world space */ - if (object_space) { - object_position_transform_auto(kg, sd, &sd->P); - object_normal_transform_auto(kg, sd, &sd->Ng); - sd->N = sd->Ng; - object_dir_transform_auto(kg, sd, &sd->I); - } - - if (sd->type & PRIMITIVE_TRIANGLE) { - /* smooth normal */ - if (sd->shader & SHADER_SMOOTH_NORMAL) { - sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_normal_transform_auto(kg, sd, &sd->N); - } - } - - /* dPdu/dPdv */ -#ifdef __DPDU__ - triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_dir_transform_auto(kg, sd, &sd->dPdu); - object_dir_transform_auto(kg, sd, &sd->dPdv); - } -#endif - } - else { -#ifdef __DPDU__ - sd->dPdu = zero_float3(); - sd->dPdv = zero_float3(); -#endif - } - - /* backfacing test */ - if (sd->prim != PRIM_NONE) { - bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); - - if (backfacing) { - sd->flag |= SD_BACKFACING; - sd->Ng = -sd->Ng; - sd->N = -sd->N; -#ifdef __DPDU__ - sd->dPdu = -sd->dPdu; - sd->dPdv = -sd->dPdv; -#endif - } - } - -#ifdef __RAY_DIFFERENTIALS__ - /* no ray differentials here yet */ - sd->dP = differential3_zero(); - sd->dI = differential3_zero(); - sd->du = differential_zero(); - sd->dv = differential_zero(); -#endif -} - -/* ShaderData setup for displacement */ - -ccl_device void shader_setup_from_displace(KernelGlobals kg, - ccl_private ShaderData *ccl_restrict sd, - int object, - int prim, - float u, - float v) -{ - float3 P, Ng, I = zero_float3(); - int shader; - - triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); - - /* force smooth shading for displacement */ - shader |= SHADER_SMOOTH_NORMAL; - - shader_setup_from_sample( - kg, - sd, - P, - Ng, - I, - shader, - object, - prim, - u, - v, - 0.0f, - 0.5f, - !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED), - LAMP_NONE); -} - -/* ShaderData setup for point on curve. */ - -ccl_device void shader_setup_from_curve(KernelGlobals kg, - ccl_private ShaderData *ccl_restrict sd, - int object, - int prim, - int segment, - float u) -{ - /* Primitive */ - sd->type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE_THICK, segment); - sd->lamp = LAMP_NONE; - sd->prim = prim; - sd->u = u; - sd->v = 0.0f; - sd->time = 0.5f; - sd->ray_length = 0.0f; - - /* Shader */ - sd->shader = kernel_tex_fetch(__curves, prim).shader_id; - sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; - - /* Object */ - sd->object = object; - sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); -#ifdef __OBJECT_MOTION__ - shader_setup_object_transforms(kg, sd, sd->time); -#endif - - /* Get control points. 
*/ - KernelCurve kcurve = kernel_tex_fetch(__curves, prim); - - int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - int ka = max(k0 - 1, kcurve.first_key); - int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); - - float4 P_curve[4]; - - P_curve[0] = kernel_tex_fetch(__curve_keys, ka); - P_curve[1] = kernel_tex_fetch(__curve_keys, k0); - P_curve[2] = kernel_tex_fetch(__curve_keys, k1); - P_curve[3] = kernel_tex_fetch(__curve_keys, kb); - - /* Interpolate position and tangent. */ - sd->P = float4_to_float3(catmull_rom_basis_derivative(P_curve, sd->u)); -#ifdef __DPDU__ - sd->dPdu = float4_to_float3(catmull_rom_basis_derivative(P_curve, sd->u)); -#endif - - /* Transform into world space */ - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_position_transform_auto(kg, sd, &sd->P); -#ifdef __DPDU__ - object_dir_transform_auto(kg, sd, &sd->dPdu); -#endif - } - - /* No view direction, normals or bitangent. */ - sd->I = zero_float3(); - sd->N = zero_float3(); - sd->Ng = zero_float3(); -#ifdef __DPDU__ - sd->dPdv = zero_float3(); -#endif - - /* No ray differentials currently. */ -#ifdef __RAY_DIFFERENTIALS__ - sd->dP = differential3_zero(); - sd->dI = differential3_zero(); - sd->du = differential_zero(); - sd->dv = differential_zero(); -#endif -} - -/* ShaderData setup from ray into background */ - -ccl_device_inline void shader_setup_from_background(KernelGlobals kg, - ccl_private ShaderData *ccl_restrict sd, - const float3 ray_P, - const float3 ray_D, - const float ray_time) -{ - /* for NDC coordinates */ - sd->ray_P = ray_P; - - /* vectors */ - sd->P = ray_D; - sd->N = -ray_D; - sd->Ng = -ray_D; - sd->I = -ray_D; - sd->shader = kernel_data.background.surface_shader; - sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; - sd->object_flag = 0; - sd->time = ray_time; - sd->ray_length = 0.0f; - - sd->object = OBJECT_NONE; - sd->lamp = LAMP_NONE; - sd->prim = PRIM_NONE; - sd->u = 0.0f; - sd->v = 0.0f; - -#ifdef __DPDU__ - /* dPdu/dPdv */ - sd->dPdu = zero_float3(); - sd->dPdv = zero_float3(); -#endif - -#ifdef __RAY_DIFFERENTIALS__ - /* differentials */ - sd->dP = differential3_zero(); /* TODO: ray->dP */ - differential_incoming(&sd->dI, sd->dP); - sd->du = differential_zero(); - sd->dv = differential_zero(); -#endif -} - -/* ShaderData setup from point inside volume */ - -#ifdef __VOLUME__ -ccl_device_inline void shader_setup_from_volume(KernelGlobals kg, - ccl_private ShaderData *ccl_restrict sd, - ccl_private const Ray *ccl_restrict ray) -{ - - /* vectors */ - sd->P = ray->P; - sd->N = -ray->D; - sd->Ng = -ray->D; - sd->I = -ray->D; - sd->shader = SHADER_NONE; - sd->flag = 0; - sd->object_flag = 0; - sd->time = ray->time; - sd->ray_length = 0.0f; /* todo: can we set this to some useful value? 
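shader_setup_from_curve gathers four consecutive keys around the segment, clamping the two outer indices at the curve ends, and feeds them to a Catmull-Rom basis. The sketch below does the same clamping and evaluates the common uniform Catmull-Rom form on scalar keys; the kernel's catmull_rom_basis_* helpers may use a different but equivalent formulation.

#include <algorithm>
#include <cstdio>
#include <vector>

/* Uniform Catmull-Rom interpolation between p1 and p2, with p0/p3 as neighbors. */
static float catmull_rom(float p0, float p1, float p2, float p3, float t)
{
  return 0.5f * ((2.0f * p1) + (-p0 + p2) * t +
                 (2.0f * p0 - 5.0f * p1 + 4.0f * p2 - p3) * t * t +
                 (-p0 + 3.0f * p1 - 3.0f * p2 + p3) * t * t * t);
}

static float eval_curve(const std::vector<float> &keys, int segment, float t)
{
  const int k0 = segment;
  const int k1 = k0 + 1;
  const int ka = std::max(k0 - 1, 0);                    /* clamp at curve start */
  const int kb = std::min(k1 + 1, (int)keys.size() - 1); /* clamp at curve end */
  return catmull_rom(keys[ka], keys[k0], keys[k1], keys[kb], t);
}

int main()
{
  const std::vector<float> keys = {0.0f, 1.0f, 4.0f, 9.0f};
  std::printf("%f\n", eval_curve(keys, 1, 0.5f)); /* value between keys[1] and keys[2] */
  return 0;
}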
*/ - - sd->object = OBJECT_NONE; /* todo: fill this for texture coordinates */ - sd->lamp = LAMP_NONE; - sd->prim = PRIM_NONE; - sd->type = PRIMITIVE_VOLUME; - - sd->u = 0.0f; - sd->v = 0.0f; - -# ifdef __DPDU__ - /* dPdu/dPdv */ - sd->dPdu = zero_float3(); - sd->dPdv = zero_float3(); -# endif - -# ifdef __RAY_DIFFERENTIALS__ - /* differentials */ - sd->dP = differential3_zero(); /* TODO ray->dD */ - differential_incoming(&sd->dI, sd->dP); - sd->du = differential_zero(); - sd->dv = differential_zero(); -# endif - - /* for NDC coordinates */ - sd->ray_P = ray->P; - sd->ray_dP = ray->dP; -} -#endif /* __VOLUME__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h deleted file mode 100644 index 8a9a3f71231..00000000000 --- a/intern/cycles/kernel/geom/geom_subd_triangle.h +++ /dev/null @@ -1,687 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Functions for retrieving attributes on triangles produced from subdivision meshes */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Patch index for triangle, -1 if not subdivision triangle */ - -ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - return (sd->prim != PRIM_NONE) ? 
kernel_tex_fetch(__tri_patch, sd->prim) : ~0; -} - -/* UV coords of triangle within patch */ - -ccl_device_inline void subd_triangle_patch_uv(KernelGlobals kg, - ccl_private const ShaderData *sd, - float2 uv[3]) -{ - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - - uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x); - uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y); - uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z); -} - -/* Vertex indices of patch */ - -ccl_device_inline uint4 subd_triangle_patch_indices(KernelGlobals kg, int patch) -{ - uint4 indices; - - indices.x = kernel_tex_fetch(__patches, patch + 0); - indices.y = kernel_tex_fetch(__patches, patch + 1); - indices.z = kernel_tex_fetch(__patches, patch + 2); - indices.w = kernel_tex_fetch(__patches, patch + 3); - - return indices; -} - -/* Originating face for patch */ - -ccl_device_inline uint subd_triangle_patch_face(KernelGlobals kg, int patch) -{ - return kernel_tex_fetch(__patches, patch + 4); -} - -/* Number of corners on originating face */ - -ccl_device_inline uint subd_triangle_patch_num_corners(KernelGlobals kg, int patch) -{ - return kernel_tex_fetch(__patches, patch + 5) & 0xffff; -} - -/* Indices of the four corners that are used by the patch */ - -ccl_device_inline void subd_triangle_patch_corners(KernelGlobals kg, int patch, int corners[4]) -{ - uint4 data; - - data.x = kernel_tex_fetch(__patches, patch + 4); - data.y = kernel_tex_fetch(__patches, patch + 5); - data.z = kernel_tex_fetch(__patches, patch + 6); - data.w = kernel_tex_fetch(__patches, patch + 7); - - int num_corners = data.y & 0xffff; - - if (num_corners == 4) { - /* quad */ - corners[0] = data.z; - corners[1] = data.z + 1; - corners[2] = data.z + 2; - corners[3] = data.z + 3; - } - else { - /* ngon */ - int c = data.y >> 16; - - corners[0] = data.z + c; - corners[1] = data.z + mod(c + 1, num_corners); - corners[2] = data.w; - corners[3] = data.z + mod(c - 1, num_corners); - } -} - -/* Reading attributes on various subdivision triangle elements */ - -ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float *dx, - ccl_private float *dy) -{ - int patch = subd_triangle_patch(kg, sd); - -#ifdef __PATCH_EVAL__ - if (desc.flags & ATTR_SUBDIVIDED) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; - - /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; - - float a, dads, dadt; - a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); - -# ifdef __RAY_DIFFERENTIALS__ - if (dx || dy) { - float dsdu = dpdu.x; - float dtdu = dpdu.y; - float dsdv = dpdv.x; - float dtdv = dpdv.y; - - if (dx) { - float dudx = sd->du.dx; - float dvdx = sd->dv.dx; - - float dsdx = dsdu * dudx + dsdv * dvdx; - float dtdx = dtdu * dudx + dtdv * dvdx; - - *dx = dads * dsdx + dadt * dtdx; - } - if (dy) { - float dudy = sd->du.dy; - float dvdy = sd->dv.dy; - - float dsdy = dsdu * dudy + dsdv * dvdy; - float dtdy = dtdu * dudy + dtdv * dvdy; - - *dy = dads * dsdy + dadt * dtdy; - } - } -# endif - - return a; - } - else -#endif /* __PATCH_EVAL__ */ - if (desc.element == ATTR_ELEMENT_FACE) { - if (dx) - *dx = 0.0f; - if (dy) - *dy = 0.0f; - - return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch)); - } - else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - float2 uv[3]; - 
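The n-gon branch of subd_triangle_patch_corners walks the face's corners with wrap-around modular arithmetic: the patch is anchored at corner c, its neighbors are c + 1 and c - 1 modulo the corner count, and data.w supplies the fourth index. The sketch below reads data.w as an extra attribute slot for the n-gon interior, which is an assumption of this sketch, not something the code above states.

#include <cstdio>

static int wrap(int i, int n) { return ((i % n) + n) % n; } /* non-negative modulo */

static void ngon_patch_corners(int first_corner, int num_corners, int c,
                               int interior_index, int corners[4])
{
  corners[0] = first_corner + c;
  corners[1] = first_corner + wrap(c + 1, num_corners);
  corners[2] = interior_index;
  corners[3] = first_corner + wrap(c - 1, num_corners);
}

int main()
{
  int corners[4];
  /* 5-gon whose corner attributes start at index 20, patch anchored at local
     corner 0, interior slot stored at index 40. */
  ngon_patch_corners(20, 5, 0, 40, corners);
  std::printf("%d %d %d %d\n", corners[0], corners[1], corners[2], corners[3]); /* 20 21 40 24 */
  return 0;
}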
subd_triangle_patch_uv(kg, sd, uv); - - uint4 v = subd_triangle_patch_indices(kg, patch); - - float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x); - float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y); - float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z); - float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w); - - if (subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1 + f0) * 0.5f; - f3 = (f3 + f0) * 0.5f; - } - - float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; - if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; -#endif - - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; - } - else if (desc.element == ATTR_ELEMENT_CORNER) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); - - float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset); - float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset); - float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset); - float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset); - - if (subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1 + f0) * 0.5f; - f3 = (f3 + f0) * 0.5f; - } - - float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; - if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; -#endif - - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; - } - else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { - if (dx) - *dx = 0.0f; - if (dy) - *dy = 0.0f; - - return kernel_tex_fetch(__attributes_float, desc.offset); - } - else { - if (dx) - *dx = 0.0f; - if (dy) - *dy = 0.0f; - - return 0.0f; - } -} - -ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float2 *dx, - ccl_private float2 *dy) -{ - int patch = subd_triangle_patch(kg, sd); - -#ifdef __PATCH_EVAL__ - if (desc.flags & ATTR_SUBDIVIDED) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; - - /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; - - float2 a, dads, dadt; - - a = patch_eval_float2(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); - -# ifdef __RAY_DIFFERENTIALS__ - if (dx || dy) { - float dsdu = dpdu.x; - float dtdu = dpdu.y; - float dsdv = dpdv.x; - float dtdv = dpdv.y; - - if (dx) { - float dudx = sd->du.dx; - float dvdx = sd->dv.dx; - - float dsdx = dsdu * dudx + dsdv * dvdx; - float dtdx = dtdu * dudx + dtdv * dvdx; - - *dx = dads * dsdx + dadt * dtdx; - } - if (dy) { - float dudy = sd->du.dy; - float dvdy = sd->dv.dy; - - float dsdy = dsdu * dudy + dsdv * dvdy; - float dtdy = dtdu * dudy + dtdv * dvdy; - - *dy = dads * dsdy + dadt * dtdy; - } - } -# endif - - return a; - } - else -#endif /* __PATCH_EVAL__ */ - if (desc.element == 
ATTR_ELEMENT_FACE) { - if (dx) - *dx = make_float2(0.0f, 0.0f); - if (dy) - *dy = make_float2(0.0f, 0.0f); - - return kernel_tex_fetch(__attributes_float2, - desc.offset + subd_triangle_patch_face(kg, patch)); - } - else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - uint4 v = subd_triangle_patch_indices(kg, patch); - - float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + v.x); - float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + v.y); - float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + v.z); - float2 f3 = kernel_tex_fetch(__attributes_float2, desc.offset + v.w); - - if (subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1 + f0) * 0.5f; - f3 = (f3 + f0) * 0.5f; - } - - float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; - if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; -#endif - - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; - } - else if (desc.element == ATTR_ELEMENT_CORNER) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); - - float2 f0, f1, f2, f3; - - f0 = kernel_tex_fetch(__attributes_float2, corners[0] + desc.offset); - f1 = kernel_tex_fetch(__attributes_float2, corners[1] + desc.offset); - f2 = kernel_tex_fetch(__attributes_float2, corners[2] + desc.offset); - f3 = kernel_tex_fetch(__attributes_float2, corners[3] + desc.offset); - - if (subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1 + f0) * 0.5f; - f3 = (f3 + f0) * 0.5f; - } - - float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; - if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; -#endif - - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; - } - else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { - if (dx) - *dx = make_float2(0.0f, 0.0f); - if (dy) - *dy = make_float2(0.0f, 0.0f); - - return kernel_tex_fetch(__attributes_float2, desc.offset); - } - else { - if (dx) - *dx = make_float2(0.0f, 0.0f); - if (dy) - *dy = make_float2(0.0f, 0.0f); - - return make_float2(0.0f, 0.0f); - } -} - -ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float3 *dx, - ccl_private float3 *dy) -{ - int patch = subd_triangle_patch(kg, sd); - -#ifdef __PATCH_EVAL__ - if (desc.flags & ATTR_SUBDIVIDED) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; - - /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; - - float3 a, dads, dadt; - a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); - -# ifdef __RAY_DIFFERENTIALS__ - if (dx || dy) { - float dsdu = dpdu.x; - float dtdu = dpdu.y; - float dsdv = dpdv.x; - float dtdv = dpdv.y; - - if (dx) { - float dudx = sd->du.dx; - 
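The __RAY_DIFFERENTIALS__ block in these subdivided branches is the chain rule applied twice: the patch evaluator returns da/ds and da/dt, (s, t) is a linear function of the triangle's (u, v) via dpdu/dpdv, and du/dx, dv/dx come from the ray differentials. Condensed into a standalone helper:

#include <cstdio>

struct Grad { float ds, dt; }; /* da/ds, da/dt from the patch evaluation */

static float attr_screen_derivative(Grad a,
                                    float dsdu, float dtdu, /* dpdu */
                                    float dsdv, float dtdv, /* dpdv */
                                    float dudx, float dvdx) /* ray differentials */
{
  /* (s, t) derivatives against screen x, then the attribute derivative. */
  const float dsdx = dsdu * dudx + dsdv * dvdx;
  const float dtdx = dtdu * dudx + dtdv * dvdx;
  return a.ds * dsdx + a.dt * dtdx; /* da/dx */
}

int main()
{
  const Grad a = {2.0f, -1.0f};
  /* Identity patch parametrization (s = u, t = v), du/dx = 0.01, dv/dx = 0. */
  std::printf("%f\n", attr_screen_derivative(a, 1, 0, 0, 1, 0.01f, 0.0f)); /* 0.02 */
  return 0;
}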
float dvdx = sd->dv.dx; - - float dsdx = dsdu * dudx + dsdv * dvdx; - float dtdx = dtdu * dudx + dtdv * dvdx; - - *dx = dads * dsdx + dadt * dtdx; - } - if (dy) { - float dudy = sd->du.dy; - float dvdy = sd->dv.dy; - - float dsdy = dsdu * dudy + dsdv * dvdy; - float dtdy = dtdu * dudy + dtdv * dvdy; - - *dy = dads * dsdy + dadt * dtdy; - } - } -# endif - - return a; - } - else -#endif /* __PATCH_EVAL__ */ - if (desc.element == ATTR_ELEMENT_FACE) { - if (dx) - *dx = make_float3(0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float3(0.0f, 0.0f, 0.0f); - - return float4_to_float3( - kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch))); - } - else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - uint4 v = subd_triangle_patch_indices(kg, patch); - - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y)); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z)); - float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w)); - - if (subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1 + f0) * 0.5f; - f3 = (f3 + f0) * 0.5f; - } - - float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; - if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; -#endif - - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; - } - else if (desc.element == ATTR_ELEMENT_CORNER) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); - - float3 f0, f1, f2, f3; - - f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset)); - f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset)); - f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset)); - f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset)); - - if (subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1 + f0) * 0.5f; - f3 = (f3 + f0) * 0.5f; - } - - float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; - if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; -#endif - - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; - } - else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { - if (dx) - *dx = make_float3(0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float3(0.0f, 0.0f, 0.0f); - - return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset)); - } - else { - if (dx) - *dx = make_float3(0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float3(0.0f, 0.0f, 0.0f); - - return make_float3(0.0f, 0.0f, 0.0f); - } -} - -ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, - ccl_private const ShaderData *sd, - const 
AttributeDescriptor desc, - ccl_private float4 *dx, - ccl_private float4 *dy) -{ - int patch = subd_triangle_patch(kg, sd); - -#ifdef __PATCH_EVAL__ - if (desc.flags & ATTR_SUBDIVIDED) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; - - /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; - - float4 a, dads, dadt; - if (desc.type == NODE_ATTR_RGBA) { - a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); - } - else { - a = patch_eval_float4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); - } - -# ifdef __RAY_DIFFERENTIALS__ - if (dx || dy) { - float dsdu = dpdu.x; - float dtdu = dpdu.y; - float dsdv = dpdv.x; - float dtdv = dpdv.y; - - if (dx) { - float dudx = sd->du.dx; - float dvdx = sd->dv.dx; - - float dsdx = dsdu * dudx + dsdv * dvdx; - float dtdx = dtdu * dudx + dtdv * dvdx; - - *dx = dads * dsdx + dadt * dtdx; - } - if (dy) { - float dudy = sd->du.dy; - float dvdy = sd->dv.dy; - - float dsdy = dsdu * dudy + dsdv * dvdy; - float dtdy = dtdu * dudy + dtdv * dvdy; - - *dy = dads * dsdy + dadt * dtdy; - } - } -# endif - - return a; - } - else -#endif /* __PATCH_EVAL__ */ - if (desc.element == ATTR_ELEMENT_FACE) { - if (dx) - *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - return kernel_tex_fetch(__attributes_float3, - desc.offset + subd_triangle_patch_face(kg, patch)); - } - else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - uint4 v = subd_triangle_patch_indices(kg, patch); - - float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + v.x); - float4 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + v.y); - float4 f2 = kernel_tex_fetch(__attributes_float3, desc.offset + v.z); - float4 f3 = kernel_tex_fetch(__attributes_float3, desc.offset + v.w); - - if (subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1 + f0) * 0.5f; - f3 = (f3 + f0) * 0.5f; - } - - float4 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float4 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float4 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; - if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; -#endif - - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; - } - else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); - - float4 f0, f1, f2, f3; - - if (desc.element == ATTR_ELEMENT_CORNER_BYTE) { - f0 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset))); - f1 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset))); - f2 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset))); - f3 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset))); - } - else { - f0 = kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset); - f1 = kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset); - f2 = kernel_tex_fetch(__attributes_float3, 
corners[2] + desc.offset); - f3 = kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset); - } - - if (subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1 + f0) * 0.5f; - f3 = (f3 + f0) * 0.5f; - } - - float4 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float4 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float4 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; - if (dy) - *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; -#endif - - return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; - } - else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { - if (dx) - *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - return kernel_tex_fetch(__attributes_float3, desc.offset); - } - else { - if (dx) - *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h deleted file mode 100644 index 233e901c7ca..00000000000 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Triangle Primitive - * - * Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH - * ray intersection we use a precomputed triangle storage to accelerate - * intersection at the cost of more memory usage */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Normal on triangle. */ -ccl_device_inline float3 triangle_normal(KernelGlobals kg, ccl_private ShaderData *sd) -{ - /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - const float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - const float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - const float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); - - /* return normal */ - if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - return normalize(cross(v2 - v0, v1 - v0)); - } - else { - return normalize(cross(v1 - v0, v2 - v0)); - } -} - -/* Point and normal on triangle. 
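triangle_normal above swaps the winding of its cross product when the object has a negative (mirroring) scale baked into its vertices, so the geometric normal keeps pointing out of the surface. A minimal standalone version, with normalization omitted for brevity:

#include <cstdio>

struct Vec3 { float x, y, z; };

static Vec3 cross(const Vec3 &a, const Vec3 &b)
{
  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}
static Vec3 sub(const Vec3 &a, const Vec3 &b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }

static Vec3 triangle_geometric_normal(const Vec3 &v0, const Vec3 &v1, const Vec3 &v2,
                                      bool negative_scale)
{
  /* Swapped edge order compensates for the flipped winding of mirrored objects. */
  return negative_scale ? cross(sub(v2, v0), sub(v1, v0)) :
                          cross(sub(v1, v0), sub(v2, v0));
}

int main()
{
  const Vec3 v0 = {0, 0, 0}, v1 = {1, 0, 0}, v2 = {0, 1, 0};
  const Vec3 n = triangle_geometric_normal(v0, v1, v2, false);
  const Vec3 nf = triangle_geometric_normal(v0, v1, v2, true);
  std::printf("%+.0f %+.0f\n", n.z, nf.z); /* +1 -1 */
  return 0;
}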
*/ -ccl_device_inline void triangle_point_normal(KernelGlobals kg, - int object, - int prim, - float u, - float v, - ccl_private float3 *P, - ccl_private float3 *Ng, - ccl_private int *shader) -{ - /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); - /* compute point */ - float t = 1.0f - u - v; - *P = (u * v0 + v * v1 + t * v2); - /* get object flags */ - int object_flag = kernel_tex_fetch(__object_flag, object); - /* compute normal */ - if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - *Ng = normalize(cross(v2 - v0, v1 - v0)); - } - else { - *Ng = normalize(cross(v1 - v0, v2 - v0)); - } - /* shader`*/ - *shader = kernel_tex_fetch(__tri_shader, prim); -} - -/* Triangle vertex locations */ - -ccl_device_inline void triangle_vertices(KernelGlobals kg, int prim, float3 P[3]) -{ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); -} - -/* Triangle vertex locations and vertex normals */ - -ccl_device_inline void triangle_vertices_and_normals(KernelGlobals kg, - int prim, - float3 P[3], - float3 N[3]) -{ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); - N[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - N[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - N[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); -} - -/* Interpolate smooth vertex normal from vertices */ - -ccl_device_inline float3 -triangle_smooth_normal(KernelGlobals kg, float3 Ng, int prim, float u, float v) -{ - /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); - - float3 N = safe_normalize((1.0f - u - v) * n2 + u * n0 + v * n1); - - return is_zero(N) ? Ng : N; -} - -ccl_device_inline float3 triangle_smooth_normal_unnormalized( - KernelGlobals kg, ccl_private const ShaderData *sd, float3 Ng, int prim, float u, float v) -{ - /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); - - /* ensure that the normals are in object space */ - if (sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED) { - object_inverse_normal_transform(kg, sd, &n0); - object_inverse_normal_transform(kg, sd, &n1); - object_inverse_normal_transform(kg, sd, &n2); - } - - float3 N = (1.0f - u - v) * n2 + u * n0 + v * n1; - - return is_zero(N) ? 
Ng : N; -} - -/* Ray differentials on triangle */ - -ccl_device_inline void triangle_dPdudv(KernelGlobals kg, - int prim, - ccl_private float3 *dPdu, - ccl_private float3 *dPdv) -{ - /* fetch triangle vertex coordinates */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - const float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); - const float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); - const float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); - - /* compute derivatives of P w.r.t. uv */ - *dPdu = (p0 - p2); - *dPdv = (p1 - p2); -} - -/* Reading attributes on various triangle elements */ - -ccl_device float triangle_attribute_float(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float *dx, - ccl_private float *dy) -{ - if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) { - float f0, f1, f2; - - if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x); - f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y); - f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z); - } - else { - const int tri = desc.offset + sd->prim * 3; - f0 = kernel_tex_fetch(__attributes_float, tri + 0); - f1 = kernel_tex_fetch(__attributes_float, tri + 1); - f2 = kernel_tex_fetch(__attributes_float, tri + 2); - } - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; - if (dy) - *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; -#endif - - return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = 0.0f; - if (dy) - *dy = 0.0f; -#endif - - if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - const int offset = (desc.element == ATTR_ELEMENT_FACE) ? 
desc.offset + sd->prim : - desc.offset; - return kernel_tex_fetch(__attributes_float, offset); - } - else { - return 0.0f; - } - } -} - -ccl_device float2 triangle_attribute_float2(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float2 *dx, - ccl_private float2 *dy) -{ - if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) { - float2 f0, f1, f2; - - if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - f0 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.x); - f1 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.y); - f2 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.z); - } - else { - const int tri = desc.offset + sd->prim * 3; - f0 = kernel_tex_fetch(__attributes_float2, tri + 0); - f1 = kernel_tex_fetch(__attributes_float2, tri + 1); - f2 = kernel_tex_fetch(__attributes_float2, tri + 2); - } - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; - if (dy) - *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; -#endif - - return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = make_float2(0.0f, 0.0f); - if (dy) - *dy = make_float2(0.0f, 0.0f); -#endif - - if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim : - desc.offset; - return kernel_tex_fetch(__attributes_float2, offset); - } - else { - return make_float2(0.0f, 0.0f); - } - } -} - -ccl_device float3 triangle_attribute_float3(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float3 *dx, - ccl_private float3 *dy) -{ - if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) { - float3 f0, f1, f2; - - if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x)); - f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y)); - f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z)); - } - else { - const int tri = desc.offset + sd->prim * 3; - f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); - f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); - f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); - } - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; - if (dy) - *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; -#endif - - return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = make_float3(0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float3(0.0f, 0.0f, 0.0f); -#endif - - if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - const int offset = (desc.element == ATTR_ELEMENT_FACE) ? 
desc.offset + sd->prim : - desc.offset; - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset)); - } - else { - return make_float3(0.0f, 0.0f, 0.0f); - } - } -} - -ccl_device float4 triangle_attribute_float4(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc, - ccl_private float4 *dx, - ccl_private float4 *dy) -{ - if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER | - ATTR_ELEMENT_CORNER_BYTE)) { - float4 f0, f1, f2; - - if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - f0 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x); - f1 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y); - f2 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z); - } - else { - const int tri = desc.offset + sd->prim * 3; - if (desc.element == ATTR_ELEMENT_CORNER) { - f0 = kernel_tex_fetch(__attributes_float3, tri + 0); - f1 = kernel_tex_fetch(__attributes_float3, tri + 1); - f2 = kernel_tex_fetch(__attributes_float3, tri + 2); - } - else { - f0 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 0))); - f1 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 1))); - f2 = color_srgb_to_linear_v4( - color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 2))); - } - } - -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; - if (dy) - *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; -#endif - - return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if (dx) - *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - if (dy) - *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); -#endif - - if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim : - desc.offset; - return kernel_tex_fetch(__attributes_float3, offset); - } - else { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h deleted file mode 100644 index 440dc23d124..00000000000 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Copyright 2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Triangle/Ray intersections. - * - * For BVH ray intersection we use a precomputed triangle storage to accelerate - * intersection at the cost of more memory usage. 
- */ - -#pragma once - -#include "kernel/sample/sample_lcg.h" - -CCL_NAMESPACE_BEGIN - -ccl_device_inline bool triangle_intersect(KernelGlobals kg, - ccl_private Intersection *isect, - float3 P, - float3 dir, - float tmax, - uint visibility, - int object, - int prim_addr) -{ - const int prim = kernel_tex_fetch(__prim_index, prim_addr); - const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; -#else - const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); -#endif - float t, u, v; - if (ray_triangle_intersect(P, - dir, - tmax, -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - ssef_verts, -#else - float4_to_float3(tri_a), - float4_to_float3(tri_b), - float4_to_float3(tri_c), -#endif - &u, - &v, - &t)) { -#ifdef __VISIBILITY_FLAG__ - /* Visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags. - */ - if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) -#endif - { - isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : - object; - isect->prim = prim; - isect->type = PRIMITIVE_TRIANGLE; - isect->u = u; - isect->v = v; - isect->t = t; - return true; - } - } - return false; -} - -/* Special ray intersection routines for subsurface scattering. In that case we - * only want to intersect with primitives in the same object, and if case of - * multiple hits we pick a single random primitive as the intersection point. - * Returns whether traversal should be stopped. - */ - -#ifdef __BVH_LOCAL__ -ccl_device_inline bool triangle_intersect_local(KernelGlobals kg, - ccl_private LocalIntersection *local_isect, - float3 P, - float3 dir, - int object, - int local_object, - int prim_addr, - float tmax, - ccl_private uint *lcg_state, - int max_hits) -{ - /* Only intersect with matching object, for instanced objects we - * already know we are only intersecting the right object. */ - if (object == OBJECT_NONE) { - if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { - return false; - } - } - - const int prim = kernel_tex_fetch(__prim_index, prim_addr); - const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; -# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; -# else - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), - tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), - tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); -# endif - float t, u, v; - if (!ray_triangle_intersect(P, - dir, - tmax, -# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - ssef_verts, -# else - tri_a, - tri_b, - tri_c, -# endif - &u, - &v, - &t)) { - return false; - } - - /* If no actual hit information is requested, just return here. */ - if (max_hits == 0) { - return true; - } - - int hit; - if (lcg_state) { - /* Record up to max_hits intersections. 
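/* Illustrative sketch, not part of the patch: the visibility test applied above
 * after a successful geometric hit. Rays and primitives both carry a bitmask of
 * path types, and a hit only counts when the masks overlap; doing this after the
 * geometric test relies on most culling having already happened on BVH node
 * flags. The flag values below are hypothetical. */
#include <cstdio>

enum : unsigned {
  VIS_CAMERA = 1u << 0,
  VIS_SHADOW = 1u << 1,
  VIS_DIFFUSE = 1u << 2,
};

static bool hit_visible(unsigned prim_visibility, unsigned ray_visibility)
{
  return (prim_visibility & ray_visibility) != 0;
}

int main()
{
  const unsigned prim = VIS_CAMERA | VIS_DIFFUSE;
  printf("camera ray counts hit: %d\n", hit_visible(prim, VIS_CAMERA));
  printf("shadow ray counts hit: %d\n", hit_visible(prim, VIS_SHADOW));
  return 0;
}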
*/ - for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { - if (local_isect->hits[i].t == t) { - return false; - } - } - - local_isect->num_hits++; - - if (local_isect->num_hits <= max_hits) { - hit = local_isect->num_hits - 1; - } - else { - /* reservoir sampling: if we are at the maximum number of - * hits, randomly replace element or skip it */ - hit = lcg_step_uint(lcg_state) % local_isect->num_hits; - - if (hit >= max_hits) - return false; - } - } - else { - /* Record closest intersection only. */ - if (local_isect->num_hits && t > local_isect->hits[0].t) { - return false; - } - - hit = 0; - local_isect->num_hits = 1; - } - - /* Record intersection. */ - ccl_private Intersection *isect = &local_isect->hits[hit]; - isect->prim = prim; - isect->object = local_object; - isect->type = PRIMITIVE_TRIANGLE; - isect->u = u; - isect->v = v; - isect->t = t; - - /* Record geometric normal. */ -# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), - tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), - tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); -# endif - local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); - - return false; -} -#endif /* __BVH_LOCAL__ */ - -/* Refine triangle intersection to more precise hit point. For rays that travel - * far the precision is often not so good, this reintersects the primitive from - * a closer distance. */ - -/* Reintersections uses the paper: - * - * Tomas Moeller - * Fast, minimum storage ray/triangle intersection - * http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf - */ - -ccl_device_inline float3 triangle_refine(KernelGlobals kg, - ccl_private ShaderData *sd, - float3 P, - float3 D, - float t, - const int isect_object, - const int isect_prim) -{ -#ifdef __INTERSECTION_REFINE__ - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - if (UNLIKELY(t == 0.0f)) { - return P; - } - const Transform tfm = object_get_inverse_transform(kg, sd); - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D * t); - D = normalize_len(D, &t); - } - - P = P + D * t; - - const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; - const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); - float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); - float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); - float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); - float3 qvec = cross(tvec, edge1); - float3 pvec = cross(D, edge2); - float det = dot(edge1, pvec); - if (det != 0.0f) { - /* If determinant is zero it means ray lies in the plane of - * the triangle. It is possible in theory due to watertight - * nature of triangle intersection. For such cases we simply - * don't refine intersection hoping it'll go all fine. - */ - float rt = dot(edge2, qvec) / det; - P = P + D * rt; - } - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_transform(kg, sd); - P = transform_point(&tfm, P); - } - - return P; -#else - return P + D * t; -#endif -} - -/* Same as above, except that t is assumed to be in object space for - * instancing. 
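/* Illustrative sketch, not part of the patch: the re-intersection step that
 * triangle_refine() above performs, following the cited Moeller-Trumbore
 * formulation. From an approximate hit point P on direction D, it computes the
 * residual distance rt to the triangle's plane and moves P by D * rt. The small
 * vector type below is a hypothetical stand-in for the kernel's float3. */
#include <cstdio>

struct V3 { float x, y, z; };
static V3 sub(V3 a, V3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static V3 cross(V3 a, V3 b) { return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x}; }
static float dot(V3 a, V3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

static V3 refine_hit(V3 P, V3 D, V3 tri_a, V3 tri_b, V3 tri_c)
{
  const V3 edge1 = sub(tri_a, tri_c);
  const V3 edge2 = sub(tri_b, tri_c);
  const V3 tvec = sub(P, tri_c);
  const V3 qvec = cross(tvec, edge1);
  const V3 pvec = cross(D, edge2);
  const float det = dot(edge1, pvec);
  /* det == 0: ray lies in the plane of the triangle, keep the unrefined point. */
  if (det != 0.0f) {
    const float rt = dot(edge2, qvec) / det;
    P = {P.x + D.x * rt, P.y + D.y * rt, P.z + D.z * rt};
  }
  return P;
}

int main()
{
  const V3 a = {0, 0, 0}, b = {1, 0, 0}, c = {0, 1, 0};
  const V3 P = {0.25f, 0.25f, 0.001f}; /* slightly off the triangle plane */
  const V3 D = {0.0f, 0.0f, -1.0f};
  const V3 refined = refine_hit(P, D, a, b, c);
  printf("refined z: %g\n", refined.z);
  return 0;
}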
- */ -ccl_device_inline float3 triangle_refine_local(KernelGlobals kg, - ccl_private ShaderData *sd, - float3 P, - float3 D, - float t, - const int isect_object, - const int isect_prim) -{ -#ifdef __KERNEL_OPTIX__ - /* t is always in world space with OptiX. */ - return triangle_refine(kg, sd, P, D, t, isect_object, isect_prim); -#else - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_inverse_transform(kg, sd); - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D); - D = normalize(D); - } - - P = P + D * t; - -# ifdef __INTERSECTION_REFINE__ - const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; - const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); - float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); - float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); - float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); - float3 qvec = cross(tvec, edge1); - float3 pvec = cross(D, edge2); - float det = dot(edge1, pvec); - if (det != 0.0f) { - /* If determinant is zero it means ray lies in the plane of - * the triangle. It is possible in theory due to watertight - * nature of triangle intersection. For such cases we simply - * don't refine intersection hoping it'll go all fine. - */ - float rt = dot(edge2, qvec) / det; - P = P + D * rt; - } -# endif /* __INTERSECTION_REFINE__ */ - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - const Transform tfm = object_get_transform(kg, sd); - P = transform_point(&tfm, P); - } - - return P; -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h deleted file mode 100644 index 4e83ad6acb3..00000000000 --- a/intern/cycles/kernel/geom/geom_volume.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Volume Primitive - * - * Volumes are just regions inside meshes with the mesh surface as boundaries. - * There isn't as much data to access as for surfaces, there is only a position - * to do lookups in 3D voxel or procedural textures. - * - * 3D voxel textures can be assigned as attributes per mesh, which means the - * same shader can be used for volume objects with different densities, etc. 
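/* Illustrative sketch, not part of the patch: how an interpolated RGBA voxel
 * sample becomes the scalar and color values used for volume shading, as in
 * volume_attribute_value_to_float()/float3() below. The RGBA struct is a
 * hypothetical stand-in for the kernel's float4. */
#include <cstdio>

struct RGBA { float r, g, b, a; };

/* Scalar density: the average of the color channels. */
static float to_density(RGBA v)
{
  return (v.r + v.g + v.b) * (1.0f / 3.0f);
}

/* Color: undo alpha premultiplication after interpolation so partially covered
 * voxels keep their original hue. */
static void to_color(RGBA v, float out[3])
{
  const float inv = (v.a > 1e-6f && v.a != 1.0f) ? 1.0f / v.a : 1.0f;
  out[0] = v.r * inv;
  out[1] = v.g * inv;
  out[2] = v.b * inv;
}

int main()
{
  const RGBA s = {0.2f, 0.1f, 0.05f, 0.5f};
  float c[3];
  to_color(s, c);
  printf("density %f color %f %f %f\n", to_density(s), c[0], c[1], c[2]);
  return 0;
}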
*/ - -#pragma once - -CCL_NAMESPACE_BEGIN - -#ifdef __VOLUME__ - -/* Return position normalized to 0..1 in mesh bounds */ - -ccl_device_inline float3 volume_normalized_position(KernelGlobals kg, - ccl_private const ShaderData *sd, - float3 P) -{ - /* todo: optimize this so it's just a single matrix multiplication when - * possible (not motion blur), or perhaps even just translation + scale */ - const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM); - - object_inverse_position_transform(kg, sd, &P); - - if (desc.offset != ATTR_STD_NOT_FOUND) { - Transform tfm = primitive_attribute_matrix(kg, sd, desc); - P = transform_point(&tfm, P); - } - - return P; -} - -ccl_device float volume_attribute_value_to_float(const float4 value) -{ - return average(float4_to_float3(value)); -} - -ccl_device float volume_attribute_value_to_alpha(const float4 value) -{ - return value.w; -} - -ccl_device float3 volume_attribute_value_to_float3(const float4 value) -{ - if (value.w > 1e-6f && value.w != 1.0f) { - /* For RGBA colors, unpremultiply after interpolation. */ - return float4_to_float3(value) / value.w; - } - else { - return float4_to_float3(value); - } -} - -ccl_device float4 volume_attribute_float4(KernelGlobals kg, - ccl_private const ShaderData *sd, - const AttributeDescriptor desc) -{ - if (desc.element & (ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - return kernel_tex_fetch(__attributes_float3, desc.offset); - } - else if (desc.element == ATTR_ELEMENT_VOXEL) { - /* todo: optimize this so we don't have to transform both here and in - * kernel_tex_image_interp_3d when possible. Also could optimize for the - * common case where transform is translation/scale only. */ - float3 P = sd->P; - object_inverse_position_transform(kg, sd, &P); - InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC : - INTERPOLATION_NONE; - return kernel_tex_image_interp_3d(kg, desc.offset, P, interp); - } - else { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } -} - -#endif - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/motion_curve.h b/intern/cycles/kernel/geom/motion_curve.h new file mode 100644 index 00000000000..2dd213d43f6 --- /dev/null +++ b/intern/cycles/kernel/geom/motion_curve.h @@ -0,0 +1,155 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Motion Curve Primitive + * + * These are stored as regular curves, plus extra positions and radii at times + * other than the frame center. Computing the curve keys at a given ray time is + * a matter of interpolation of the two steps between which the ray time lies. + * + * The extra curve keys are stored as ATTR_STD_MOTION_VERTEX_POSITION. 
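/* Illustrative sketch, not part of the patch: selecting the two motion steps
 * that bracket a ray time in [0, 1] and the blend factor between them, mirroring
 * motion_curve_keys_linear() below. With numsteps steps on either side of the
 * frame center there are numsteps * 2 intervals to interpolate across. */
#include <algorithm>
#include <cstdio>

static void motion_step_for_time(float time, int numsteps, int *step, float *t)
{
  const int maxstep = numsteps * 2;
  *step = std::min((int)(time * maxstep), maxstep - 1);
  *t = time * maxstep - *step;
}

int main()
{
  int step;
  float t;
  motion_step_for_time(0.7f, 2 /* assumed step count */, &step, &t);
  /* Keys are then fetched for step and step + 1 and blended as
   * (1 - t) * key[step] + t * key[step + 1]. */
  printf("step %d, factor %f\n", step, t);
  return 0;
}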
+ */ + +#ifdef __HAIR__ + +ccl_device_inline void motion_curve_keys_for_step_linear(KernelGlobals kg, + int offset, + int numkeys, + int numsteps, + int step, + int k0, + int k1, + float4 keys[2]) +{ + if (step == numsteps) { + /* center step: regular key location */ + keys[0] = kernel_tex_fetch(__curve_keys, k0); + keys[1] = kernel_tex_fetch(__curve_keys, k1); + } + else { + /* center step is not stored in this array */ + if (step > numsteps) + step--; + + offset += step * numkeys; + + keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); + keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); + } +} + +/* return 2 curve key locations */ +ccl_device_inline void motion_curve_keys_linear( + KernelGlobals kg, int object, int prim, float time, int k0, int k1, float4 keys[2]) +{ + /* get motion info */ + int numsteps, numkeys; + object_motion_info(kg, object, &numsteps, NULL, &numkeys); + + /* figure out which steps we need to fetch and their interpolation factor */ + const int maxstep = numsteps * 2; + const int step = min((int)(time * maxstep), maxstep - 1); + const float t = time * maxstep - step; + + /* find attribute */ + const int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch key coordinates */ + float4 next_keys[2]; + + motion_curve_keys_for_step_linear(kg, offset, numkeys, numsteps, step, k0, k1, keys); + motion_curve_keys_for_step_linear(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys); + + /* interpolate between steps */ + keys[0] = (1.0f - t) * keys[0] + t * next_keys[0]; + keys[1] = (1.0f - t) * keys[1] + t * next_keys[1]; +} + +ccl_device_inline void motion_curve_keys_for_step(KernelGlobals kg, + int offset, + int numkeys, + int numsteps, + int step, + int k0, + int k1, + int k2, + int k3, + float4 keys[4]) +{ + if (step == numsteps) { + /* center step: regular key location */ + keys[0] = kernel_tex_fetch(__curve_keys, k0); + keys[1] = kernel_tex_fetch(__curve_keys, k1); + keys[2] = kernel_tex_fetch(__curve_keys, k2); + keys[3] = kernel_tex_fetch(__curve_keys, k3); + } + else { + /* center step is not stored in this array */ + if (step > numsteps) + step--; + + offset += step * numkeys; + + keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); + keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); + keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2); + keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3); + } +} + +/* return 2 curve key locations */ +ccl_device_inline void motion_curve_keys(KernelGlobals kg, + int object, + int prim, + float time, + int k0, + int k1, + int k2, + int k3, + float4 keys[4]) +{ + /* get motion info */ + int numsteps, numkeys; + object_motion_info(kg, object, &numsteps, NULL, &numkeys); + + /* figure out which steps we need to fetch and their interpolation factor */ + const int maxstep = numsteps * 2; + const int step = min((int)(time * maxstep), maxstep - 1); + const float t = time * maxstep - step; + + /* find attribute */ + const int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch key coordinates */ + float4 next_keys[4]; + + motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys); + motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys); + + /* interpolate between steps */ + keys[0] = (1.0f - t) * keys[0] + t * next_keys[0]; + 
keys[1] = (1.0f - t) * keys[1] + t * next_keys[1]; + keys[2] = (1.0f - t) * keys[2] + t * next_keys[2]; + keys[3] = (1.0f - t) * keys[3] + t * next_keys[3]; +} + +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/motion_triangle.h b/intern/cycles/kernel/geom/motion_triangle.h new file mode 100644 index 00000000000..43f894938e0 --- /dev/null +++ b/intern/cycles/kernel/geom/motion_triangle.h @@ -0,0 +1,155 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Motion Triangle Primitive + * + * These are stored as regular triangles, plus extra positions and normals at + * times other than the frame center. Computing the triangle vertex positions + * or normals at a given ray time is a matter of interpolation of the two steps + * between which the ray time lies. + * + * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION + * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes. + */ + +#pragma once + +#include "kernel/bvh/util.h" + +CCL_NAMESPACE_BEGIN + +/* Time interpolation of vertex positions and normals */ + +ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals kg, + uint4 tri_vindex, + int offset, + int numverts, + int numsteps, + int step, + float3 verts[3]) +{ + if (step == numsteps) { + /* center step: regular vertex location */ + verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); + } + else { + /* center step not store in this array */ + if (step > numsteps) + step--; + + offset += step * numverts; + + verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); + verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); + verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); + } +} + +ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals kg, + uint4 tri_vindex, + int offset, + int numverts, + int numsteps, + int step, + float3 normals[3]) +{ + if (step == numsteps) { + /* center step: regular vertex location */ + normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); + normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); + normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + } + else { + /* center step is not stored in this array */ + if (step > numsteps) + step--; + + offset += step * numverts; + + normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); + normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); + normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); + } +} + +ccl_device_inline void motion_triangle_vertices( + KernelGlobals kg, int object, int prim, float time, float3 
verts[3]) +{ + /* get motion info */ + int numsteps, numverts; + object_motion_info(kg, object, &numsteps, &numverts, NULL); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + /* find attribute */ + int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_POSITION); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch vertex coordinates */ + float3 next_verts[3]; + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); + + /* interpolate between steps */ + verts[0] = (1.0f - t) * verts[0] + t * next_verts[0]; + verts[1] = (1.0f - t) * verts[1] + t * next_verts[1]; + verts[2] = (1.0f - t) * verts[2] + t * next_verts[2]; +} + +ccl_device_inline float3 motion_triangle_smooth_normal( + KernelGlobals kg, float3 Ng, int object, int prim, float u, float v, float time) +{ + /* get motion info */ + int numsteps, numverts; + object_motion_info(kg, object, &numsteps, &numverts, NULL); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + /* find attribute */ + int offset = intersection_find_attribute(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch normals */ + float3 normals[3], next_normals[3]; + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + + motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); + motion_triangle_normals_for_step( + kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals); + + /* interpolate between steps */ + normals[0] = (1.0f - t) * normals[0] + t * next_normals[0]; + normals[1] = (1.0f - t) * normals[1] + t * next_normals[1]; + normals[2] = (1.0f - t) * normals[2] + t * next_normals[2]; + + /* interpolate between vertices */ + float w = 1.0f - u - v; + float3 N = safe_normalize(u * normals[0] + v * normals[1] + w * normals[2]); + + return is_zero(N) ? Ng : N; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/motion_triangle_intersect.h b/intern/cycles/kernel/geom/motion_triangle_intersect.h new file mode 100644 index 00000000000..256e7add21e --- /dev/null +++ b/intern/cycles/kernel/geom/motion_triangle_intersect.h @@ -0,0 +1,303 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Motion Triangle Primitive + * + * These are stored as regular triangles, plus extra positions and normals at + * times other than the frame center. 
Computing the triangle vertex positions + * or normals at a given ray time is a matter of interpolation of the two steps + * between which the ray time lies. + * + * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION + * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Refine triangle intersection to more precise hit point. For rays that travel + * far the precision is often not so good, this reintersects the primitive from + * a closer distance. + */ + +ccl_device_inline float3 motion_triangle_refine(KernelGlobals kg, + ccl_private ShaderData *sd, + float3 P, + float3 D, + float t, + const int isect_object, + const int isect_prim, + float3 verts[3]) +{ +#ifdef __INTERSECTION_REFINE__ + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + if (UNLIKELY(t == 0.0f)) { + return P; + } + const Transform tfm = object_get_inverse_transform(kg, sd); + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D * t); + D = normalize_len(D, &t); + } + + P = P + D * t; + + /* Compute refined intersection distance. */ + const float3 e1 = verts[0] - verts[2]; + const float3 e2 = verts[1] - verts[2]; + const float3 s1 = cross(D, e2); + + const float invdivisor = 1.0f / dot(s1, e1); + const float3 d = P - verts[2]; + const float3 s2 = cross(d, e1); + float rt = dot(e2, s2) * invdivisor; + + /* Compute refined position. */ + P = P + D * rt; + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform tfm = object_get_transform(kg, sd); + P = transform_point(&tfm, P); + } + + return P; +#else + return P + D * t; +#endif +} + +/* Same as above, except that t is assumed to be in object space + * for instancing. + */ + +#ifdef __BVH_LOCAL__ +# if defined(__KERNEL_CUDA__) && (defined(i386) || defined(_M_IX86)) +ccl_device_noinline +# else +ccl_device_inline +# endif + float3 + motion_triangle_refine_local(KernelGlobals kg, + ccl_private ShaderData *sd, + float3 P, + float3 D, + float t, + const int isect_object, + const int isect_prim, + float3 verts[3]) +{ +# ifdef __KERNEL_OPTIX__ + /* t is always in world space with OptiX. */ + return motion_triangle_refine(kg, sd, P, D, t, isect_object, isect_prim, verts); +# else +# ifdef __INTERSECTION_REFINE__ + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform tfm = object_get_inverse_transform(kg, sd); + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + D = normalize(D); + } + + P = P + D * t; + + /* compute refined intersection distance */ + const float3 e1 = verts[0] - verts[2]; + const float3 e2 = verts[1] - verts[2]; + const float3 s1 = cross(D, e2); + + const float invdivisor = 1.0f / dot(s1, e1); + const float3 d = P - verts[2]; + const float3 s2 = cross(d, e1); + float rt = dot(e2, s2) * invdivisor; + + P = P + D * rt; + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform tfm = object_get_transform(kg, sd); + P = transform_point(&tfm, P); + } + + return P; +# else /* __INTERSECTION_REFINE__ */ + return P + D * t; +# endif /* __INTERSECTION_REFINE__ */ +# endif +} +#endif /* __BVH_LOCAL__ */ + +/* Ray intersection. We simply compute the vertex positions at the given ray + * time and do a ray intersection with the resulting triangle. 
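/* Illustrative sketch, not part of the patch: a plain Moeller-Trumbore
 * ray/triangle test returning (t, u, v), i.e. the shape of the
 * ray_triangle_intersect() call that the motion triangle code below feeds with
 * time-interpolated vertices. The kernel's actual test differs in details (note
 * the SSE path above); this only shows the basic idea, and the vector type is a
 * hypothetical stand-in for float3. */
#include <cmath>
#include <cstdio>

struct V3 { float x, y, z; };
static V3 sub(V3 a, V3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static V3 cross(V3 a, V3 b) { return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x}; }
static float dot(V3 a, V3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

static bool ray_tri(V3 P, V3 D, float tmax, V3 a, V3 b, V3 c,
                    float *u, float *v, float *t)
{
  const V3 e1 = sub(b, a), e2 = sub(c, a);
  const V3 pvec = cross(D, e2);
  const float det = dot(e1, pvec);
  if (fabsf(det) < 1e-12f) {
    return false; /* ray (nearly) parallel to the triangle plane */
  }
  const float inv_det = 1.0f / det;
  const V3 tvec = sub(P, a);
  *u = dot(tvec, pvec) * inv_det;
  if (*u < 0.0f || *u > 1.0f) {
    return false;
  }
  const V3 qvec = cross(tvec, e1);
  *v = dot(D, qvec) * inv_det;
  if (*v < 0.0f || *u + *v > 1.0f) {
    return false;
  }
  *t = dot(e2, qvec) * inv_det;
  return *t > 0.0f && *t < tmax;
}

int main()
{
  float u, v, t;
  const bool hit = ray_tri({0.2f, 0.2f, 1.0f}, {0.0f, 0.0f, -1.0f}, 10.0f,
                           {0, 0, 0}, {1, 0, 0}, {0, 1, 0}, &u, &v, &t);
  printf("hit %d at t %f (u %f, v %f)\n", hit, t, u, v);
  return 0;
}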
+ */ + +ccl_device_inline bool motion_triangle_intersect(KernelGlobals kg, + ccl_private Intersection *isect, + float3 P, + float3 dir, + float tmax, + float time, + uint visibility, + int object, + int prim_addr) +{ + /* Primitive index for vertex location lookup. */ + int prim = kernel_tex_fetch(__prim_index, prim_addr); + int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : object; + /* Get vertex locations for intersection. */ + float3 verts[3]; + motion_triangle_vertices(kg, fobject, prim, time, verts); + /* Ray-triangle intersection, unoptimized. */ + float t, u, v; + if (ray_triangle_intersect(P, + dir, + tmax, +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + (ssef *)verts, +#else + verts[0], + verts[1], + verts[2], +#endif + &u, + &v, + &t)) { +#ifdef __VISIBILITY_FLAG__ + /* Visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags. + */ + if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) +#endif + { + isect->t = t; + isect->u = u; + isect->v = v; + isect->prim = prim; + isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : + object; + isect->type = PRIMITIVE_MOTION_TRIANGLE; + return true; + } + } + return false; +} + +/* Special ray intersection routines for local intersections. In that case we + * only want to intersect with primitives in the same object, and if case of + * multiple hits we pick a single random primitive as the intersection point. + * Returns whether traversal should be stopped. + */ +#ifdef __BVH_LOCAL__ +ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals kg, + ccl_private LocalIntersection *local_isect, + float3 P, + float3 dir, + float time, + int object, + int local_object, + int prim_addr, + float tmax, + ccl_private uint *lcg_state, + int max_hits) +{ + /* Only intersect with matching object, for instanced objects we + * already know we are only intersecting the right object. */ + if (object == OBJECT_NONE) { + if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { + return false; + } + } + + /* Primitive index for vertex location lookup. */ + int prim = kernel_tex_fetch(__prim_index, prim_addr); + /* Get vertex locations for intersection. */ + float3 verts[3]; + motion_triangle_vertices(kg, local_object, prim, time, verts); + /* Ray-triangle intersection, unoptimized. */ + float t, u, v; + if (!ray_triangle_intersect(P, + dir, + tmax, +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + (ssef *)verts, +# else + verts[0], + verts[1], + verts[2], +# endif + &u, + &v, + &t)) { + return false; + } + + /* If no actual hit information is requested, just return here. */ + if (max_hits == 0) { + return true; + } + + int hit; + if (lcg_state) { + /* Record up to max_hits intersections. */ + for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (local_isect->hits[i].t == t) { + return false; + } + } + + local_isect->num_hits++; + + if (local_isect->num_hits <= max_hits) { + hit = local_isect->num_hits - 1; + } + else { + /* Reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it. + */ + hit = lcg_step_uint(lcg_state) % local_isect->num_hits; + + if (hit >= max_hits) + return false; + } + } + else { + /* Record closest intersection only. */ + if (local_isect->num_hits && t > local_isect->hits[0].t) { + return false; + } + + hit = 0; + local_isect->num_hits = 1; + } + + /* Record intersection. 
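/* Illustrative sketch, not part of the patch: the reservoir sampling scheme used
 * above to keep at most max_hits local hits while giving every recorded hit an
 * equal chance of surviving. rand_uint() is a hypothetical stand-in for the
 * kernel's LCG (lcg_step_uint). */
#include <cstdio>
#include <cstdlib>
#include <vector>

static unsigned rand_uint()
{
  return (unsigned)std::rand();
}

static void record_hit(std::vector<int> &reservoir, int hit_id, int max_hits, int *num_seen)
{
  (*num_seen)++;
  if ((int)reservoir.size() < max_hits) {
    reservoir.push_back(hit_id); /* still room: always keep the hit */
  }
  else {
    /* Replace a random slot with probability max_hits / num_seen, otherwise skip
     * this hit; in the end each hit is kept with equal probability. */
    const int slot = (int)(rand_uint() % (unsigned)(*num_seen));
    if (slot < max_hits) {
      reservoir[slot] = hit_id;
    }
  }
}

int main()
{
  std::vector<int> reservoir;
  int num_seen = 0;
  for (int hit = 0; hit < 100; hit++) {
    record_hit(reservoir, hit, 4, &num_seen);
  }
  for (const int id : reservoir) {
    printf("kept hit %d\n", id);
  }
  return 0;
}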
*/ + ccl_private Intersection *isect = &local_isect->hits[hit]; + isect->t = t; + isect->u = u; + isect->v = v; + isect->prim = prim; + isect->object = local_object; + isect->type = PRIMITIVE_MOTION_TRIANGLE; + + /* Record geometric normal. */ + local_isect->Ng[hit] = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + + return false; +} +#endif /* __BVH_LOCAL__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/motion_triangle_shader.h b/intern/cycles/kernel/geom/motion_triangle_shader.h new file mode 100644 index 00000000000..fc7c181882e --- /dev/null +++ b/intern/cycles/kernel/geom/motion_triangle_shader.h @@ -0,0 +1,117 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Motion Triangle Primitive + * + * These are stored as regular triangles, plus extra positions and normals at + * times other than the frame center. Computing the triangle vertex positions + * or normals at a given ray time is a matter of interpolation of the two steps + * between which the ray time lies. + * + * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION + * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Setup of motion triangle specific parts of ShaderData, moved into this one + * function to more easily share computation of interpolated positions and + * normals */ + +/* return 3 triangle vertex normals */ +ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg, + ccl_private ShaderData *sd, + const float3 P, + const float3 D, + const float ray_t, + const int isect_object, + const int isect_prim, + bool is_local) +{ + /* Get shader. */ + sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + /* Get motion info. */ + /* TODO(sergey): This logic is really similar to motion_triangle_vertices(), + * can we de-duplicate something here? + */ + int numsteps, numverts; + object_motion_info(kg, sd->object, &numsteps, &numverts, NULL); + /* Figure out which steps we need to fetch and their interpolation factor. */ + int maxstep = numsteps * 2; + int step = min((int)(sd->time * maxstep), maxstep - 1); + float t = sd->time * maxstep - step; + /* Find attribute. */ + int offset = intersection_find_attribute(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + /* Fetch vertex coordinates. */ + float3 verts[3], next_verts[3]; + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); + /* Interpolate between steps. */ + verts[0] = (1.0f - t) * verts[0] + t * next_verts[0]; + verts[1] = (1.0f - t) * verts[1] + t * next_verts[1]; + verts[2] = (1.0f - t) * verts[2] + t * next_verts[2]; + /* Compute refined position. 
*/ +#ifdef __BVH_LOCAL__ + if (is_local) { + sd->P = motion_triangle_refine_local(kg, sd, P, D, ray_t, isect_object, isect_prim, verts); + } + else +#endif /* __BVH_LOCAL__*/ + { + sd->P = motion_triangle_refine(kg, sd, P, D, ray_t, isect_object, isect_prim, verts); + } + /* Compute face normal. */ + float3 Ng; + if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0])); + } + else { + Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + } + sd->Ng = Ng; + sd->N = Ng; + /* Compute derivatives of P w.r.t. uv. */ +#ifdef __DPDU__ + sd->dPdu = (verts[0] - verts[2]); + sd->dPdv = (verts[1] - verts[2]); +#endif + /* Compute smooth normal. */ + if (sd->shader & SHADER_SMOOTH_NORMAL) { + /* Find attribute. */ + int offset = intersection_find_attribute(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + /* Fetch vertex coordinates. */ + float3 normals[3], next_normals[3]; + motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); + motion_triangle_normals_for_step( + kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals); + /* Interpolate between steps. */ + normals[0] = (1.0f - t) * normals[0] + t * next_normals[0]; + normals[1] = (1.0f - t) * normals[1] + t * next_normals[1]; + normals[2] = (1.0f - t) * normals[2] + t * next_normals[2]; + /* Interpolate between vertices. */ + float u = sd->u; + float v = sd->v; + float w = 1.0f - u - v; + sd->N = (u * normals[0] + v * normals[1] + w * normals[2]); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/object.h b/intern/cycles/kernel/geom/object.h new file mode 100644 index 00000000000..34a9d639d9d --- /dev/null +++ b/intern/cycles/kernel/geom/object.h @@ -0,0 +1,600 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Object Primitive + * + * All mesh and curve primitives are part of an object. The same mesh and curves + * may be instanced multiple times by different objects. + * + * If the mesh is not instanced multiple times, the object will not be explicitly + * stored as a primitive in the BVH, rather the bare triangles are curved are + * directly primitives in the BVH with world space locations applied, and the object + * ID is looked up afterwards. 
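/* Illustrative sketch, not part of the patch: how a hit resolves its object
 * index, matching the description above. Instanced geometry is entered through
 * an object node, so traversal already carries the object id; non-instanced
 * geometry lives directly in the top-level BVH in world space and the id is
 * looked up from a per-primitive table afterwards (the __prim_object fetch in
 * the intersection code). Names here are hypothetical. */
#include <cstdio>
#include <vector>

static const int OBJECT_NONE_ID = -1;

static int resolve_object(int traversal_object, const std::vector<int> &prim_object, int prim_addr)
{
  return (traversal_object == OBJECT_NONE_ID) ? prim_object[prim_addr] : traversal_object;
}

int main()
{
  const std::vector<int> prim_object = {0, 0, 1, 2}; /* primitive -> owning object */
  printf("non-instanced hit: object %d\n", resolve_object(OBJECT_NONE_ID, prim_object, 2));
  printf("instanced hit:     object %d\n", resolve_object(5, prim_object, 2));
  return 0;
}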
*/ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Object attributes, for now a fixed size and contents */ + +enum ObjectTransform { + OBJECT_TRANSFORM = 0, + OBJECT_INVERSE_TRANSFORM = 1, +}; + +enum ObjectVectorTransform { OBJECT_PASS_MOTION_PRE = 0, OBJECT_PASS_MOTION_POST = 1 }; + +/* Object to world space transformation */ + +ccl_device_inline Transform object_fetch_transform(KernelGlobals kg, + int object, + enum ObjectTransform type) +{ + if (type == OBJECT_INVERSE_TRANSFORM) { + return kernel_tex_fetch(__objects, object).itfm; + } + else { + return kernel_tex_fetch(__objects, object).tfm; + } +} + +/* Lamp to world space transformation */ + +ccl_device_inline Transform lamp_fetch_transform(KernelGlobals kg, int lamp, bool inverse) +{ + if (inverse) { + return kernel_tex_fetch(__lights, lamp).itfm; + } + else { + return kernel_tex_fetch(__lights, lamp).tfm; + } +} + +/* Object to world space transformation for motion vectors */ + +ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals kg, + int object, + enum ObjectVectorTransform type) +{ + int offset = object * OBJECT_MOTION_PASS_SIZE + (int)type; + return kernel_tex_fetch(__object_motion_pass, offset); +} + +/* Motion blurred object transformations */ + +#ifdef __OBJECT_MOTION__ +ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals kg, int object, float time) +{ + const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset; + ccl_global const DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset); + const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1; + + Transform tfm; + transform_motion_array_interpolate(&tfm, motion, num_steps, time); + + return tfm; +} + +ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals kg, + int object, + float time, + ccl_private Transform *itfm) +{ + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_MOTION) { + /* if we do motion blur */ + Transform tfm = object_fetch_transform_motion(kg, object, time); + + if (itfm) + *itfm = transform_quick_inverse(tfm); + + return tfm; + } + else { + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + if (itfm) + *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + + return tfm; + } +} +#endif + +/* Get transform matrix for shading point. */ + +ccl_device_inline Transform object_get_transform(KernelGlobals kg, + ccl_private const ShaderData *sd) +{ +#ifdef __OBJECT_MOTION__ + return (sd->object_flag & SD_OBJECT_MOTION) ? + sd->ob_tfm_motion : + object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); +#else + return object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); +#endif +} + +ccl_device_inline Transform object_get_inverse_transform(KernelGlobals kg, + ccl_private const ShaderData *sd) +{ +#ifdef __OBJECT_MOTION__ + return (sd->object_flag & SD_OBJECT_MOTION) ? 
+ sd->ob_itfm_motion : + object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); +#else + return object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); +#endif +} +/* Transform position from object to world space */ + +ccl_device_inline void object_position_transform(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private float3 *P) +{ +#ifdef __OBJECT_MOTION__ + if (sd->object_flag & SD_OBJECT_MOTION) { + *P = transform_point_auto(&sd->ob_tfm_motion, *P); + return; + } +#endif + + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + *P = transform_point(&tfm, *P); +} + +/* Transform position from world to object space */ + +ccl_device_inline void object_inverse_position_transform(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private float3 *P) +{ +#ifdef __OBJECT_MOTION__ + if (sd->object_flag & SD_OBJECT_MOTION) { + *P = transform_point_auto(&sd->ob_itfm_motion, *P); + return; + } +#endif + + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + *P = transform_point(&tfm, *P); +} + +/* Transform normal from world to object space */ + +ccl_device_inline void object_inverse_normal_transform(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private float3 *N) +{ +#ifdef __OBJECT_MOTION__ + if (sd->object_flag & SD_OBJECT_MOTION) { + if ((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) { + *N = normalize(transform_direction_transposed_auto(&sd->ob_tfm_motion, *N)); + } + return; + } +#endif + + if (sd->object != OBJECT_NONE) { + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + *N = normalize(transform_direction_transposed(&tfm, *N)); + } + else if (sd->type == PRIMITIVE_LAMP) { + Transform tfm = lamp_fetch_transform(kg, sd->lamp, false); + *N = normalize(transform_direction_transposed(&tfm, *N)); + } +} + +/* Transform normal from object to world space */ + +ccl_device_inline void object_normal_transform(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private float3 *N) +{ +#ifdef __OBJECT_MOTION__ + if (sd->object_flag & SD_OBJECT_MOTION) { + *N = normalize(transform_direction_transposed_auto(&sd->ob_itfm_motion, *N)); + return; + } +#endif + + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + *N = normalize(transform_direction_transposed(&tfm, *N)); +} + +/* Transform direction vector from object to world space */ + +ccl_device_inline void object_dir_transform(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private float3 *D) +{ +#ifdef __OBJECT_MOTION__ + if (sd->object_flag & SD_OBJECT_MOTION) { + *D = transform_direction_auto(&sd->ob_tfm_motion, *D); + return; + } +#endif + + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + *D = transform_direction(&tfm, *D); +} + +/* Transform direction vector from world to object space */ + +ccl_device_inline void object_inverse_dir_transform(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private float3 *D) +{ +#ifdef __OBJECT_MOTION__ + if (sd->object_flag & SD_OBJECT_MOTION) { + *D = transform_direction_auto(&sd->ob_itfm_motion, *D); + return; + } +#endif + + const Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + *D = transform_direction(&tfm, *D); +} + +/* Object center position */ + +ccl_device_inline float3 object_location(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + if (sd->object == OBJECT_NONE) + return make_float3(0.0f, 0.0f, 0.0f); + +#ifdef 
__OBJECT_MOTION__ + if (sd->object_flag & SD_OBJECT_MOTION) { + return make_float3(sd->ob_tfm_motion.x.w, sd->ob_tfm_motion.y.w, sd->ob_tfm_motion.z.w); + } +#endif + + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + return make_float3(tfm.x.w, tfm.y.w, tfm.z.w); +} + +/* Color of the object */ + +ccl_device_inline float3 object_color(KernelGlobals kg, int object) +{ + if (object == OBJECT_NONE) + return make_float3(0.0f, 0.0f, 0.0f); + + ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object); + return make_float3(kobject->color[0], kobject->color[1], kobject->color[2]); +} + +/* Pass ID number of object */ + +ccl_device_inline float object_pass_id(KernelGlobals kg, int object) +{ + if (object == OBJECT_NONE) + return 0.0f; + + return kernel_tex_fetch(__objects, object).pass_id; +} + +/* Per lamp random number for shader variation */ + +ccl_device_inline float lamp_random_number(KernelGlobals kg, int lamp) +{ + if (lamp == LAMP_NONE) + return 0.0f; + + return kernel_tex_fetch(__lights, lamp).random; +} + +/* Per object random number for shader variation */ + +ccl_device_inline float object_random_number(KernelGlobals kg, int object) +{ + if (object == OBJECT_NONE) + return 0.0f; + + return kernel_tex_fetch(__objects, object).random_number; +} + +/* Particle ID from which this object was generated */ + +ccl_device_inline int object_particle_id(KernelGlobals kg, int object) +{ + if (object == OBJECT_NONE) + return 0; + + return kernel_tex_fetch(__objects, object).particle_index; +} + +/* Generated texture coordinate on surface from where object was instanced */ + +ccl_device_inline float3 object_dupli_generated(KernelGlobals kg, int object) +{ + if (object == OBJECT_NONE) + return make_float3(0.0f, 0.0f, 0.0f); + + ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object); + return make_float3( + kobject->dupli_generated[0], kobject->dupli_generated[1], kobject->dupli_generated[2]); +} + +/* UV texture coordinate on surface from where object was instanced */ + +ccl_device_inline float3 object_dupli_uv(KernelGlobals kg, int object) +{ + if (object == OBJECT_NONE) + return make_float3(0.0f, 0.0f, 0.0f); + + ccl_global const KernelObject *kobject = &kernel_tex_fetch(__objects, object); + return make_float3(kobject->dupli_uv[0], kobject->dupli_uv[1], 0.0f); +} + +/* Information about mesh for motion blurred triangles and curves */ + +ccl_device_inline void object_motion_info(KernelGlobals kg, + int object, + ccl_private int *numsteps, + ccl_private int *numverts, + ccl_private int *numkeys) +{ + if (numkeys) { + *numkeys = kernel_tex_fetch(__objects, object).numkeys; + } + + if (numsteps) + *numsteps = kernel_tex_fetch(__objects, object).numsteps; + if (numverts) + *numverts = kernel_tex_fetch(__objects, object).numverts; +} + +/* Offset to an objects patch map */ + +ccl_device_inline uint object_patch_map_offset(KernelGlobals kg, int object) +{ + if (object == OBJECT_NONE) + return 0; + + return kernel_tex_fetch(__objects, object).patch_map_offset; +} + +/* Volume step size */ + +ccl_device_inline float object_volume_density(KernelGlobals kg, int object) +{ + if (object == OBJECT_NONE) { + return 1.0f; + } + + return kernel_tex_fetch(__objects, object).volume_density; +} + +ccl_device_inline float object_volume_step_size(KernelGlobals kg, int object) +{ + if (object == OBJECT_NONE) { + return kernel_data.background.volume_step_size; + } + + return kernel_tex_fetch(__object_volume_step, object); +} + +/* Pass ID for shader */ 
+ +ccl_device int shader_pass_id(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id; +} + +/* Cryptomatte ID */ + +ccl_device_inline float object_cryptomatte_id(KernelGlobals kg, int object) +{ + if (object == OBJECT_NONE) + return 0.0f; + + return kernel_tex_fetch(__objects, object).cryptomatte_object; +} + +ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals kg, int object) +{ + if (object == OBJECT_NONE) + return 0; + + return kernel_tex_fetch(__objects, object).cryptomatte_asset; +} + +/* Particle data from which object was instanced */ + +ccl_device_inline uint particle_index(KernelGlobals kg, int particle) +{ + return kernel_tex_fetch(__particles, particle).index; +} + +ccl_device float particle_age(KernelGlobals kg, int particle) +{ + return kernel_tex_fetch(__particles, particle).age; +} + +ccl_device float particle_lifetime(KernelGlobals kg, int particle) +{ + return kernel_tex_fetch(__particles, particle).lifetime; +} + +ccl_device float particle_size(KernelGlobals kg, int particle) +{ + return kernel_tex_fetch(__particles, particle).size; +} + +ccl_device float4 particle_rotation(KernelGlobals kg, int particle) +{ + return kernel_tex_fetch(__particles, particle).rotation; +} + +ccl_device float3 particle_location(KernelGlobals kg, int particle) +{ + return float4_to_float3(kernel_tex_fetch(__particles, particle).location); +} + +ccl_device float3 particle_velocity(KernelGlobals kg, int particle) +{ + return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity); +} + +ccl_device float3 particle_angular_velocity(KernelGlobals kg, int particle) +{ + return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity); +} + +/* Object intersection in BVH */ + +ccl_device_inline float3 bvh_clamp_direction(float3 dir) +{ + const float ooeps = 8.271806E-25f; + return make_float3((fabsf(dir.x) > ooeps) ? dir.x : copysignf(ooeps, dir.x), + (fabsf(dir.y) > ooeps) ? dir.y : copysignf(ooeps, dir.y), + (fabsf(dir.z) > ooeps) ? dir.z : copysignf(ooeps, dir.z)); +} + +ccl_device_inline float3 bvh_inverse_direction(float3 dir) +{ + return rcp(dir); +} + +/* Transform ray into object space to enter static object in BVH */ + +ccl_device_inline float bvh_instance_push(KernelGlobals kg, + int object, + ccl_private const Ray *ray, + ccl_private float3 *P, + ccl_private float3 *dir, + ccl_private float3 *idir) +{ + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + + *P = transform_point(&tfm, ray->P); + + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); + + return len; +} + +/* Transform ray to exit static object in BVH. 
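/* Illustrative sketch, not part of the patch: why bvh_clamp_direction() above
 * nudges near-zero direction components away from zero. BVH slab tests use the
 * per-axis reciprocal of the direction; clamping to a tiny signed epsilon keeps
 * that reciprocal finite while preserving the component's sign. */
#include <cmath>
#include <cstdio>

static const float ooeps = 8.271806e-25f; /* same constant as above */

static float clamp_component(float d)
{
  return (fabsf(d) > ooeps) ? d : copysignf(ooeps, d);
}

int main()
{
  const float dir_y = 0.0f; /* ray parallel to an axis-aligned plane */
  const float safe = clamp_component(dir_y);
  printf("1/dir     = %f\n", 1.0f / dir_y); /* inf: breaks interval arithmetic */
  printf("1/clamped = %g\n", 1.0f / safe);  /* huge but finite */
  return 0;
}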
*/ + +ccl_device_inline float bvh_instance_pop(KernelGlobals kg, + int object, + ccl_private const Ray *ray, + ccl_private float3 *P, + ccl_private float3 *dir, + ccl_private float3 *idir, + float t) +{ + if (t != FLT_MAX) { + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + t /= len(transform_direction(&tfm, ray->D)); + } + + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); + + return t; +} + +/* Same as above, but returns scale factor to apply to multiple intersection distances */ + +ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg, + int object, + ccl_private const Ray *ray, + ccl_private float3 *P, + ccl_private float3 *dir, + ccl_private float3 *idir, + ccl_private float *t_fac) +{ + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + *t_fac = 1.0f / len(transform_direction(&tfm, ray->D)); + + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); +} + +#ifdef __OBJECT_MOTION__ +/* Transform ray into object space to enter motion blurred object in BVH */ + +ccl_device_inline float bvh_instance_motion_push(KernelGlobals kg, + int object, + ccl_private const Ray *ray, + ccl_private float3 *P, + ccl_private float3 *dir, + ccl_private float3 *idir, + ccl_private Transform *itfm) +{ + object_fetch_transform_motion_test(kg, object, ray->time, itfm); + + *P = transform_point(itfm, ray->P); + + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); + + return len; +} + +/* Transform ray to exit motion blurred object in BVH. */ + +ccl_device_inline float bvh_instance_motion_pop(KernelGlobals kg, + int object, + ccl_private const Ray *ray, + ccl_private float3 *P, + ccl_private float3 *dir, + ccl_private float3 *idir, + float t, + ccl_private Transform *itfm) +{ + if (t != FLT_MAX) { + t /= len(transform_direction(itfm, ray->D)); + } + + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); + + return t; +} + +/* Same as above, but returns scale factor to apply to multiple intersection distances */ + +ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals kg, + int object, + ccl_private const Ray *ray, + ccl_private float3 *P, + ccl_private float3 *dir, + ccl_private float3 *idir, + ccl_private float *t_fac, + ccl_private Transform *itfm) +{ + *t_fac = 1.0f / len(transform_direction(itfm, ray->D)); + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); +} + +#endif + +/* TODO: This can be removed when we know if no devices will require explicit + * address space qualifiers for this case. */ + +#define object_position_transform_auto object_position_transform +#define object_dir_transform_auto object_dir_transform +#define object_normal_transform_auto object_normal_transform + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/patch.h b/intern/cycles/kernel/geom/patch.h new file mode 100644 index 00000000000..7d24937a41e --- /dev/null +++ b/intern/cycles/kernel/geom/patch.h @@ -0,0 +1,470 @@ +/* + * Based on code from OpenSubdiv released under this license: + * + * Copyright 2013 Pixar + * + * Licensed under the Apache License, Version 2.0 (the "Apache License") + * with the following modification; you may not use this file except in + * compliance with the Apache License and the following modification to it: + * Section 6. Trademarks. is deleted and replaced with: + * + * 6. 
Trademarks. This License does not grant permission to use the trade + * names, trademarks, service marks, or product names of the Licensor + * and its affiliates, except as required to comply with Section 4(c) of + * the License and to reproduce the content of the NOTICE file. + * + * You may obtain a copy of the Apache License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Apache License with the above modification is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the Apache License for the specific + * language governing permissions and limitations under the Apache License. + */ + +#pragma once + +#include "util/color.h" + +CCL_NAMESPACE_BEGIN + +typedef struct PatchHandle { + int array_index, patch_index, vert_index; +} PatchHandle; + +ccl_device_inline int patch_map_resolve_quadrant(float median, + ccl_private float *u, + ccl_private float *v) +{ + int quadrant = -1; + + if (*u < median) { + if (*v < median) { + quadrant = 0; + } + else { + quadrant = 1; + *v -= median; + } + } + else { + if (*v < median) { + quadrant = 3; + } + else { + quadrant = 2; + *v -= median; + } + *u -= median; + } + + return quadrant; +} + +/* retrieve PatchHandle from patch coords */ + +ccl_device_inline PatchHandle +patch_map_find_patch(KernelGlobals kg, int object, int patch, float u, float v) +{ + PatchHandle handle; + + kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f)); + + int node = (object_patch_map_offset(kg, object) + patch) / 2; + float median = 0.5f; + + for (int depth = 0; depth < 0xff; depth++) { + float delta = median * 0.5f; + + int quadrant = patch_map_resolve_quadrant(median, &u, &v); + kernel_assert(quadrant >= 0); + + uint child = kernel_tex_fetch(__patches, node + quadrant); + + /* is the quadrant a hole? 
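/* Illustrative sketch, not part of the patch: the quadtree descent performed by
 * patch_map_find_patch(). Each level resolves (u, v) to one of four quadrants of
 * the current cell, remaps the coordinate relative to that quadrant, and halves
 * the cell size ("median"). The real code follows child indices into the
 * __patches array; this sketch only prints the quadrant path. */
#include <cstdio>

static int resolve_quadrant(float median, float *u, float *v)
{
  int quadrant;
  if (*u < median) {
    quadrant = (*v < median) ? 0 : 1;
    if (quadrant == 1) {
      *v -= median;
    }
  }
  else {
    quadrant = (*v < median) ? 3 : 2;
    if (quadrant == 2) {
      *v -= median;
    }
    *u -= median;
  }
  return quadrant;
}

int main()
{
  float u = 0.63f, v = 0.27f;
  float median = 0.5f;
  for (int depth = 0; depth < 4; depth++) {
    printf("depth %d: quadrant %d\n", depth, resolve_quadrant(median, &u, &v));
    median *= 0.5f;
  }
  return 0;
}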
*/ + if (!(child & PATCH_MAP_NODE_IS_SET)) { + handle.array_index = -1; + return handle; + } + + uint index = child & PATCH_MAP_NODE_INDEX_MASK; + + if (child & PATCH_MAP_NODE_IS_LEAF) { + handle.array_index = kernel_tex_fetch(__patches, index + 0); + handle.patch_index = kernel_tex_fetch(__patches, index + 1); + handle.vert_index = kernel_tex_fetch(__patches, index + 2); + + return handle; + } + else { + node = index; + } + + median = delta; + } + + /* no leaf found */ + kernel_assert(0); + + handle.array_index = -1; + return handle; +} + +ccl_device_inline void patch_eval_bspline_weights(float t, + ccl_private float *point, + ccl_private float *deriv) +{ + /* The four uniform cubic B-Spline basis functions evaluated at t */ + float inv_6 = 1.0f / 6.0f; + + float t2 = t * t; + float t3 = t * t2; + + point[0] = inv_6 * (1.0f - 3.0f * (t - t2) - t3); + point[1] = inv_6 * (4.0f - 6.0f * t2 + 3.0f * t3); + point[2] = inv_6 * (1.0f + 3.0f * (t + t2 - t3)); + point[3] = inv_6 * t3; + + /* Derivatives of the above four basis functions at t */ + deriv[0] = -0.5f * t2 + t - 0.5f; + deriv[1] = 1.5f * t2 - 2.0f * t; + deriv[2] = -1.5f * t2 + t + 0.5f; + deriv[3] = 0.5f * t2; +} + +ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits, + ccl_private float *s, + ccl_private float *t) +{ + int boundary = ((bits >> 8) & 0xf); + + if (boundary & 1) { + t[2] -= t[0]; + t[1] += 2 * t[0]; + t[0] = 0; + } + + if (boundary & 2) { + s[1] -= s[3]; + s[2] += 2 * s[3]; + s[3] = 0; + } + + if (boundary & 4) { + t[1] -= t[3]; + t[2] += 2 * t[3]; + t[3] = 0; + } + + if (boundary & 8) { + s[2] -= s[0]; + s[1] += 2 * s[0]; + s[0] = 0; + } +} + +ccl_device_inline int patch_eval_depth(uint patch_bits) +{ + return (patch_bits & 0xf); +} + +ccl_device_inline float patch_eval_param_fraction(uint patch_bits) +{ + bool non_quad_root = (patch_bits >> 4) & 0x1; + int depth = patch_eval_depth(patch_bits); + + if (non_quad_root) { + return 1.0f / (float)(1 << (depth - 1)); + } + else { + return 1.0f / (float)(1 << depth); + } +} + +ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, + ccl_private float *u, + ccl_private float *v) +{ + float frac = patch_eval_param_fraction(patch_bits); + + int iu = (patch_bits >> 22) & 0x3ff; + int iv = (patch_bits >> 12) & 0x3ff; + + /* top left corner */ + float pu = (float)iu * frac; + float pv = (float)iv * frac; + + /* normalize uv coordinates */ + *u = (*u - pu) / frac; + *v = (*v - pv) / frac; +} + +/* retrieve patch control indices */ + +ccl_device_inline int patch_eval_indices(KernelGlobals kg, + ccl_private const PatchHandle *handle, + int channel, + int indices[PATCH_MAX_CONTROL_VERTS]) +{ + int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index; + + /* XXX: regular patches only */ + for (int i = 0; i < 16; i++) { + indices[i] = kernel_tex_fetch(__patches, index_base + i); + } + + return 16; +} + +/* evaluate patch basis functions */ + +ccl_device_inline void patch_eval_basis(KernelGlobals kg, + ccl_private const PatchHandle *handle, + float u, + float v, + float weights[PATCH_MAX_CONTROL_VERTS], + float weights_du[PATCH_MAX_CONTROL_VERTS], + float weights_dv[PATCH_MAX_CONTROL_VERTS]) +{ + uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */ + float d_scale = 1 << patch_eval_depth(patch_bits); + + bool non_quad_root = (patch_bits >> 4) & 0x1; + if (non_quad_root) { + d_scale *= 0.5f; + } + + patch_eval_normalize_coords(patch_bits, &u, &v); + + /* XXX: regular patches only for 
now. */ + + float s[4], t[4], ds[4], dt[4]; + + patch_eval_bspline_weights(u, s, ds); + patch_eval_bspline_weights(v, t, dt); + + patch_eval_adjust_boundary_weights(patch_bits, s, t); + patch_eval_adjust_boundary_weights(patch_bits, ds, dt); + + for (int k = 0; k < 4; k++) { + for (int l = 0; l < 4; l++) { + weights[4 * k + l] = s[l] * t[k]; + weights_du[4 * k + l] = ds[l] * t[k] * d_scale; + weights_dv[4 * k + l] = s[l] * dt[k] * d_scale; + } + } +} + +/* generic function for evaluating indices and weights from patch coords */ + +ccl_device_inline int patch_eval_control_verts(KernelGlobals kg, + int object, + int patch, + float u, + float v, + int channel, + int indices[PATCH_MAX_CONTROL_VERTS], + float weights[PATCH_MAX_CONTROL_VERTS], + float weights_du[PATCH_MAX_CONTROL_VERTS], + float weights_dv[PATCH_MAX_CONTROL_VERTS]) +{ + PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v); + kernel_assert(handle.array_index >= 0); + + int num_control = patch_eval_indices(kg, &handle, channel, indices); + patch_eval_basis(kg, &handle, u, v, weights, weights_du, weights_dv); + + return num_control; +} + +/* functions for evaluating attributes on patches */ + +ccl_device float patch_eval_float(KernelGlobals kg, + ccl_private const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + ccl_private float *du, + ccl_private float *dv) +{ + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float val = 0.0f; + if (du) + *du = 0.0f; + if (dv) + *dv = 0.0f; + + for (int i = 0; i < num_control; i++) { + float v = kernel_tex_fetch(__attributes_float, offset + indices[i]); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; +} + +ccl_device float2 patch_eval_float2(KernelGlobals kg, + ccl_private const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + ccl_private float2 *du, + ccl_private float2 *dv) +{ + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float2 val = make_float2(0.0f, 0.0f); + if (du) + *du = make_float2(0.0f, 0.0f); + if (dv) + *dv = make_float2(0.0f, 0.0f); + + for (int i = 0; i < num_control; i++) { + float2 v = kernel_tex_fetch(__attributes_float2, offset + indices[i]); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; +} + +ccl_device float3 patch_eval_float3(KernelGlobals kg, + ccl_private const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + ccl_private float3 *du, + ccl_private float3 *dv) +{ + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float3 val = make_float3(0.0f, 0.0f, 0.0f); + if (du) + *du = make_float3(0.0f, 0.0f, 0.0f); + if (dv) + *dv = make_float3(0.0f, 0.0f, 0.0f); + + 
for (int i = 0; i < num_control; i++) { + float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i])); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; +} + +ccl_device float4 patch_eval_float4(KernelGlobals kg, + ccl_private const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + ccl_private float4 *du, + ccl_private float4 *dv) +{ + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float4 val = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (du) + *du = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dv) + *dv = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + for (int i = 0; i < num_control; i++) { + float4 v = kernel_tex_fetch(__attributes_float3, offset + indices[i]); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; +} + +ccl_device float4 patch_eval_uchar4(KernelGlobals kg, + ccl_private const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + ccl_private float4 *du, + ccl_private float4 *dv) +{ + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float4 val = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (du) + *du = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dv) + *dv = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + for (int i = 0; i < num_control; i++) { + float4 v = color_srgb_to_linear_v4( + color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, offset + indices[i]))); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/primitive.h b/intern/cycles/kernel/geom/primitive.h new file mode 100644 index 00000000000..7a8921b6d6e --- /dev/null +++ b/intern/cycles/kernel/geom/primitive.h @@ -0,0 +1,351 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Primitive Utilities + * + * Generic functions to look up mesh, curve and volume primitive attributes for + * shading and render passes. */ + +#pragma once + +#include "kernel/camera/projection.h" + +CCL_NAMESPACE_BEGIN + +/* Surface Attributes + * + * Read geometry attributes for surface shading. This is distinct from volume + * attributes for performance, mainly for GPU performance to avoid bringing in + * heavy volume interpolation code. 
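patch_eval_uchar4 above decodes byte colors before blending: each channel is mapped from [0, 255] to [0, 1] and then from sRGB to linear, with alpha typically passed through unchanged. A standalone sketch of that decode, assuming the standard IEC sRGB transfer curve (the exact color_srgb_to_linear_v4 in util/color.h may use a different or approximated curve):

#include <cmath>

struct float4_s {
  float x, y, z, w;
};

/* Standard sRGB electro-optical transfer function for one channel. */
static float srgb_to_linear_sketch(float c)
{
  return (c <= 0.04045f) ? c / 12.92f : std::pow((c + 0.055f) / 1.055f, 2.4f);
}

/* Byte color -> linear float color; alpha is treated as already linear. */
static float4_s decode_byte_color_sketch(unsigned char r, unsigned char g,
                                         unsigned char b, unsigned char a)
{
  const float inv = 1.0f / 255.0f;
  float4_s out;
  out.x = srgb_to_linear_sketch(r * inv);
  out.y = srgb_to_linear_sketch(g * inv);
  out.z = srgb_to_linear_sketch(b * inv);
  out.w = a * inv;
  return out;
}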
*/ + +ccl_device_inline float primitive_surface_attribute_float(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float *dx, + ccl_private float *dy) +{ + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float(kg, sd, desc, dx, dy); + } +#ifdef __HAIR__ + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float(kg, sd, desc, dx, dy); + } +#endif + else { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + return 0.0f; + } +} + +ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float2 *dx, + ccl_private float2 *dy) +{ + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float2(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float2(kg, sd, desc, dx, dy); + } +#ifdef __HAIR__ + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float2(kg, sd, desc, dx, dy); + } +#endif + else { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + return make_float2(0.0f, 0.0f); + } +} + +ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float3 *dx, + ccl_private float3 *dy) +{ + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float3(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); + } +#ifdef __HAIR__ + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float3(kg, sd, desc, dx, dy); + } +#endif + else { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); + } +} + +ccl_device_forceinline float4 primitive_surface_attribute_float4(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float4 *dx, + ccl_private float4 *dy) +{ + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float4(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float4(kg, sd, desc, dx, dy); + } +#ifdef __HAIR__ + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float4(kg, sd, desc, dx, dy); + } +#endif + else { + if (dx) + *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } +} + +#ifdef __VOLUME__ +/* Volume Attributes + * + * Read geometry attributes for volume shading. This is distinct from surface + * attributes for performance, mainly for GPU performance to avoid bringing in + * heavy volume interpolation code. 
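The dispatchers above pick the triangle, subdivision-triangle or curve lookup based on the primitive type in ShaderData. A hedged usage sketch of how a shader node might fetch a scalar attribute together with its screen-space derivatives; the sketch assumes the kernel context (find_attribute and ShaderData set up elsewhere), and ATTR_STD_POINTINESS is just an example of a standard attribute ID:

/* Minimal sketch: fetch a float attribute by standard ID, with optional
 * screen-space derivatives for filtering. */
ccl_device float fetch_scalar_attribute_sketch(KernelGlobals kg,
                                               ccl_private const ShaderData *sd,
                                               ccl_private float *dx,
                                               ccl_private float *dy)
{
  const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POINTINESS);

  if (desc.offset == ATTR_STD_NOT_FOUND) {
    if (dx)
      *dx = 0.0f;
    if (dy)
      *dy = 0.0f;
    return 0.0f;
  }

  /* Dispatches to triangle, subdivision-triangle or curve lookups as above. */
  return primitive_surface_attribute_float(kg, sd, desc, dx, dy);
}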
*/ + +ccl_device_inline bool primitive_is_volume_attribute(ccl_private const ShaderData *sd, + const AttributeDescriptor desc) +{ + return sd->type == PRIMITIVE_VOLUME; +} + +ccl_device_inline float primitive_volume_attribute_float(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc) +{ + if (primitive_is_volume_attribute(sd, desc)) { + return volume_attribute_value_to_float(volume_attribute_float4(kg, sd, desc)); + } + else { + return 0.0f; + } +} + +ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc) +{ + if (primitive_is_volume_attribute(sd, desc)) { + return volume_attribute_value_to_float3(volume_attribute_float4(kg, sd, desc)); + } + else { + return make_float3(0.0f, 0.0f, 0.0f); + } +} + +ccl_device_inline float4 primitive_volume_attribute_float4(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc) +{ + if (primitive_is_volume_attribute(sd, desc)) { + return volume_attribute_float4(kg, sd, desc); + } + else { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } +} +#endif + +/* Default UV coordinate */ + +ccl_device_inline float3 primitive_uv(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV); + + if (desc.offset == ATTR_STD_NOT_FOUND) + return make_float3(0.0f, 0.0f, 0.0f); + + float2 uv = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); + return make_float3(uv.x, uv.y, 1.0f); +} + +/* Ptex coordinates */ + +ccl_device bool primitive_ptex(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float2 *uv, + ccl_private int *face_id) +{ + /* storing ptex data as attributes is not memory efficient but simple for tests */ + const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID); + const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV); + + if (desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND) + return false; + + float3 uv3 = primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL); + float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL); + + *uv = make_float2(uv3.x, uv3.y); + *face_id = (int)face_id_f; + + return true; +} + +/* Surface tangent */ + +ccl_device float3 primitive_tangent(KernelGlobals kg, ccl_private ShaderData *sd) +{ +#ifdef __HAIR__ + if (sd->type & PRIMITIVE_ALL_CURVE) +# ifdef __DPDU__ + return normalize(sd->dPdu); +# else + return make_float3(0.0f, 0.0f, 0.0f); +# endif +#endif + + /* try to create spherical tangent from generated coordinates */ + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED); + + if (desc.offset != ATTR_STD_NOT_FOUND) { + float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); + object_normal_transform(kg, sd, &data); + return cross(sd->N, normalize(cross(data, sd->N))); + } + else { + /* otherwise use surface derivatives */ +#ifdef __DPDU__ + return normalize(sd->dPdu); +#else + return make_float3(0.0f, 0.0f, 0.0f); +#endif + } +} + +/* Motion vector for motion pass */ + +ccl_device_inline float4 primitive_motion_vector(KernelGlobals kg, + ccl_private const ShaderData *sd) +{ + /* center position */ + float3 center; + +#ifdef __HAIR__ + bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE; + if (is_curve_primitive) { + center = 
curve_motion_center_location(kg, sd); + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_position_transform(kg, sd, ¢er); + } + } + else +#endif + center = sd->P; + + float3 motion_pre = center, motion_post = center; + + /* deformation motion */ + AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION); + + if (desc.offset != ATTR_STD_NOT_FOUND) { + /* get motion info */ + int numverts, numkeys; + object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); + + /* lookup attributes */ + motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + + desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE) ? numverts : numkeys; + motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + +#ifdef __HAIR__ + if (is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { + object_position_transform(kg, sd, &motion_pre); + object_position_transform(kg, sd, &motion_post); + } +#endif + } + + /* object motion. note that depending on the mesh having motion vectors, this + * transformation was set match the world/object space of motion_pre/post */ + Transform tfm; + + tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_PRE); + motion_pre = transform_point(&tfm, motion_pre); + + tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_POST); + motion_post = transform_point(&tfm, motion_post); + + float3 motion_center; + + /* camera motion, for perspective/orthographic motion.pre/post will be a + * world-to-raster matrix, for panorama it's world-to-camera */ + if (kernel_data.cam.type != CAMERA_PANORAMA) { + ProjectionTransform projection = kernel_data.cam.worldtoraster; + motion_center = transform_perspective(&projection, center); + + projection = kernel_data.cam.perspective_pre; + motion_pre = transform_perspective(&projection, motion_pre); + + projection = kernel_data.cam.perspective_post; + motion_post = transform_perspective(&projection, motion_post); + } + else { + tfm = kernel_data.cam.worldtocamera; + motion_center = normalize(transform_point(&tfm, center)); + motion_center = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_center)); + motion_center.x *= kernel_data.cam.width; + motion_center.y *= kernel_data.cam.height; + + tfm = kernel_data.cam.motion_pass_pre; + motion_pre = normalize(transform_point(&tfm, motion_pre)); + motion_pre = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_pre)); + motion_pre.x *= kernel_data.cam.width; + motion_pre.y *= kernel_data.cam.height; + + tfm = kernel_data.cam.motion_pass_post; + motion_post = normalize(transform_point(&tfm, motion_post)); + motion_post = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_post)); + motion_post.x *= kernel_data.cam.width; + motion_post.y *= kernel_data.cam.height; + } + + motion_pre = motion_pre - motion_center; + motion_post = motion_center - motion_post; + + return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/shader_data.h b/intern/cycles/kernel/geom/shader_data.h new file mode 100644 index 00000000000..46bda2b656c --- /dev/null +++ b/intern/cycles/kernel/geom/shader_data.h @@ -0,0 +1,447 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Functions to initialize ShaderData given. + * + * Could be from an incoming ray, intersection or sampled position. */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* ShaderData setup from incoming ray */ + +#ifdef __OBJECT_MOTION__ +ccl_device void shader_setup_object_transforms(KernelGlobals kg, + ccl_private ShaderData *ccl_restrict sd, + float time) +{ + if (sd->object_flag & SD_OBJECT_MOTION) { + sd->ob_tfm_motion = object_fetch_transform_motion(kg, sd->object, time); + sd->ob_itfm_motion = transform_quick_inverse(sd->ob_tfm_motion); + } +} +#endif + +/* TODO: break this up if it helps reduce register pressure to load data from + * global memory as we write it to shader-data. */ +ccl_device_inline void shader_setup_from_ray(KernelGlobals kg, + ccl_private ShaderData *ccl_restrict sd, + ccl_private const Ray *ccl_restrict ray, + ccl_private const Intersection *ccl_restrict isect) +{ + /* Read intersection data into shader globals. + * + * TODO: this is redundant, could potentially remove some of this from + * ShaderData but would need to ensure that it also works for shadow + * shader evaluation. */ + sd->u = isect->u; + sd->v = isect->v; + sd->ray_length = isect->t; + sd->type = isect->type; + sd->object = isect->object; + sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); + sd->prim = isect->prim; + sd->lamp = LAMP_NONE; + sd->flag = 0; + + /* Read matrices and time. */ + sd->time = ray->time; + +#ifdef __OBJECT_MOTION__ + shader_setup_object_transforms(kg, sd, ray->time); +#endif + + /* Read ray data into shader globals. 
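shader_setup_from_ray, being assembled here, is typically called right after a successful scene intersection to turn the raw hit record into a full shading context. A hypothetical caller-side sketch; the scene_intersect() signature and the PATH_RAY_CAMERA visibility flag are assumptions for illustration only, not taken from this patch:

/* Illustrative only: how ShaderData is commonly populated after a hit. */
ccl_device bool shade_primary_hit_sketch(KernelGlobals kg,
                                         ccl_private ShaderData *sd,
                                         ccl_private const Ray *ray)
{
  Intersection isect;
  if (!scene_intersect(kg, ray, PATH_RAY_CAMERA, &isect)) {
    return false; /* nothing hit; the background shader would be evaluated instead */
  }

  /* Copies the hit data, reads matrices/time and derives P, N, Ng, dPdu/dPdv. */
  shader_setup_from_ray(kg, sd, ray, &isect);
  return true;
}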
*/ + sd->I = -ray->D; + +#ifdef __HAIR__ + if (sd->type & PRIMITIVE_ALL_CURVE) { + /* curve */ + curve_shader_setup(kg, sd, ray->P, ray->D, isect->t, isect->object, isect->prim); + } + else +#endif + if (sd->type & PRIMITIVE_TRIANGLE) { + /* static triangle */ + float3 Ng = triangle_normal(kg, sd); + sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + + /* vectors */ + sd->P = triangle_refine(kg, sd, ray->P, ray->D, isect->t, isect->object, isect->prim); + sd->Ng = Ng; + sd->N = Ng; + + /* smooth normal */ + if (sd->shader & SHADER_SMOOTH_NORMAL) + sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); + +#ifdef __DPDU__ + /* dPdu/dPdv */ + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); +#endif + } + else { + /* motion triangle */ + motion_triangle_shader_setup( + kg, sd, ray->P, ray->D, isect->t, isect->object, isect->prim, false); + } + + sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + /* instance transform */ + object_normal_transform_auto(kg, sd, &sd->N); + object_normal_transform_auto(kg, sd, &sd->Ng); +#ifdef __DPDU__ + object_dir_transform_auto(kg, sd, &sd->dPdu); + object_dir_transform_auto(kg, sd, &sd->dPdv); +#endif + } + + /* backfacing test */ + bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); + + if (backfacing) { + sd->flag |= SD_BACKFACING; + sd->Ng = -sd->Ng; + sd->N = -sd->N; +#ifdef __DPDU__ + sd->dPdu = -sd->dPdu; + sd->dPdv = -sd->dPdv; +#endif + } + +#ifdef __RAY_DIFFERENTIALS__ + /* differentials */ + differential_transfer_compact(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, sd->ray_length); + differential_incoming_compact(&sd->dI, ray->D, ray->dD); + differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); +#endif +} + +/* ShaderData setup from position sampled on mesh */ + +ccl_device_inline void shader_setup_from_sample(KernelGlobals kg, + ccl_private ShaderData *ccl_restrict sd, + const float3 P, + const float3 Ng, + const float3 I, + int shader, + int object, + int prim, + float u, + float v, + float t, + float time, + bool object_space, + int lamp) +{ + /* vectors */ + sd->P = P; + sd->N = Ng; + sd->Ng = Ng; + sd->I = I; + sd->shader = shader; + if (prim != PRIM_NONE) + sd->type = PRIMITIVE_TRIANGLE; + else if (lamp != LAMP_NONE) + sd->type = PRIMITIVE_LAMP; + else + sd->type = PRIMITIVE_NONE; + + /* primitive */ + sd->object = object; + sd->lamp = LAMP_NONE; + /* Currently no access to bvh prim index for strand sd->prim. 
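Both setup paths above use the same backfacing convention: sd->I points from the hit point back toward the ray origin (I = -D), so a primitive is backfacing when its geometric normal faces away from I, and in that case Ng, N and the surface derivatives are all negated so shading always sees a front-facing frame. A tiny standalone sketch of just that test, reusing the kernel float3 helpers:

/* Sketch of the backfacing convention: D is the incoming ray direction,
 * Ng the geometric normal, and I = -D points back toward the viewer. */
ccl_device_inline bool is_backfacing_sketch(const float3 Ng, const float3 D)
{
  const float3 I = -D;
  return dot(Ng, I) < 0.0f;
}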
*/ + sd->prim = prim; + sd->u = u; + sd->v = v; + sd->time = time; + sd->ray_length = t; + + sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->object_flag = 0; + if (sd->object != OBJECT_NONE) { + sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object); + +#ifdef __OBJECT_MOTION__ + shader_setup_object_transforms(kg, sd, time); +#endif + } + else if (lamp != LAMP_NONE) { + sd->lamp = lamp; + } + + /* transform into world space */ + if (object_space) { + object_position_transform_auto(kg, sd, &sd->P); + object_normal_transform_auto(kg, sd, &sd->Ng); + sd->N = sd->Ng; + object_dir_transform_auto(kg, sd, &sd->I); + } + + if (sd->type & PRIMITIVE_TRIANGLE) { + /* smooth normal */ + if (sd->shader & SHADER_SMOOTH_NORMAL) { + sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_normal_transform_auto(kg, sd, &sd->N); + } + } + + /* dPdu/dPdv */ +#ifdef __DPDU__ + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_dir_transform_auto(kg, sd, &sd->dPdu); + object_dir_transform_auto(kg, sd, &sd->dPdv); + } +#endif + } + else { +#ifdef __DPDU__ + sd->dPdu = zero_float3(); + sd->dPdv = zero_float3(); +#endif + } + + /* backfacing test */ + if (sd->prim != PRIM_NONE) { + bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); + + if (backfacing) { + sd->flag |= SD_BACKFACING; + sd->Ng = -sd->Ng; + sd->N = -sd->N; +#ifdef __DPDU__ + sd->dPdu = -sd->dPdu; + sd->dPdv = -sd->dPdv; +#endif + } + } + +#ifdef __RAY_DIFFERENTIALS__ + /* no ray differentials here yet */ + sd->dP = differential3_zero(); + sd->dI = differential3_zero(); + sd->du = differential_zero(); + sd->dv = differential_zero(); +#endif +} + +/* ShaderData setup for displacement */ + +ccl_device void shader_setup_from_displace(KernelGlobals kg, + ccl_private ShaderData *ccl_restrict sd, + int object, + int prim, + float u, + float v) +{ + float3 P, Ng, I = zero_float3(); + int shader; + + triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); + + /* force smooth shading for displacement */ + shader |= SHADER_SMOOTH_NORMAL; + + shader_setup_from_sample( + kg, + sd, + P, + Ng, + I, + shader, + object, + prim, + u, + v, + 0.0f, + 0.5f, + !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED), + LAMP_NONE); +} + +/* ShaderData setup for point on curve. */ + +ccl_device void shader_setup_from_curve(KernelGlobals kg, + ccl_private ShaderData *ccl_restrict sd, + int object, + int prim, + int segment, + float u) +{ + /* Primitive */ + sd->type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE_THICK, segment); + sd->lamp = LAMP_NONE; + sd->prim = prim; + sd->u = u; + sd->v = 0.0f; + sd->time = 0.5f; + sd->ray_length = 0.0f; + + /* Shader */ + sd->shader = kernel_tex_fetch(__curves, prim).shader_id; + sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + + /* Object */ + sd->object = object; + sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); +#ifdef __OBJECT_MOTION__ + shader_setup_object_transforms(kg, sd, sd->time); +#endif + + /* Get control points. 
*/ + KernelCurve kcurve = kernel_tex_fetch(__curves, prim); + + int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + int ka = max(k0 - 1, kcurve.first_key); + int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); + + float4 P_curve[4]; + + P_curve[0] = kernel_tex_fetch(__curve_keys, ka); + P_curve[1] = kernel_tex_fetch(__curve_keys, k0); + P_curve[2] = kernel_tex_fetch(__curve_keys, k1); + P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + + /* Interpolate position and tangent. */ + sd->P = float4_to_float3(catmull_rom_basis_derivative(P_curve, sd->u)); +#ifdef __DPDU__ + sd->dPdu = float4_to_float3(catmull_rom_basis_derivative(P_curve, sd->u)); +#endif + + /* Transform into world space */ + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_position_transform_auto(kg, sd, &sd->P); +#ifdef __DPDU__ + object_dir_transform_auto(kg, sd, &sd->dPdu); +#endif + } + + /* No view direction, normals or bitangent. */ + sd->I = zero_float3(); + sd->N = zero_float3(); + sd->Ng = zero_float3(); +#ifdef __DPDU__ + sd->dPdv = zero_float3(); +#endif + + /* No ray differentials currently. */ +#ifdef __RAY_DIFFERENTIALS__ + sd->dP = differential3_zero(); + sd->dI = differential3_zero(); + sd->du = differential_zero(); + sd->dv = differential_zero(); +#endif +} + +/* ShaderData setup from ray into background */ + +ccl_device_inline void shader_setup_from_background(KernelGlobals kg, + ccl_private ShaderData *ccl_restrict sd, + const float3 ray_P, + const float3 ray_D, + const float ray_time) +{ + /* for NDC coordinates */ + sd->ray_P = ray_P; + + /* vectors */ + sd->P = ray_D; + sd->N = -ray_D; + sd->Ng = -ray_D; + sd->I = -ray_D; + sd->shader = kernel_data.background.surface_shader; + sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->object_flag = 0; + sd->time = ray_time; + sd->ray_length = 0.0f; + + sd->object = OBJECT_NONE; + sd->lamp = LAMP_NONE; + sd->prim = PRIM_NONE; + sd->u = 0.0f; + sd->v = 0.0f; + +#ifdef __DPDU__ + /* dPdu/dPdv */ + sd->dPdu = zero_float3(); + sd->dPdv = zero_float3(); +#endif + +#ifdef __RAY_DIFFERENTIALS__ + /* differentials */ + sd->dP = differential3_zero(); /* TODO: ray->dP */ + differential_incoming(&sd->dI, sd->dP); + sd->du = differential_zero(); + sd->dv = differential_zero(); +#endif +} + +/* ShaderData setup from point inside volume */ + +#ifdef __VOLUME__ +ccl_device_inline void shader_setup_from_volume(KernelGlobals kg, + ccl_private ShaderData *ccl_restrict sd, + ccl_private const Ray *ccl_restrict ray) +{ + + /* vectors */ + sd->P = ray->P; + sd->N = -ray->D; + sd->Ng = -ray->D; + sd->I = -ray->D; + sd->shader = SHADER_NONE; + sd->flag = 0; + sd->object_flag = 0; + sd->time = ray->time; + sd->ray_length = 0.0f; /* todo: can we set this to some useful value? 
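In shader_setup_from_curve above, the ka/kb clamping makes the first and last segments of a curve reuse their boundary key, and the four keys are then blended with Catmull-Rom cubic weights. A standalone sketch of both pieces, using the common uniform Catmull-Rom form with the 0.5 tension factor; the kernel's catmull_rom_basis_eval/derivative operate on float4 keys and may arrange the same weights differently:

#include <algorithm>

/* Clamp the four key indices for segment [k0, k0+1], mirroring the ka/kb
 * computation above: boundary segments reuse their end key. */
static void segment_keys_sketch(int first_key, int num_keys, int segment, int keys[4])
{
  const int k0 = first_key + segment;
  const int k1 = k0 + 1;
  keys[0] = std::max(k0 - 1, first_key);
  keys[1] = k0;
  keys[2] = k1;
  keys[3] = std::min(k1 + 1, first_key + num_keys - 1);
}

/* Generic uniform Catmull-Rom evaluation of one scalar channel. */
static float catmull_rom_sketch(const float P[4], float t)
{
  const float t2 = t * t, t3 = t2 * t;
  return 0.5f * (2.0f * P[1] + (-P[0] + P[2]) * t +
                 (2.0f * P[0] - 5.0f * P[1] + 4.0f * P[2] - P[3]) * t2 +
                 (-P[0] + 3.0f * P[1] - 3.0f * P[2] + P[3]) * t3);
}

/* Its derivative with respect to t, used for the curve tangent. */
static float catmull_rom_derivative_sketch(const float P[4], float t)
{
  const float t2 = t * t;
  return 0.5f * ((-P[0] + P[2]) +
                 2.0f * (2.0f * P[0] - 5.0f * P[1] + 4.0f * P[2] - P[3]) * t +
                 3.0f * (-P[0] + 3.0f * P[1] - 3.0f * P[2] + P[3]) * t2);
}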
*/ + + sd->object = OBJECT_NONE; /* todo: fill this for texture coordinates */ + sd->lamp = LAMP_NONE; + sd->prim = PRIM_NONE; + sd->type = PRIMITIVE_VOLUME; + + sd->u = 0.0f; + sd->v = 0.0f; + +# ifdef __DPDU__ + /* dPdu/dPdv */ + sd->dPdu = zero_float3(); + sd->dPdv = zero_float3(); +# endif + +# ifdef __RAY_DIFFERENTIALS__ + /* differentials */ + sd->dP = differential3_zero(); /* TODO ray->dD */ + differential_incoming(&sd->dI, sd->dP); + sd->du = differential_zero(); + sd->dv = differential_zero(); +# endif + + /* for NDC coordinates */ + sd->ray_P = ray->P; + sd->ray_dP = ray->dP; +} +#endif /* __VOLUME__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/subd_triangle.h b/intern/cycles/kernel/geom/subd_triangle.h new file mode 100644 index 00000000000..8a9a3f71231 --- /dev/null +++ b/intern/cycles/kernel/geom/subd_triangle.h @@ -0,0 +1,687 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Functions for retrieving attributes on triangles produced from subdivision meshes */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Patch index for triangle, -1 if not subdivision triangle */ + +ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + return (sd->prim != PRIM_NONE) ? 
kernel_tex_fetch(__tri_patch, sd->prim) : ~0; +} + +/* UV coords of triangle within patch */ + +ccl_device_inline void subd_triangle_patch_uv(KernelGlobals kg, + ccl_private const ShaderData *sd, + float2 uv[3]) +{ + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + + uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x); + uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y); + uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z); +} + +/* Vertex indices of patch */ + +ccl_device_inline uint4 subd_triangle_patch_indices(KernelGlobals kg, int patch) +{ + uint4 indices; + + indices.x = kernel_tex_fetch(__patches, patch + 0); + indices.y = kernel_tex_fetch(__patches, patch + 1); + indices.z = kernel_tex_fetch(__patches, patch + 2); + indices.w = kernel_tex_fetch(__patches, patch + 3); + + return indices; +} + +/* Originating face for patch */ + +ccl_device_inline uint subd_triangle_patch_face(KernelGlobals kg, int patch) +{ + return kernel_tex_fetch(__patches, patch + 4); +} + +/* Number of corners on originating face */ + +ccl_device_inline uint subd_triangle_patch_num_corners(KernelGlobals kg, int patch) +{ + return kernel_tex_fetch(__patches, patch + 5) & 0xffff; +} + +/* Indices of the four corners that are used by the patch */ + +ccl_device_inline void subd_triangle_patch_corners(KernelGlobals kg, int patch, int corners[4]) +{ + uint4 data; + + data.x = kernel_tex_fetch(__patches, patch + 4); + data.y = kernel_tex_fetch(__patches, patch + 5); + data.z = kernel_tex_fetch(__patches, patch + 6); + data.w = kernel_tex_fetch(__patches, patch + 7); + + int num_corners = data.y & 0xffff; + + if (num_corners == 4) { + /* quad */ + corners[0] = data.z; + corners[1] = data.z + 1; + corners[2] = data.z + 2; + corners[3] = data.z + 3; + } + else { + /* ngon */ + int c = data.y >> 16; + + corners[0] = data.z + c; + corners[1] = data.z + mod(c + 1, num_corners); + corners[2] = data.w; + corners[3] = data.z + mod(c - 1, num_corners); + } +} + +/* Reading attributes on various subdivision triangle elements */ + +ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float *dx, + ccl_private float *dy) +{ + int patch = subd_triangle_patch(kg, sd); + +#ifdef __PATCH_EVAL__ + if (desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + + float a, dads, dadt; + a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if (dx) { + float dudx = sd->du.dx; + float dvdx = sd->dv.dx; + + float dsdx = dsdu * dudx + dsdv * dvdx; + float dtdx = dtdu * dudx + dtdv * dvdx; + + *dx = dads * dsdx + dadt * dtdx; + } + if (dy) { + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; + + float dsdy = dsdu * dudy + dsdv * dvdy; + float dtdy = dtdu * dudy + dtdv * dvdy; + + *dy = dads * dsdy + dadt * dtdy; + } + } +# endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + + return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch)); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + 
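The #ifdef __RAY_DIFFERENTIALS__ block in the subdivided branch above is a plain chain rule: the patch evaluator returns da/ds and da/dt in patch parameter space, the triangle's corner UVs supply d(s,t)/d(u,v), and ShaderData carries d(u,v)/d(x,y); composing them gives the screen-space derivatives. A compact standalone sketch of that composition (names are illustrative):

/* Chain rule used above: da/dx = da/ds * ds/dx + da/dt * dt/dx, with
 * ds/dx = ds/du * du/dx + ds/dv * dv/dx (and likewise for t, and for y). */
static float attribute_derivative_x_sketch(float dads, float dadt, /* patch-space gradient */
                                           float dsdu, float dtdu, /* d(s,t)/du */
                                           float dsdv, float dtdv, /* d(s,t)/dv */
                                           float dudx, float dvdx) /* screen-space du/dx, dv/dx */
{
  const float dsdx = dsdu * dudx + dsdv * dvdx;
  const float dtdx = dtdu * dudx + dtdv * dvdx;
  return dads * dsdx + dadt * dtdx;
}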
subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y); + float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z); + float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; +#endif + + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_CORNER) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); + + float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset); + float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset); + float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset); + float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; +#endif + + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + + return kernel_tex_fetch(__attributes_float, desc.offset); + } + else { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + + return 0.0f; + } +} + +ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float2 *dx, + ccl_private float2 *dy) +{ + int patch = subd_triangle_patch(kg, sd); + +#ifdef __PATCH_EVAL__ + if (desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + + float2 a, dads, dadt; + + a = patch_eval_float2(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if (dx) { + float dudx = sd->du.dx; + float dvdx = sd->dv.dx; + + float dsdx = dsdu * dudx + dsdv * dvdx; + float dtdx = dtdu * dudx + dtdv * dvdx; + + *dx = dads * dsdx + dadt * dtdx; + } + if (dy) { + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; + + float dsdy = dsdu * dudy + dsdv * dvdy; + float dtdy = dtdu * dudy + dtdv * dvdy; + + *dy = dads * dsdy + dadt * dtdy; + } + } +# endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if (desc.element == 
ATTR_ELEMENT_FACE) { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + + return kernel_tex_fetch(__attributes_float2, + desc.offset + subd_triangle_patch_face(kg, patch)); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + v.x); + float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + v.y); + float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + v.z); + float2 f3 = kernel_tex_fetch(__attributes_float2, desc.offset + v.w); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; +#endif + + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_CORNER) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); + + float2 f0, f1, f2, f3; + + f0 = kernel_tex_fetch(__attributes_float2, corners[0] + desc.offset); + f1 = kernel_tex_fetch(__attributes_float2, corners[1] + desc.offset); + f2 = kernel_tex_fetch(__attributes_float2, corners[2] + desc.offset); + f3 = kernel_tex_fetch(__attributes_float2, corners[3] + desc.offset); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; +#endif + + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + + return kernel_tex_fetch(__attributes_float2, desc.offset); + } + else { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + + return make_float2(0.0f, 0.0f); + } +} + +ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float3 *dx, + ccl_private float3 *dy) +{ + int patch = subd_triangle_patch(kg, sd); + +#ifdef __PATCH_EVAL__ + if (desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + + float3 a, dads, dadt; + a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if (dx) { + float dudx = sd->du.dx; + 
float dvdx = sd->dv.dx; + + float dsdx = dsdu * dudx + dsdv * dvdx; + float dtdx = dtdu * dudx + dtdv * dvdx; + + *dx = dads * dsdx + dadt * dtdx; + } + if (dy) { + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; + + float dsdy = dsdu * dudy + dsdv * dvdy; + float dtdy = dtdu * dudy + dtdv * dvdy; + + *dy = dads * dsdy + dadt * dtdy; + } + } +# endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + + return float4_to_float3( + kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch))); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y)); + float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z)); + float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w)); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; +#endif + + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_CORNER) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); + + float3 f0, f1, f2, f3; + + f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset)); + f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset)); + f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset)); + f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset)); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; +#endif + + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset)); + } + else { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + + return make_float3(0.0f, 0.0f, 0.0f); + } +} + +ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg, + ccl_private const ShaderData *sd, + const 
AttributeDescriptor desc, + ccl_private float4 *dx, + ccl_private float4 *dy) +{ + int patch = subd_triangle_patch(kg, sd); + +#ifdef __PATCH_EVAL__ + if (desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + + float4 a, dads, dadt; + if (desc.type == NODE_ATTR_RGBA) { + a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + } + else { + a = patch_eval_float4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + } + +# ifdef __RAY_DIFFERENTIALS__ + if (dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if (dx) { + float dudx = sd->du.dx; + float dvdx = sd->dv.dx; + + float dsdx = dsdu * dudx + dsdv * dvdx; + float dtdx = dtdu * dudx + dtdv * dvdx; + + *dx = dads * dsdx + dadt * dtdx; + } + if (dy) { + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; + + float dsdy = dsdu * dudy + dsdv * dvdy; + float dtdy = dtdu * dudy + dtdv * dvdy; + + *dy = dads * dsdy + dadt * dtdy; + } + } +# endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + return kernel_tex_fetch(__attributes_float3, + desc.offset + subd_triangle_patch_face(kg, patch)); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + v.x); + float4 f1 = kernel_tex_fetch(__attributes_float3, desc.offset + v.y); + float4 f2 = kernel_tex_fetch(__attributes_float3, desc.offset + v.z); + float4 f3 = kernel_tex_fetch(__attributes_float3, desc.offset + v.w); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float4 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float4 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float4 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; +#endif + + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); + + float4 f0, f1, f2, f3; + + if (desc.element == ATTR_ELEMENT_CORNER_BYTE) { + f0 = color_srgb_to_linear_v4( + color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset))); + f1 = color_srgb_to_linear_v4( + color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset))); + f2 = color_srgb_to_linear_v4( + color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset))); + f3 = color_srgb_to_linear_v4( + color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset))); + } + else { + f0 = kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset); + f1 = kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset); + f2 = kernel_tex_fetch(__attributes_float3, 
corners[2] + desc.offset); + f3 = kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset); + } + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float4 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float4 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float4 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; +#endif + + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) { + if (dx) + *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + return kernel_tex_fetch(__attributes_float3, desc.offset); + } + else { + if (dx) + *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/triangle.h b/intern/cycles/kernel/geom/triangle.h new file mode 100644 index 00000000000..233e901c7ca --- /dev/null +++ b/intern/cycles/kernel/geom/triangle.h @@ -0,0 +1,370 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Triangle Primitive + * + * Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH + * ray intersection we use a precomputed triangle storage to accelerate + * intersection at the cost of more memory usage */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Normal on triangle. */ +ccl_device_inline float3 triangle_normal(KernelGlobals kg, ccl_private ShaderData *sd) +{ + /* load triangle vertices */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + const float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + const float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + const float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); + + /* return normal */ + if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + return normalize(cross(v2 - v0, v1 - v0)); + } + else { + return normalize(cross(v1 - v0, v2 - v0)); + } +} + +/* Point and normal on triangle. 
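triangle_normal above swaps the cross-product winding for objects whose negative scale has already been applied to the vertices: mirroring a mesh reverses the apparent winding of every triangle, and exchanging the edge order in the cross product (cross(b, a) == -cross(a, b)) restores an outward-facing normal. A tiny sketch of that identity, reusing the kernel float3 helpers:

/* Swapping the edges negates the normal, which undoes the flip introduced
 * by a negatively scaled (mirrored) object. */
static float3 face_normal_sketch(const float3 v0, const float3 v1, const float3 v2,
                                 const bool negative_scale_applied)
{
  const float3 e1 = v1 - v0;
  const float3 e2 = v2 - v0;
  return normalize(negative_scale_applied ? cross(e2, e1) : cross(e1, e2));
}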
*/ +ccl_device_inline void triangle_point_normal(KernelGlobals kg, + int object, + int prim, + float u, + float v, + ccl_private float3 *P, + ccl_private float3 *Ng, + ccl_private int *shader) +{ + /* load triangle vertices */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); + /* compute point */ + float t = 1.0f - u - v; + *P = (u * v0 + v * v1 + t * v2); + /* get object flags */ + int object_flag = kernel_tex_fetch(__object_flag, object); + /* compute normal */ + if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + *Ng = normalize(cross(v2 - v0, v1 - v0)); + } + else { + *Ng = normalize(cross(v1 - v0, v2 - v0)); + } + /* shader`*/ + *shader = kernel_tex_fetch(__tri_shader, prim); +} + +/* Triangle vertex locations */ + +ccl_device_inline void triangle_vertices(KernelGlobals kg, int prim, float3 P[3]) +{ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); +} + +/* Triangle vertex locations and vertex normals */ + +ccl_device_inline void triangle_vertices_and_normals(KernelGlobals kg, + int prim, + float3 P[3], + float3 N[3]) +{ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); + N[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); + N[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); + N[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); +} + +/* Interpolate smooth vertex normal from vertices */ + +ccl_device_inline float3 +triangle_smooth_normal(KernelGlobals kg, float3 Ng, int prim, float u, float v) +{ + /* load triangle vertices */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); + float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); + float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + + float3 N = safe_normalize((1.0f - u - v) * n2 + u * n0 + v * n1); + + return is_zero(N) ? Ng : N; +} + +ccl_device_inline float3 triangle_smooth_normal_unnormalized( + KernelGlobals kg, ccl_private const ShaderData *sd, float3 Ng, int prim, float u, float v) +{ + /* load triangle vertices */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); + float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); + float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + + /* ensure that the normals are in object space */ + if (sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED) { + object_inverse_normal_transform(kg, sd, &n0); + object_inverse_normal_transform(kg, sd, &n1); + object_inverse_normal_transform(kg, sd, &n2); + } + + float3 N = (1.0f - u - v) * n2 + u * n0 + v * n1; + + return is_zero(N) ? 
Ng : N; +} + +/* Ray differentials on triangle */ + +ccl_device_inline void triangle_dPdudv(KernelGlobals kg, + int prim, + ccl_private float3 *dPdu, + ccl_private float3 *dPdv) +{ + /* fetch triangle vertex coordinates */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + const float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + const float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + const float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); + + /* compute derivatives of P w.r.t. uv */ + *dPdu = (p0 - p2); + *dPdv = (p1 - p2); +} + +/* Reading attributes on various triangle elements */ + +ccl_device float triangle_attribute_float(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float *dx, + ccl_private float *dy) +{ + if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) { + float f0, f1, f2; + + if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x); + f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y); + f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z); + } + else { + const int tri = desc.offset + sd->prim * 3; + f0 = kernel_tex_fetch(__attributes_float, tri + 0); + f1 = kernel_tex_fetch(__attributes_float, tri + 1); + f2 = kernel_tex_fetch(__attributes_float, tri + 2); + } + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; +#endif + + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else { +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; +#endif + + if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { + const int offset = (desc.element == ATTR_ELEMENT_FACE) ? 
desc.offset + sd->prim : + desc.offset; + return kernel_tex_fetch(__attributes_float, offset); + } + else { + return 0.0f; + } + } +} + +ccl_device float2 triangle_attribute_float2(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float2 *dx, + ccl_private float2 *dy) +{ + if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) { + float2 f0, f1, f2; + + if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + f0 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.x); + f1 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.y); + f2 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.z); + } + else { + const int tri = desc.offset + sd->prim * 3; + f0 = kernel_tex_fetch(__attributes_float2, tri + 0); + f1 = kernel_tex_fetch(__attributes_float2, tri + 1); + f2 = kernel_tex_fetch(__attributes_float2, tri + 2); + } + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; +#endif + + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else { +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); +#endif + + if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { + const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim : + desc.offset; + return kernel_tex_fetch(__attributes_float2, offset); + } + else { + return make_float2(0.0f, 0.0f); + } + } +} + +ccl_device float3 triangle_attribute_float3(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float3 *dx, + ccl_private float3 *dy) +{ + if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER)) { + float3 f0, f1, f2; + + if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x)); + f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y)); + f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z)); + } + else { + const int tri = desc.offset + sd->prim * 3; + f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); + f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); + f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); + } + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; +#endif + + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else { +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); +#endif + + if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { + const int offset = (desc.element == ATTR_ELEMENT_FACE) ? 
desc.offset + sd->prim : + desc.offset; + return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset)); + } + else { + return make_float3(0.0f, 0.0f, 0.0f); + } + } +} + +ccl_device float4 triangle_attribute_float4(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc, + ccl_private float4 *dx, + ccl_private float4 *dy) +{ + if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER | + ATTR_ELEMENT_CORNER_BYTE)) { + float4 f0, f1, f2; + + if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION)) { + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + f0 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x); + f1 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y); + f2 = kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z); + } + else { + const int tri = desc.offset + sd->prim * 3; + if (desc.element == ATTR_ELEMENT_CORNER) { + f0 = kernel_tex_fetch(__attributes_float3, tri + 0); + f1 = kernel_tex_fetch(__attributes_float3, tri + 1); + f2 = kernel_tex_fetch(__attributes_float3, tri + 2); + } + else { + f0 = color_srgb_to_linear_v4( + color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 0))); + f1 = color_srgb_to_linear_v4( + color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 1))); + f2 = color_srgb_to_linear_v4( + color_uchar4_to_float4(kernel_tex_fetch(__attributes_uchar4, tri + 2))); + } + } + +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; +#endif + + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else { +#ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#endif + + if (desc.element & (ATTR_ELEMENT_FACE | ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { + const int offset = (desc.element == ATTR_ELEMENT_FACE) ? desc.offset + sd->prim : + desc.offset; + return kernel_tex_fetch(__attributes_float3, offset); + } + else { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/triangle_intersect.h b/intern/cycles/kernel/geom/triangle_intersect.h new file mode 100644 index 00000000000..faff8a85a93 --- /dev/null +++ b/intern/cycles/kernel/geom/triangle_intersect.h @@ -0,0 +1,312 @@ +/* + * Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Triangle/Ray intersections. + * + * For BVH ray intersection we use a precomputed triangle storage to accelerate + * intersection at the cost of more memory usage. 
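The actual ray_triangle_intersect() used by this file lives elsewhere in the kernel and, per the comments further down, aims to be watertight. Purely as a refresher, here is a standalone sketch of the classic Möller-Trumbore formulation that the refinement code later in this file cites; the triangle, ray and epsilon are illustrative choices, not Cycles values.

#include <cmath>
#include <cstdio>

struct V3 { float x, y, z; };
static V3 sub(V3 a, V3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static V3 cross(V3 a, V3 b) {
  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}
static float dot(V3 a, V3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

/* Classic Möller-Trumbore ray/triangle test: returns true and fills u, v, t
 * for a hit strictly between the ray origin and tmax. */
static bool ray_tri(V3 P, V3 D, float tmax, V3 a, V3 b, V3 c,
                    float *u, float *v, float *t)
{
  const V3 e1 = sub(b, a), e2 = sub(c, a);
  const V3 pvec = cross(D, e2);
  const float det = dot(e1, pvec);
  if (std::fabs(det) < 1e-8f)
    return false; /* ray (nearly) parallel to the triangle plane */
  const float inv_det = 1.0f / det;
  const V3 tvec = sub(P, a);
  *u = dot(tvec, pvec) * inv_det;
  if (*u < 0.0f || *u > 1.0f)
    return false;
  const V3 qvec = cross(tvec, e1);
  *v = dot(D, qvec) * inv_det;
  if (*v < 0.0f || *u + *v > 1.0f)
    return false;
  *t = dot(e2, qvec) * inv_det;
  return *t > 0.0f && *t < tmax;
}

int main()
{
  float u, v, t;
  /* Unit triangle in the XY plane, ray shooting straight down -Z. */
  if (ray_tri({0.25f, 0.25f, 1.0f}, {0.0f, 0.0f, -1.0f}, 10.0f,
              {0, 0, 0}, {1, 0, 0}, {0, 1, 0}, &u, &v, &t))
    std::printf("hit: t=%g u=%g v=%g\n", t, u, v);
}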
+ */ + +#pragma once + +#include "kernel/sample/lcg.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline bool triangle_intersect(KernelGlobals kg, + ccl_private Intersection *isect, + float3 P, + float3 dir, + float tmax, + uint visibility, + int object, + int prim_addr) +{ + const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; +#else + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); +#endif + float t, u, v; + if (ray_triangle_intersect(P, + dir, + tmax, +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + ssef_verts, +#else + float4_to_float3(tri_a), + float4_to_float3(tri_b), + float4_to_float3(tri_c), +#endif + &u, + &v, + &t)) { +#ifdef __VISIBILITY_FLAG__ + /* Visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags. + */ + if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) +#endif + { + isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : + object; + isect->prim = prim; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = u; + isect->v = v; + isect->t = t; + return true; + } + } + return false; +} + +/* Special ray intersection routines for subsurface scattering. In that case we + * only want to intersect with primitives in the same object, and if case of + * multiple hits we pick a single random primitive as the intersection point. + * Returns whether traversal should be stopped. + */ + +#ifdef __BVH_LOCAL__ +ccl_device_inline bool triangle_intersect_local(KernelGlobals kg, + ccl_private LocalIntersection *local_isect, + float3 P, + float3 dir, + int object, + int local_object, + int prim_addr, + float tmax, + ccl_private uint *lcg_state, + int max_hits) +{ + /* Only intersect with matching object, for instanced objects we + * already know we are only intersecting the right object. */ + if (object == OBJECT_NONE) { + if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { + return false; + } + } + + const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; +# else + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); +# endif + float t, u, v; + if (!ray_triangle_intersect(P, + dir, + tmax, +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + ssef_verts, +# else + tri_a, + tri_b, + tri_c, +# endif + &u, + &v, + &t)) { + return false; + } + + /* If no actual hit information is requested, just return here. */ + if (max_hits == 0) { + return true; + } + + int hit; + if (lcg_state) { + /* Record up to max_hits intersections. 
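When a random state is available, the code below records at most max_hits hits and then replaces entries with uniform probability, i.e. reservoir sampling. A standalone sketch of that replacement strategy, with an illustrative capacity and a plain std::mt19937 standing in for the kernel's LCG:

/* Reservoir sampling sketch: keep a uniform random subset of at most
 * `capacity` items from a stream whose length is not known in advance. */
#include <cstdint>
#include <cstdio>
#include <random>
#include <vector>

int main()
{
  const int capacity = 4;
  std::vector<int> reservoir;
  std::mt19937 rng(42); /* stand-in for the kernel's LCG state */

  int num_seen = 0;
  for (int item = 0; item < 100; ++item) { /* the "stream" of hits */
    ++num_seen;
    if ((int)reservoir.size() < capacity) {
      reservoir.push_back(item); /* still room: always record */
    }
    else {
      /* Replace a random slot with probability capacity / num_seen: pick an
       * index in [0, num_seen) and accept only if it falls inside the
       * reservoir, exactly like `lcg_step_uint() % num_hits` followed by the
       * `hit >= max_hits` rejection below. */
      const int slot = (int)(rng() % (uint32_t)num_seen);
      if (slot < capacity)
        reservoir[slot] = item;
    }
  }

  for (int v : reservoir)
    std::printf("%d ", v);
  std::printf("\n");
}

Every item of the stream ends up in the reservoir with the same probability, which is what keeps the randomly kept subsurface hit unbiased.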
*/ + for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (local_isect->hits[i].t == t) { + return false; + } + } + + local_isect->num_hits++; + + if (local_isect->num_hits <= max_hits) { + hit = local_isect->num_hits - 1; + } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit = lcg_step_uint(lcg_state) % local_isect->num_hits; + + if (hit >= max_hits) + return false; + } + } + else { + /* Record closest intersection only. */ + if (local_isect->num_hits && t > local_isect->hits[0].t) { + return false; + } + + hit = 0; + local_isect->num_hits = 1; + } + + /* Record intersection. */ + ccl_private Intersection *isect = &local_isect->hits[hit]; + isect->prim = prim; + isect->object = local_object; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = u; + isect->v = v; + isect->t = t; + + /* Record geometric normal. */ +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); +# endif + local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); + + return false; +} +#endif /* __BVH_LOCAL__ */ + +/* Refine triangle intersection to more precise hit point. For rays that travel + * far the precision is often not so good, this reintersects the primitive from + * a closer distance. */ + +/* Reintersections uses the paper: + * + * Tomas Moeller + * Fast, minimum storage ray/triangle intersection + * http://www.cs.virginia.edu/~gfx/Courses/2003/ImageSynthesis/papers/Acceleration/Fast%20MinimumStorage%20RayTriangle%20Intersection.pdf + */ + +ccl_device_inline float3 triangle_refine(KernelGlobals kg, + ccl_private ShaderData *sd, + float3 P, + float3 D, + float t, + const int isect_object, + const int isect_prim) +{ +#ifdef __INTERSECTION_REFINE__ + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + if (UNLIKELY(t == 0.0f)) { + return P; + } + const Transform tfm = object_get_inverse_transform(kg, sd); + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D * t); + D = normalize_len(D, &t); + } + + P = P + D * t; + + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); + float3 qvec = cross(tvec, edge1); + float3 pvec = cross(D, edge2); + float det = dot(edge1, pvec); + if (det != 0.0f) { + /* If determinant is zero it means ray lies in the plane of + * the triangle. It is possible in theory due to watertight + * nature of triangle intersection. For such cases we simply + * don't refine intersection hoping it'll go all fine. + */ + float rt = dot(edge2, qvec) / det; + P = P + D * rt; + } + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform tfm = object_get_transform(kg, sd); + P = transform_point(&tfm, P); + } + + return P; +#else + return P + D * t; +#endif +} + +/* Same as above, except that t is assumed to be in object space for + * instancing. 
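The refinement above (and its local variant below) reuses the Möller-Trumbore quantities to compute a residual distance rt along the ray direction that puts the advanced point back onto the triangle's plane. A small standalone check of that step, with plain structs and an artificially perturbed hit point:

/* Re-intersection refinement sketch: given an approximate hit point P that
 * drifted off the triangle plane, compute the residual distance along D and
 * verify the refined point lies on the plane again. */
#include <cstdio>

struct V3 { float x, y, z; };
static V3 operator-(V3 a, V3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static V3 operator+(V3 a, V3 b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; }
static V3 operator*(V3 a, float s) { return {a.x * s, a.y * s, a.z * s}; }
static V3 cross(V3 a, V3 b) {
  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}
static float dot(V3 a, V3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

int main()
{
  const V3 a = {0, 0, 0}, b = {1, 0, 0}, c = {0, 1, 0}; /* triangle in z = 0 */
  const V3 D = {0.0f, 0.0f, -1.0f};                     /* ray direction */
  V3 P = {0.3f, 0.3f, 0.001f}; /* approximate hit, slightly off the plane */

  const V3 edge1 = a - c, edge2 = b - c, tvec = P - c;
  const V3 qvec = cross(tvec, edge1);
  const V3 pvec = cross(D, edge2);
  const float det = dot(edge1, pvec);
  if (det != 0.0f) {
    const float rt = dot(edge2, qvec) / det; /* residual along D */
    P = P + D * rt;
  }
  std::printf("refined P.z = %g (should be ~0)\n", P.z);
}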
+ */ +ccl_device_inline float3 triangle_refine_local(KernelGlobals kg, + ccl_private ShaderData *sd, + float3 P, + float3 D, + float t, + const int isect_object, + const int isect_prim) +{ +#ifdef __KERNEL_OPTIX__ + /* t is always in world space with OptiX. */ + return triangle_refine(kg, sd, P, D, t, isect_object, isect_prim); +#else + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform tfm = object_get_inverse_transform(kg, sd); + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + D = normalize(D); + } + + P = P + D * t; + +# ifdef __INTERSECTION_REFINE__ + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); + float3 qvec = cross(tvec, edge1); + float3 pvec = cross(D, edge2); + float det = dot(edge1, pvec); + if (det != 0.0f) { + /* If determinant is zero it means ray lies in the plane of + * the triangle. It is possible in theory due to watertight + * nature of triangle intersection. For such cases we simply + * don't refine intersection hoping it'll go all fine. + */ + float rt = dot(edge2, qvec) / det; + P = P + D * rt; + } +# endif /* __INTERSECTION_REFINE__ */ + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const Transform tfm = object_get_transform(kg, sd); + P = transform_point(&tfm, P); + } + + return P; +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/volume.h b/intern/cycles/kernel/geom/volume.h new file mode 100644 index 00000000000..4e83ad6acb3 --- /dev/null +++ b/intern/cycles/kernel/geom/volume.h @@ -0,0 +1,97 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Volume Primitive + * + * Volumes are just regions inside meshes with the mesh surface as boundaries. + * There isn't as much data to access as for surfaces, there is only a position + * to do lookups in 3D voxel or procedural textures. + * + * 3D voxel textures can be assigned as attributes per mesh, which means the + * same shader can be used for volume objects with different densities, etc. 
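Voxel attribute lookups below first map the shading position into the 0..1 mesh bounds. A simplified standalone sketch of that normalization, assuming a plain axis-aligned bounding box rather than the generated-transform attribute that volume_normalized_position() actually uses:

/* Normalize a world-space point into 0..1 over an object's bounding box,
 * the same idea the kernel implements via a per-mesh "generated" transform
 * (simplified: no motion blur, no instancing). */
#include <cstdio>

struct V3 { float x, y, z; };

static V3 normalized_position(V3 p, V3 bb_min, V3 bb_max)
{
  return {(p.x - bb_min.x) / (bb_max.x - bb_min.x),
          (p.y - bb_min.y) / (bb_max.y - bb_min.y),
          (p.z - bb_min.z) / (bb_max.z - bb_min.z)};
}

int main()
{
  const V3 bb_min = {-1.0f, -1.0f, -1.0f}, bb_max = {1.0f, 1.0f, 3.0f};
  const V3 p = {0.0f, 0.5f, 1.0f};
  const V3 n = normalized_position(p, bb_min, bb_max);
  std::printf("normalized: %g %g %g\n", n.x, n.y, n.z); /* 0.5 0.75 0.5 */
}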
*/ + +#pragma once + +CCL_NAMESPACE_BEGIN + +#ifdef __VOLUME__ + +/* Return position normalized to 0..1 in mesh bounds */ + +ccl_device_inline float3 volume_normalized_position(KernelGlobals kg, + ccl_private const ShaderData *sd, + float3 P) +{ + /* todo: optimize this so it's just a single matrix multiplication when + * possible (not motion blur), or perhaps even just translation + scale */ + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM); + + object_inverse_position_transform(kg, sd, &P); + + if (desc.offset != ATTR_STD_NOT_FOUND) { + Transform tfm = primitive_attribute_matrix(kg, sd, desc); + P = transform_point(&tfm, P); + } + + return P; +} + +ccl_device float volume_attribute_value_to_float(const float4 value) +{ + return average(float4_to_float3(value)); +} + +ccl_device float volume_attribute_value_to_alpha(const float4 value) +{ + return value.w; +} + +ccl_device float3 volume_attribute_value_to_float3(const float4 value) +{ + if (value.w > 1e-6f && value.w != 1.0f) { + /* For RGBA colors, unpremultiply after interpolation. */ + return float4_to_float3(value) / value.w; + } + else { + return float4_to_float3(value); + } +} + +ccl_device float4 volume_attribute_float4(KernelGlobals kg, + ccl_private const ShaderData *sd, + const AttributeDescriptor desc) +{ + if (desc.element & (ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { + return kernel_tex_fetch(__attributes_float3, desc.offset); + } + else if (desc.element == ATTR_ELEMENT_VOXEL) { + /* todo: optimize this so we don't have to transform both here and in + * kernel_tex_image_interp_3d when possible. Also could optimize for the + * common case where transform is translation/scale only. */ + float3 P = sd->P; + object_inverse_position_transform(kg, sd, &P); + InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC : + INTERPOLATION_NONE; + return kernel_tex_image_interp_3d(kg, desc.offset, P, interp); + } + else { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } +} + +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h new file mode 100644 index 00000000000..4e30563e21b --- /dev/null +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -0,0 +1,202 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/camera/camera.h" + +#include "kernel/film/accumulate.h" +#include "kernel/film/adaptive_sampling.h" + +#include "kernel/integrator/path_state.h" + +#include "kernel/sample/pattern.h" + +#include "kernel/geom/geom.h" + +CCL_NAMESPACE_BEGIN + +/* This helps with AA but it's not the real solution as it does not AA the geometry + * but it's better than nothing, thus committed. 
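The anti-aliasing trick jitters the baked barycentric coordinates by the stored differentials and folds the result back into range with a mirrored repeat, much like OpenGL's GL_MIRRORED_REPEAT wrap mode. A standalone sketch of the wrap that the helper defined just below implements, with a few example evaluations (the input values are arbitrary):

/* Mirrored-repeat wrap of u into [0, max], as used to keep jittered
 * barycentric coordinates inside the triangle during baking. */
#include <cmath>
#include <cstdio>

static float clamp_mirror_repeat(float u, float max)
{
  u /= max;
  const float fu = std::floor(u);
  u -= fu;
  /* Odd periods are mirrored so neighbouring samples do not all clamp to the
   * same edge value, which would otherwise show up as a pattern. */
  return ((((int)fu) & 1) ? 1.0f - u : u) * max;
}

int main()
{
  /* Values just outside the valid range fold back inside it. */
  std::printf("%g %g %g\n",
              clamp_mirror_repeat(0.30f, 1.0f),   /* 0.30: unchanged */
              clamp_mirror_repeat(1.05f, 1.0f),   /* 0.95: mirrored  */
              clamp_mirror_repeat(-0.10f, 1.0f)); /* 0.10: mirrored  */
}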
*/ +ccl_device_inline float bake_clamp_mirror_repeat(float u, float max) +{ + /* use mirror repeat (like opengl texture) so that if the barycentric + * coordinate goes past the end of the triangle it is not always clamped + * to the same value, gives ugly patterns */ + u /= max; + float fu = floorf(u); + u = u - fu; + + return ((((int)fu) & 1) ? 1.0f - u : u) * max; +} + +/* Return false to indicate that this pixel is finished. + * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known + * that the pixel did converge. */ +ccl_device bool integrator_init_from_bake(KernelGlobals kg, + IntegratorState state, + ccl_global const KernelWorkTile *ccl_restrict tile, + ccl_global float *render_buffer, + const int x, + const int y, + const int scheduled_sample) +{ + PROFILING_INIT(kg, PROFILING_RAY_SETUP); + + /* Initialize path state to give basic buffer access and allow early outputs. */ + path_state_init(state, tile, x, y); + + /* Check whether the pixel has converged and should not be sampled anymore. */ + if (!kernel_need_sample_pixel(kg, state, render_buffer)) { + return false; + } + + /* Always count the sample, even if the camera sample will reject the ray. */ + const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample); + + /* Setup render buffers. */ + const int index = INTEGRATOR_STATE(state, path, render_pixel_index); + const int pass_stride = kernel_data.film.pass_stride; + render_buffer += index * pass_stride; + + ccl_global float *primitive = render_buffer + kernel_data.film.pass_bake_primitive; + ccl_global float *differential = render_buffer + kernel_data.film.pass_bake_differential; + + const int seed = __float_as_uint(primitive[0]); + int prim = __float_as_uint(primitive[1]); + if (prim == -1) { + return false; + } + + prim += kernel_data.bake.tri_offset; + + /* Random number generator. */ + const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed; + + float filter_x, filter_y; + if (sample == 0) { + filter_x = filter_y = 0.5f; + } + else { + path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_x, &filter_y); + } + + /* Initialize path state for path integration. */ + path_state_init_integrator(kg, state, sample, rng_hash); + + /* Barycentric UV with sub-pixel offset. */ + float u = primitive[2]; + float v = primitive[3]; + + float dudx = differential[0]; + float dudy = differential[1]; + float dvdx = differential[2]; + float dvdy = differential[3]; + + if (sample > 0) { + u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f); + v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f), + 1.0f - u); + } + + /* Position and normal on triangle. */ + const int object = kernel_data.bake.object_index; + float3 P, Ng; + int shader; + triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); + + const int object_flag = kernel_tex_fetch(__object_flag, object); + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + P = transform_point_auto(&tfm, P); + } + + if (kernel_data.film.pass_background != PASS_UNUSED) { + /* Environment baking. */ + + /* Setup and write ray. */ + Ray ray ccl_optional_struct_init; + ray.P = zero_float3(); + ray.D = normalize(P); + ray.t = FLT_MAX; + ray.time = 0.5f; + ray.dP = differential_zero_compact(); + ray.dD = differential_zero_compact(); + integrator_state_write_ray(kg, state, &ray); + + /* Setup next kernel to execute. 
*/ + INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + } + else { + /* Surface baking. */ + float3 N = (shader & SHADER_SMOOTH_NORMAL) ? triangle_smooth_normal(kg, Ng, prim, u, v) : Ng; + + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + N = normalize(transform_direction_transposed(&itfm, N)); + Ng = normalize(transform_direction_transposed(&itfm, Ng)); + } + + /* Setup ray. */ + Ray ray ccl_optional_struct_init; + ray.P = P + N; + ray.D = -N; + ray.t = FLT_MAX; + ray.time = 0.5f; + + /* Setup differentials. */ + float3 dPdu, dPdv; + triangle_dPdudv(kg, prim, &dPdu, &dPdv); + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + dPdu = transform_direction(&tfm, dPdu); + dPdv = transform_direction(&tfm, dPdv); + } + + differential3 dP; + dP.dx = dPdu * dudx + dPdv * dvdx; + dP.dy = dPdu * dudy + dPdv * dvdy; + ray.dP = differential_make_compact(dP); + ray.dD = differential_zero_compact(); + + /* Write ray. */ + integrator_state_write_ray(kg, state, &ray); + + /* Setup and write intersection. */ + Intersection isect ccl_optional_struct_init; + isect.object = kernel_data.bake.object_index; + isect.prim = prim; + isect.u = u; + isect.v = v; + isect.t = 1.0f; + isect.type = PRIMITIVE_TRIANGLE; + integrator_state_write_isect(kg, state, &isect); + + /* Setup next kernel to execute. */ + const int shader_index = shader & SHADER_MASK; + const int shader_flags = kernel_tex_fetch(__shaders, shader_index).flags; + if (shader_flags & SD_HAS_RAYTRACE) { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); + } + else { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index); + } + } + + return true; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/init_from_camera.h b/intern/cycles/kernel/integrator/init_from_camera.h new file mode 100644 index 00000000000..f0ba77bd9a6 --- /dev/null +++ b/intern/cycles/kernel/integrator/init_from_camera.h @@ -0,0 +1,124 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/camera/camera.h" + +#include "kernel/film/accumulate.h" +#include "kernel/film/adaptive_sampling.h" + +#include "kernel/integrator/path_state.h" +#include "kernel/integrator/shadow_catcher.h" + +#include "kernel/sample/pattern.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void integrate_camera_sample(KernelGlobals kg, + const int sample, + const int x, + const int y, + const uint rng_hash, + ccl_private Ray *ray) +{ + /* Filter sampling. */ + float filter_u, filter_v; + + if (sample == 0) { + filter_u = 0.5f; + filter_v = 0.5f; + } + else { + path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_u, &filter_v); + } + + /* Depth of field sampling. 
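The surface-baking setup above composes the compact ray differential with the chain rule, dP/dx = dP/du * du/dx + dP/dv * dv/dx (and likewise for y). A tiny numeric sketch of that composition; the derivative values are made up for illustration:

/* Chain rule for baked ray differentials: screen-space position derivatives
 * are composed from the triangle's dP/du, dP/dv and the stored du/dx etc. */
#include <cstdio>

struct V3 { float x, y, z; };
static V3 operator+(V3 a, V3 b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; }
static V3 operator*(V3 a, float s) { return {a.x * s, a.y * s, a.z * s}; }

int main()
{
  /* Illustrative values: dP/du, dP/dv as returned by triangle_dPdudv(), and
   * the barycentric screen derivatives stored in the bake differential pass. */
  const V3 dPdu = {1.0f, 0.0f, 0.0f};
  const V3 dPdv = {0.0f, 2.0f, 0.0f};
  const float dudx = 0.01f, dvdx = 0.00f;
  const float dudy = 0.00f, dvdy = 0.02f;

  const V3 dPdx = dPdu * dudx + dPdv * dvdx; /* footprint along screen x */
  const V3 dPdy = dPdu * dudy + dPdv * dvdy; /* footprint along screen y */

  std::printf("dP/dx = (%g, %g, %g)\n", dPdx.x, dPdx.y, dPdx.z);
  std::printf("dP/dy = (%g, %g, %g)\n", dPdy.x, dPdy.y, dPdy.z);
}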
*/ + float lens_u = 0.0f, lens_v = 0.0f; + if (kernel_data.cam.aperturesize > 0.0f) { + path_rng_2D(kg, rng_hash, sample, PRNG_LENS_U, &lens_u, &lens_v); + } + + /* Motion blur time sampling. */ + float time = 0.0f; +#ifdef __CAMERA_MOTION__ + if (kernel_data.cam.shuttertime != -1.0f) + time = path_rng_1D(kg, rng_hash, sample, PRNG_TIME); +#endif + + /* Generate camera ray. */ + camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray); +} + +/* Return false to indicate that this pixel is finished. + * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known + * that the pixel did converge. */ +ccl_device bool integrator_init_from_camera(KernelGlobals kg, + IntegratorState state, + ccl_global const KernelWorkTile *ccl_restrict tile, + ccl_global float *render_buffer, + const int x, + const int y, + const int scheduled_sample) +{ + PROFILING_INIT(kg, PROFILING_RAY_SETUP); + + /* Initialize path state to give basic buffer access and allow early outputs. */ + path_state_init(state, tile, x, y); + + /* Check whether the pixel has converged and should not be sampled anymore. */ + if (!kernel_need_sample_pixel(kg, state, render_buffer)) { + return false; + } + + /* Count the sample and get an effective sample for this pixel. + * + * This logic allows to both count actual number of samples per pixel, and to add samples to this + * pixel after it was converged and samples were added somewhere else (in which case the + * `scheduled_sample` will be different from actual number of samples in this pixel). */ + const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample); + + /* Initialize random number seed for path. */ + const uint rng_hash = path_rng_hash_init(kg, sample, x, y); + + { + /* Generate camera ray. */ + Ray ray; + integrate_camera_sample(kg, sample, x, y, rng_hash, &ray); + if (ray.t == 0.0f) { + return true; + } + + /* Write camera ray to state. */ + integrator_state_write_ray(kg, state, &ray); + } + + /* Initialize path state for path integration. */ + path_state_init_integrator(kg, state, sample, rng_hash); + + /* Continue with intersect_closest kernel, optionally initializing volume + * stack before that if the camera may be inside a volume. */ + if (kernel_data.cam.is_inside_volume) { + INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); + } + else { + INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + } + + return true; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h deleted file mode 100644 index 5790cfd3f22..00000000000 --- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "kernel/camera/camera.h" - -#include "kernel/film/film_accumulate.h" -#include "kernel/film/film_adaptive_sampling.h" - -#include "kernel/integrator/integrator_path_state.h" - -#include "kernel/sample/sample_pattern.h" - -#include "kernel/geom/geom.h" - -CCL_NAMESPACE_BEGIN - -/* This helps with AA but it's not the real solution as it does not AA the geometry - * but it's better than nothing, thus committed. */ -ccl_device_inline float bake_clamp_mirror_repeat(float u, float max) -{ - /* use mirror repeat (like opengl texture) so that if the barycentric - * coordinate goes past the end of the triangle it is not always clamped - * to the same value, gives ugly patterns */ - u /= max; - float fu = floorf(u); - u = u - fu; - - return ((((int)fu) & 1) ? 1.0f - u : u) * max; -} - -/* Return false to indicate that this pixel is finished. - * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known - * that the pixel did converge. */ -ccl_device bool integrator_init_from_bake(KernelGlobals kg, - IntegratorState state, - ccl_global const KernelWorkTile *ccl_restrict tile, - ccl_global float *render_buffer, - const int x, - const int y, - const int scheduled_sample) -{ - PROFILING_INIT(kg, PROFILING_RAY_SETUP); - - /* Initialize path state to give basic buffer access and allow early outputs. */ - path_state_init(state, tile, x, y); - - /* Check whether the pixel has converged and should not be sampled anymore. */ - if (!kernel_need_sample_pixel(kg, state, render_buffer)) { - return false; - } - - /* Always count the sample, even if the camera sample will reject the ray. */ - const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample); - - /* Setup render buffers. */ - const int index = INTEGRATOR_STATE(state, path, render_pixel_index); - const int pass_stride = kernel_data.film.pass_stride; - render_buffer += index * pass_stride; - - ccl_global float *primitive = render_buffer + kernel_data.film.pass_bake_primitive; - ccl_global float *differential = render_buffer + kernel_data.film.pass_bake_differential; - - const int seed = __float_as_uint(primitive[0]); - int prim = __float_as_uint(primitive[1]); - if (prim == -1) { - return false; - } - - prim += kernel_data.bake.tri_offset; - - /* Random number generator. */ - const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed; - - float filter_x, filter_y; - if (sample == 0) { - filter_x = filter_y = 0.5f; - } - else { - path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_x, &filter_y); - } - - /* Initialize path state for path integration. */ - path_state_init_integrator(kg, state, sample, rng_hash); - - /* Barycentric UV with sub-pixel offset. */ - float u = primitive[2]; - float v = primitive[3]; - - float dudx = differential[0]; - float dudy = differential[1]; - float dvdx = differential[2]; - float dvdy = differential[3]; - - if (sample > 0) { - u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f); - v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f), - 1.0f - u); - } - - /* Position and normal on triangle. 
*/ - const int object = kernel_data.bake.object_index; - float3 P, Ng; - int shader; - triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); - - const int object_flag = kernel_tex_fetch(__object_flag, object); - if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); - P = transform_point_auto(&tfm, P); - } - - if (kernel_data.film.pass_background != PASS_UNUSED) { - /* Environment baking. */ - - /* Setup and write ray. */ - Ray ray ccl_optional_struct_init; - ray.P = zero_float3(); - ray.D = normalize(P); - ray.t = FLT_MAX; - ray.time = 0.5f; - ray.dP = differential_zero_compact(); - ray.dD = differential_zero_compact(); - integrator_state_write_ray(kg, state, &ray); - - /* Setup next kernel to execute. */ - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); - } - else { - /* Surface baking. */ - float3 N = (shader & SHADER_SMOOTH_NORMAL) ? triangle_smooth_normal(kg, Ng, prim, u, v) : Ng; - - if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - N = normalize(transform_direction_transposed(&itfm, N)); - Ng = normalize(transform_direction_transposed(&itfm, Ng)); - } - - /* Setup ray. */ - Ray ray ccl_optional_struct_init; - ray.P = P + N; - ray.D = -N; - ray.t = FLT_MAX; - ray.time = 0.5f; - - /* Setup differentials. */ - float3 dPdu, dPdv; - triangle_dPdudv(kg, prim, &dPdu, &dPdv); - if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); - dPdu = transform_direction(&tfm, dPdu); - dPdv = transform_direction(&tfm, dPdv); - } - - differential3 dP; - dP.dx = dPdu * dudx + dPdv * dvdx; - dP.dy = dPdu * dudy + dPdv * dvdy; - ray.dP = differential_make_compact(dP); - ray.dD = differential_zero_compact(); - - /* Write ray. */ - integrator_state_write_ray(kg, state, &ray); - - /* Setup and write intersection. */ - Intersection isect ccl_optional_struct_init; - isect.object = kernel_data.bake.object_index; - isect.prim = prim; - isect.u = u; - isect.v = v; - isect.t = 1.0f; - isect.type = PRIMITIVE_TRIANGLE; - integrator_state_write_isect(kg, state, &isect); - - /* Setup next kernel to execute. */ - const int shader_index = shader & SHADER_MASK; - const int shader_flags = kernel_tex_fetch(__shaders, shader_index).flags; - if (shader_flags & SD_HAS_RAYTRACE) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); - } - else { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader_index); - } - } - - return true; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_init_from_camera.h b/intern/cycles/kernel/integrator/integrator_init_from_camera.h deleted file mode 100644 index 499a72ffbc4..00000000000 --- a/intern/cycles/kernel/integrator/integrator_init_from_camera.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/camera/camera.h" - -#include "kernel/film/film_accumulate.h" -#include "kernel/film/film_adaptive_sampling.h" - -#include "kernel/integrator/integrator_path_state.h" -#include "kernel/integrator/integrator_shadow_catcher.h" - -#include "kernel/sample/sample_pattern.h" - -CCL_NAMESPACE_BEGIN - -ccl_device_inline void integrate_camera_sample(KernelGlobals kg, - const int sample, - const int x, - const int y, - const uint rng_hash, - ccl_private Ray *ray) -{ - /* Filter sampling. */ - float filter_u, filter_v; - - if (sample == 0) { - filter_u = 0.5f; - filter_v = 0.5f; - } - else { - path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_u, &filter_v); - } - - /* Depth of field sampling. */ - float lens_u = 0.0f, lens_v = 0.0f; - if (kernel_data.cam.aperturesize > 0.0f) { - path_rng_2D(kg, rng_hash, sample, PRNG_LENS_U, &lens_u, &lens_v); - } - - /* Motion blur time sampling. */ - float time = 0.0f; -#ifdef __CAMERA_MOTION__ - if (kernel_data.cam.shuttertime != -1.0f) - time = path_rng_1D(kg, rng_hash, sample, PRNG_TIME); -#endif - - /* Generate camera ray. */ - camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray); -} - -/* Return false to indicate that this pixel is finished. - * Used by CPU implementation to not attempt to sample pixel for multiple samples once its known - * that the pixel did converge. */ -ccl_device bool integrator_init_from_camera(KernelGlobals kg, - IntegratorState state, - ccl_global const KernelWorkTile *ccl_restrict tile, - ccl_global float *render_buffer, - const int x, - const int y, - const int scheduled_sample) -{ - PROFILING_INIT(kg, PROFILING_RAY_SETUP); - - /* Initialize path state to give basic buffer access and allow early outputs. */ - path_state_init(state, tile, x, y); - - /* Check whether the pixel has converged and should not be sampled anymore. */ - if (!kernel_need_sample_pixel(kg, state, render_buffer)) { - return false; - } - - /* Count the sample and get an effective sample for this pixel. - * - * This logic allows to both count actual number of samples per pixel, and to add samples to this - * pixel after it was converged and samples were added somewhere else (in which case the - * `scheduled_sample` will be different from actual number of samples in this pixel). */ - const int sample = kernel_accum_sample(kg, state, render_buffer, scheduled_sample); - - /* Initialize random number seed for path. */ - const uint rng_hash = path_rng_hash_init(kg, sample, x, y); - - { - /* Generate camera ray. */ - Ray ray; - integrate_camera_sample(kg, sample, x, y, rng_hash, &ray); - if (ray.t == 0.0f) { - return true; - } - - /* Write camera ray to state. */ - integrator_state_write_ray(kg, state, &ray); - } - - /* Initialize path state for path integration. */ - path_state_init_integrator(kg, state, sample, rng_hash); - - /* Continue with intersect_closest kernel, optionally initializing volume - * stack before that if the camera may be inside a volume. 
*/ - if (kernel_data.cam.is_inside_volume) { - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK); - } - else { - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); - } - - return true; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h deleted file mode 100644 index 41d3dfde41a..00000000000 --- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/camera/camera_projection.h" - -#include "kernel/integrator/integrator_path_state.h" -#include "kernel/integrator/integrator_shadow_catcher.h" - -#include "kernel/light/light.h" - -#include "kernel/util/util_differential.h" - -#include "kernel/geom/geom.h" - -#include "kernel/bvh/bvh.h" - -CCL_NAMESPACE_BEGIN - -template -ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg, - IntegratorState state, - const int shader_flags) -{ - - /* Optional AO bounce termination. - * We continue evaluating emissive/transparent surfaces and volumes, similar - * to direct lighting. Only if we know there are none can we terminate the - * path immediately. */ - if (path_state_ao_bounce(kg, state)) { - if (shader_flags & (SD_HAS_TRANSPARENT_SHADOW | SD_HAS_EMISSION)) { - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; - } - else if (!integrator_state_volume_stack_is_empty(kg, state)) { - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_AFTER_VOLUME; - } - else { - return true; - } - } - - /* Load random number state. */ - RNGState rng_state; - path_state_rng_load(state, &rng_state); - - /* We perform path termination in this kernel to avoid launching shade_surface - * and evaluating the shader when not needed. Only for emission and transparent - * surfaces in front of emission do we need to evaluate the shader, since we - * perform MIS as part of indirect rays. */ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - const float probability = path_state_continuation_probability(kg, state, path_flag); - - if (probability != 1.0f) { - const float terminate = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE); - - if (probability == 0.0f || terminate >= probability) { - if (shader_flags & SD_HAS_EMISSION) { - /* Mark path to be terminated right after shader evaluation on the surface. */ - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE; - } - else if (!integrator_state_volume_stack_is_empty(kg, state)) { - /* TODO: only do this for emissive volumes. */ - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_IN_NEXT_VOLUME; - } - else { - return true; - } - } - } - - return false; -} - -/* Note that current_kernel is a template value since making this a variable - * leads to poor performance with CUDA atomics. 
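The continuation-probability test above is standard Russian roulette: paths survive with probability p, and (in general, outside the code shown here) surviving paths compensate by dividing their throughput by p so the estimator stays unbiased. A generic standalone sketch of that scheme, with an arbitrary per-bounce attenuation factor and minimum-bounce count:

/* Russian roulette sketch: probabilistically terminate paths while keeping
 * the estimator unbiased by boosting surviving paths' throughput by 1/p. */
#include <algorithm>
#include <cstdio>
#include <random>

int main()
{
  std::mt19937 rng(7);
  std::uniform_real_distribution<float> uniform(0.0f, 1.0f);

  float throughput = 1.0f;
  for (int bounce = 0; bounce < 64; ++bounce) {
    /* Continuation probability; here simply tied to the current throughput
     * and disabled for the first few bounces. */
    const float probability = (bounce < 3) ? 1.0f : std::min(throughput, 1.0f);

    if (probability != 1.0f) {
      const float terminate = uniform(rng);
      if (probability == 0.0f || terminate >= probability) {
        std::printf("terminated at bounce %d\n", bounce);
        return 0;
      }
      throughput /= probability; /* survivor compensation */
    }

    throughput *= 0.6f; /* stand-in for a BSDF/albedo factor per bounce */
  }
  std::printf("reached max bounces, throughput %g\n", throughput);
}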
*/ -template -ccl_device_forceinline void integrator_intersect_shader_next_kernel( - KernelGlobals kg, - IntegratorState state, - ccl_private const Intersection *ccl_restrict isect, - const int shader, - const int shader_flags) -{ - /* Note on scheduling. - * - * When there is no shadow catcher split the scheduling is simple: schedule surface shading with - * or without raytrace support, depending on the shader used. - * - * When there is a shadow catcher split the general idea is to have the following configuration: - * - * - Schedule surface shading kernel (with corresponding raytrace support) for the ray which - * will trace shadow catcher object. - * - * - When no alpha-over of approximate shadow catcher is needed, schedule surface shading for - * the matte ray. - * - * - Otherwise schedule background shading kernel, so that we have a background to alpha-over - * on. The background kernel will then schedule surface shading for the matte ray. - * - * Note that the splitting leaves kernel and sorting counters as-is, so use INIT semantic for - * the matte path. */ - - const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); - - if (use_raytrace_kernel) { - INTEGRATOR_PATH_NEXT_SORTED( - current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); - } - else { - INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); - } - -#ifdef __SHADOW_CATCHER__ - const int object_flags = intersection_get_object_flags(kg, isect); - if (kernel_shadow_catcher_split(kg, state, object_flags)) { - if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) { - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND; - - INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); - } - else if (use_raytrace_kernel) { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); - } - else { - INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); - } - } -#endif -} - -ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state) -{ - PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST); - - /* Read ray from integrator state into local memory. */ - Ray ray ccl_optional_struct_init; - integrator_state_read_ray(kg, state, &ray); - kernel_assert(ray.t != 0.0f); - - const uint visibility = path_state_ray_visibility(state); - const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim); - const int last_isect_object = INTEGRATOR_STATE(state, isect, object); - - /* Trick to use short AO rays to approximate indirect light at the end of the path. */ - if (path_state_ao_bounce(kg, state)) { - ray.t = kernel_data.integrator.ao_bounces_distance; - - const float object_ao_distance = kernel_tex_fetch(__objects, last_isect_object).ao_distance; - if (object_ao_distance != 0.0f) { - ray.t = object_ao_distance; - } - } - - /* Scene Intersection. */ - Intersection isect ccl_optional_struct_init; - bool hit = scene_intersect(kg, &ray, visibility, &isect); - - /* TODO: remove this and do it in the various intersection functions instead. */ - if (!hit) { - isect.prim = PRIM_NONE; - } - - /* Light intersection for MIS. */ - if (kernel_data.integrator.use_lamp_mis) { - /* NOTE: if we make lights visible to camera rays, we'll need to initialize - * these in the path_state_init. 
*/ - const int last_type = INTEGRATOR_STATE(state, isect, type); - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - hit = lights_intersect( - kg, &ray, &isect, last_isect_prim, last_isect_object, last_type, path_flag) || - hit; - } - - /* Write intersection result into global integrator state memory. */ - integrator_state_write_isect(kg, state, &isect); - -#ifdef __VOLUME__ - if (!integrator_state_volume_stack_is_empty(kg, state)) { - const bool hit_surface = hit && !(isect.type & PRIMITIVE_LAMP); - const int shader = (hit_surface) ? intersection_get_shader(kg, &isect) : SHADER_NONE; - const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0; - - if (!integrator_intersect_terminate( - kg, state, flags)) { - /* Continue with volume kernel if we are inside a volume, regardless - * if we hit anything. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, - DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); - } - else { - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); - } - return; - } -#endif - - if (hit) { - /* Hit a surface, continue with light or surface kernel. */ - if (isect.type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, - DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); - return; - } - else { - /* Hit a surface, continue with surface kernel unless terminated. */ - const int shader = intersection_get_shader(kg, &isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; - - if (!integrator_intersect_terminate( - kg, state, flags)) { - integrator_intersect_shader_next_kernel( - kg, state, &isect, shader, flags); - return; - } - else { - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); - return; - } - } - } - else { - /* Nothing hit, continue with background kernel. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, - DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); - return; - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h deleted file mode 100644 index 90422445fad..00000000000 --- a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Visibility for the shadow ray. 
*/ -ccl_device_forceinline uint integrate_intersect_shadow_visibility(KernelGlobals kg, - ConstIntegratorShadowState state) -{ - uint visibility = PATH_RAY_SHADOW; - -#ifdef __SHADOW_CATCHER__ - const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag); - visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility); -#endif - - return visibility; -} - -ccl_device bool integrate_intersect_shadow_opaque(KernelGlobals kg, - IntegratorShadowState state, - ccl_private const Ray *ray, - const uint visibility) -{ - /* Mask which will pick only opaque visibility bits from the `visibility`. - * Calculate the mask at compile time: the visibility will either be a high bits for the shadow - * catcher objects, or lower bits for the regular objects (there is no need to check the path - * state here again). */ - constexpr const uint opaque_mask = SHADOW_CATCHER_VISIBILITY_SHIFT(PATH_RAY_SHADOW_OPAQUE) | - PATH_RAY_SHADOW_OPAQUE; - - Intersection isect; - const bool opaque_hit = scene_intersect(kg, ray, visibility & opaque_mask, &isect); - - if (!opaque_hit) { - INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0; - } - - return opaque_hit; -} - -ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals kg, - ConstIntegratorShadowState state) -{ - const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; - const int transparent_bounce = INTEGRATOR_STATE(state, shadow_path, transparent_bounce); - - return max(transparent_max_bounce - transparent_bounce - 1, 0); -} - -#ifdef __TRANSPARENT_SHADOWS__ -# if defined(__KERNEL_CPU__) -ccl_device int shadow_intersections_compare(const void *a, const void *b) -{ - const Intersection *isect_a = (const Intersection *)a; - const Intersection *isect_b = (const Intersection *)b; - - if (isect_a->t < isect_b->t) - return -1; - else if (isect_a->t > isect_b->t) - return 1; - else - return 0; -} -# endif - -ccl_device_inline void sort_shadow_intersections(IntegratorShadowState state, uint num_hits) -{ - kernel_assert(num_hits > 0); - -# ifdef __KERNEL_GPU__ - /* Use bubble sort which has more friendly memory pattern on GPU. */ - bool swapped; - do { - swapped = false; - for (int j = 0; j < num_hits - 1; ++j) { - if (INTEGRATOR_STATE_ARRAY(state, shadow_isect, j, t) > - INTEGRATOR_STATE_ARRAY(state, shadow_isect, j + 1, t)) { - struct Intersection tmp_j ccl_optional_struct_init; - struct Intersection tmp_j_1 ccl_optional_struct_init; - integrator_state_read_shadow_isect(state, &tmp_j, j); - integrator_state_read_shadow_isect(state, &tmp_j_1, j + 1); - integrator_state_write_shadow_isect(state, &tmp_j_1, j); - integrator_state_write_shadow_isect(state, &tmp_j, j + 1); - swapped = true; - } - } - --num_hits; - } while (swapped); -# else - Intersection *isect_array = (Intersection *)state->shadow_isect; - qsort(isect_array, num_hits, sizeof(Intersection), shadow_intersections_compare); -# endif -} - -ccl_device bool integrate_intersect_shadow_transparent(KernelGlobals kg, - IntegratorShadowState state, - ccl_private const Ray *ray, - const uint visibility) -{ - /* Limit the number hits to the max transparent bounces allowed and the size that we - * have available in the integrator state. 
*/ - const uint max_hits = integrate_shadow_max_transparent_hits(kg, state); - uint num_hits = 0; - float throughput = 1.0f; - bool opaque_hit = scene_intersect_shadow_all( - kg, state, ray, visibility, max_hits, &num_hits, &throughput); - - /* Computed throughput from baked shadow transparency, where we can bypass recording - * intersections and shader evaluation. */ - if (throughput != 1.0f) { - INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) *= throughput; - } - - /* If number of hits exceed the transparent bounces limit, make opaque. */ - if (num_hits > max_hits) { - opaque_hit = true; - } - - if (!opaque_hit) { - const uint num_recorded_hits = min(num_hits, min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE)); - - if (num_recorded_hits > 0) { - sort_shadow_intersections(state, num_recorded_hits); - } - - INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = num_hits; - } - else { - INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0; - } - - return opaque_hit; -} -#endif - -ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowState state) -{ - PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW); - - /* Read ray from integrator state into local memory. */ - Ray ray ccl_optional_struct_init; - integrator_state_read_shadow_ray(kg, state, &ray); - - /* Compute visibility. */ - const uint visibility = integrate_intersect_shadow_visibility(kg, state); - -#ifdef __TRANSPARENT_SHADOWS__ - /* TODO: compile different kernels depending on this? Especially for OptiX - * conditional trace calls are bad. */ - const bool opaque_hit = (kernel_data.integrator.transparent_shadows) ? - integrate_intersect_shadow_transparent(kg, state, &ray, visibility) : - integrate_intersect_shadow_opaque(kg, state, &ray, visibility); -#else - const bool opaque_hit = integrate_intersect_shadow_opaque(kg, state, &ray, visibility); -#endif - - if (opaque_hit) { - /* Hit an opaque surface, shadow path ends here. */ - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); - return; - } - else { - /* Hit nothing or transparent surfaces, continue to shadow kernel - * for shading and render buffer output. - * - * TODO: could also write to render buffer directly if no transparent shadows? - * Could save a kernel execution for the common case. */ - INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, - DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); - return; - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h b/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h deleted file mode 100644 index b575e7fd1e6..00000000000 --- a/intern/cycles/kernel/integrator/integrator_intersect_subsurface.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "kernel/integrator/integrator_subsurface.h" - -CCL_NAMESPACE_BEGIN - -ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorState state) -{ - PROFILING_INIT(kg, PROFILING_INTERSECT_SUBSURFACE); - -#ifdef __SUBSURFACE__ - if (subsurface_scatter(kg, state)) { - return; - } -#endif - - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h b/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h deleted file mode 100644 index 505d9687948..00000000000 --- a/intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/bvh/bvh.h" -#include "kernel/geom/geom.h" -#include "kernel/integrator/integrator_shader_eval.h" -#include "kernel/integrator/integrator_volume_stack.h" - -CCL_NAMESPACE_BEGIN - -ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, - IntegratorState state, - const float3 from_P, - const float3 to_P) -{ - PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK); - - ShaderDataTinyStorage stack_sd_storage; - ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage); - - kernel_assert(kernel_data.integrator.use_volumes); - - Ray volume_ray ccl_optional_struct_init; - volume_ray.P = from_P; - volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t); - - /* Store to avoid global fetches on every intersection step. */ - const uint volume_stack_size = kernel_data.volume_stack_size; - -#ifdef __VOLUME_RECORD_ALL__ - Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all( - kg, &volume_ray, hits, 2 * volume_stack_size, PATH_RAY_ALL_VISIBILITY); - if (num_hits > 0) { - Intersection *isect = hits; - - qsort(hits, num_hits, sizeof(Intersection), intersections_compare); - - for (uint hit = 0; hit < num_hits; ++hit, ++isect) { - shader_setup_from_ray(kg, stack_sd, &volume_ray, isect); - volume_stack_enter_exit(kg, state, stack_sd); - } - } -#else - Intersection isect; - int step = 0; - while (step < 2 * volume_stack_size && - scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) { - shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect); - volume_stack_enter_exit(kg, state, stack_sd); - - /* Move ray forward. 
*/ - volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); - if (volume_ray.t != FLT_MAX) { - volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t); - } - ++step; - } -#endif -} - -ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorState state) -{ - PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK); - - ShaderDataTinyStorage stack_sd_storage; - ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage); - - Ray volume_ray ccl_optional_struct_init; - integrator_state_read_ray(kg, state, &volume_ray); - volume_ray.t = FLT_MAX; - - const uint visibility = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_ALL_VISIBILITY); - int stack_index = 0, enclosed_index = 0; - - /* Write background shader. */ - if (kernel_data.background.volume_shader != SHADER_NONE) { - const VolumeStack new_entry = {OBJECT_NONE, kernel_data.background.volume_shader}; - integrator_state_write_volume_stack(state, stack_index, new_entry); - stack_index++; - } - - /* Store to avoid global fetches on every intersection step. */ - const uint volume_stack_size = kernel_data.volume_stack_size; - -#ifdef __VOLUME_RECORD_ALL__ - Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all( - kg, &volume_ray, hits, 2 * volume_stack_size, visibility); - if (num_hits > 0) { - int enclosed_volumes[MAX_VOLUME_STACK_SIZE]; - Intersection *isect = hits; - - qsort(hits, num_hits, sizeof(Intersection), intersections_compare); - - for (uint hit = 0; hit < num_hits; ++hit, ++isect) { - shader_setup_from_ray(kg, stack_sd, &volume_ray, isect); - if (stack_sd->flag & SD_BACKFACING) { - bool need_add = true; - for (int i = 0; i < enclosed_index && need_add; ++i) { - /* If ray exited the volume and never entered to that volume - * it means that camera is inside such a volume. - */ - if (enclosed_volumes[i] == stack_sd->object) { - need_add = false; - } - } - for (int i = 0; i < stack_index && need_add; ++i) { - /* Don't add intersections twice. */ - VolumeStack entry = integrator_state_read_volume_stack(state, i); - if (entry.object == stack_sd->object) { - need_add = false; - break; - } - } - if (need_add && stack_index < volume_stack_size - 1) { - const VolumeStack new_entry = {stack_sd->object, stack_sd->shader}; - integrator_state_write_volume_stack(state, stack_index, new_entry); - ++stack_index; - } - } - else { - /* If ray from camera enters the volume, this volume shouldn't - * be added to the stack on exit. - */ - enclosed_volumes[enclosed_index++] = stack_sd->object; - } - } - } -#else - /* CUDA does not support definition of a variable size arrays, so use the maximum possible. */ - int enclosed_volumes[MAX_VOLUME_STACK_SIZE]; - int step = 0; - - while (stack_index < volume_stack_size - 1 && enclosed_index < volume_stack_size - 1 && - step < 2 * volume_stack_size) { - Intersection isect; - if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) { - break; - } - - shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect); - if (stack_sd->flag & SD_BACKFACING) { - /* If ray exited the volume and never entered to that volume - * it means that camera is inside such a volume. - */ - bool need_add = true; - for (int i = 0; i < enclosed_index && need_add; ++i) { - /* If ray exited the volume and never entered to that volume - * it means that camera is inside such a volume. - */ - if (enclosed_volumes[i] == stack_sd->object) { - need_add = false; - } - } - for (int i = 0; i < stack_index && need_add; ++i) { - /* Don't add intersections twice. 
*/ - VolumeStack entry = integrator_state_read_volume_stack(state, i); - if (entry.object == stack_sd->object) { - need_add = false; - break; - } - } - if (need_add) { - const VolumeStack new_entry = {stack_sd->object, stack_sd->shader}; - integrator_state_write_volume_stack(state, stack_index, new_entry); - ++stack_index; - } - } - else { - /* If ray from camera enters the volume, this volume shouldn't - * be added to the stack on exit. - */ - enclosed_volumes[enclosed_index++] = stack_sd->object; - } - - /* Move ray forward. */ - volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); - ++step; - } -#endif - - /* Write terminator. */ - const VolumeStack new_entry = {OBJECT_NONE, SHADER_NONE}; - integrator_state_write_volume_stack(state, stack_index, new_entry); - - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, - DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_megakernel.h b/intern/cycles/kernel/integrator/integrator_megakernel.h deleted file mode 100644 index 21a483a792b..00000000000 --- a/intern/cycles/kernel/integrator/integrator_megakernel.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/integrator/integrator_init_from_camera.h" -#include "kernel/integrator/integrator_intersect_closest.h" -#include "kernel/integrator/integrator_intersect_shadow.h" -#include "kernel/integrator/integrator_intersect_subsurface.h" -#include "kernel/integrator/integrator_intersect_volume_stack.h" -#include "kernel/integrator/integrator_shade_background.h" -#include "kernel/integrator/integrator_shade_light.h" -#include "kernel/integrator/integrator_shade_shadow.h" -#include "kernel/integrator/integrator_shade_surface.h" -#include "kernel/integrator/integrator_shade_volume.h" - -CCL_NAMESPACE_BEGIN - -ccl_device void integrator_megakernel(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) -{ - /* Each kernel indicates the next kernel to execute, so here we simply - * have to check what that kernel is and execute it. */ - while (true) { - /* Handle any shadow paths before we potentially create more shadow paths. */ - const uint32_t shadow_queued_kernel = INTEGRATOR_STATE( - &state->shadow, shadow_path, queued_kernel); - if (shadow_queued_kernel) { - switch (shadow_queued_kernel) { - case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: - integrator_intersect_shadow(kg, &state->shadow); - break; - case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: - integrator_shade_shadow(kg, &state->shadow, render_buffer); - break; - default: - kernel_assert(0); - break; - } - continue; - } - - /* Handle any AO paths before we potentially create more AO paths. 
*/ - const uint32_t ao_queued_kernel = INTEGRATOR_STATE(&state->ao, shadow_path, queued_kernel); - if (ao_queued_kernel) { - switch (ao_queued_kernel) { - case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: - integrator_intersect_shadow(kg, &state->ao); - break; - case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: - integrator_shade_shadow(kg, &state->ao, render_buffer); - break; - default: - kernel_assert(0); - break; - } - continue; - } - - /* Then handle regular path kernels. */ - const uint32_t queued_kernel = INTEGRATOR_STATE(state, path, queued_kernel); - if (queued_kernel) { - switch (queued_kernel) { - case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: - integrator_intersect_closest(kg, state); - break; - case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND: - integrator_shade_background(kg, state, render_buffer); - break; - case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: - integrator_shade_surface(kg, state, render_buffer); - break; - case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: - integrator_shade_volume(kg, state, render_buffer); - break; - case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: - integrator_shade_surface_raytrace(kg, state, render_buffer); - break; - case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: - integrator_shade_light(kg, state, render_buffer); - break; - case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: - integrator_intersect_subsurface(kg, state); - break; - case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: - integrator_intersect_volume_stack(kg, state); - break; - default: - kernel_assert(0); - break; - } - continue; - } - - break; - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_path_state.h b/intern/cycles/kernel/integrator/integrator_path_state.h deleted file mode 100644 index 73062b26682..00000000000 --- a/intern/cycles/kernel/integrator/integrator_path_state.h +++ /dev/null @@ -1,376 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/sample/sample_pattern.h" - -CCL_NAMESPACE_BEGIN - -/* Initialize queues, so that the this path is considered terminated. - * Used for early outputs in the camera ray initialization, as well as initialization of split - * states for shadow catcher. */ -ccl_device_inline void path_state_init_queues(IntegratorState state) -{ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; -#ifdef __KERNEL_CPU__ - INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0; - INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0; -#endif -} - -/* Minimalistic initialization of the path state, which is needed for early outputs in the - * integrator initialization to work. 
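The megakernel above is a small state machine: every kernel records which kernel should run next in a queued-kernel field, zero means that (sub)path terminated, and queued shadow and AO work is drained before the main path is advanced so it cannot pile up. A minimal standalone sketch of that dispatch pattern, with an illustrative enum, state struct and stub kernels standing in for the real ones:

// Sketch: dispatch loop over "next kernel" tags, with a sub-path drained first.
#include <cstdint>

enum KernelTag : uint32_t {
  KERNEL_NONE = 0, /* Nothing queued: the path is terminated. */
  KERNEL_INTERSECT_CLOSEST,
  KERNEL_SHADE_SURFACE,
  KERNEL_INTERSECT_SHADOW,
  KERNEL_SHADE_SHADOW,
};

struct PathState {
  uint32_t queued_kernel = KERNEL_INTERSECT_CLOSEST;
  uint32_t shadow_queued_kernel = KERNEL_NONE;
};

/* Stand-in for the real kernels: each stage does its work and queues the
 * follow-up stage, or KERNEL_NONE to terminate that (sub)path. */
void run_kernel(uint32_t tag, PathState &state)
{
  switch (tag) {
    case KERNEL_INTERSECT_CLOSEST:
      state.queued_kernel = KERNEL_SHADE_SURFACE;
      break;
    case KERNEL_SHADE_SURFACE:
      state.queued_kernel = KERNEL_NONE; /* e.g. nothing left to shade */
      break;
    case KERNEL_INTERSECT_SHADOW:
      state.shadow_queued_kernel = KERNEL_SHADE_SHADOW;
      break;
    case KERNEL_SHADE_SHADOW:
      state.shadow_queued_kernel = KERNEL_NONE;
      break;
    default:
      break;
  }
}

void megakernel_sketch(PathState &state)
{
  while (true) {
    /* Handle queued shadow work before the main path can spawn more of it. */
    if (state.shadow_queued_kernel != KERNEL_NONE) {
      run_kernel(state.shadow_queued_kernel, state);
      continue;
    }
    if (state.queued_kernel != KERNEL_NONE) {
      run_kernel(state.queued_kernel, state);
      continue;
    }
    break; /* Nothing queued on either path: done. */
  }
}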
*/ -ccl_device_inline void path_state_init(IntegratorState state, - ccl_global const KernelWorkTile *ccl_restrict tile, - const int x, - const int y) -{ - const uint render_pixel_index = (uint)tile->offset + x + y * tile->stride; - - INTEGRATOR_STATE_WRITE(state, path, render_pixel_index) = render_pixel_index; - - path_state_init_queues(state); -} - -/* Initialize the rest of the path state needed to continue the path integration. */ -ccl_device_inline void path_state_init_integrator(KernelGlobals kg, - IntegratorState state, - const int sample, - const uint rng_hash) -{ - INTEGRATOR_STATE_WRITE(state, path, sample) = sample; - INTEGRATOR_STATE_WRITE(state, path, bounce) = 0; - INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = 0; - INTEGRATOR_STATE_WRITE(state, path, glossy_bounce) = 0; - INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = 0; - INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = 0; - INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = 0; - INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = 0; - INTEGRATOR_STATE_WRITE(state, path, rng_hash) = rng_hash; - INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BASE_NUM; - INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | - PATH_RAY_TRANSPARENT_BACKGROUND; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; - INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX; - INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f); - - if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { - INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, object) = OBJECT_NONE; - INTEGRATOR_STATE_ARRAY_WRITE( - state, volume_stack, 0, shader) = kernel_data.background.volume_shader; - INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, object) = OBJECT_NONE; - INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, shader) = SHADER_NONE; - } - -#ifdef __DENOISING_FEATURES__ - if (kernel_data.kernel_features & KERNEL_FEATURE_DENOISING) { - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_DENOISING_FEATURES; - INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_float3(); - } -#endif -} - -ccl_device_inline void path_state_next(KernelGlobals kg, IntegratorState state, int label) -{ - uint32_t flag = INTEGRATOR_STATE(state, path, flag); - - /* ray through transparent keeps same flags from previous ray and is - * not counted as a regular bounce, transparent has separate max */ - if (label & LABEL_TRANSPARENT) { - uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce) + 1; - - flag |= PATH_RAY_TRANSPARENT; - if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { - flag |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE; - } - - if (!kernel_data.integrator.transparent_shadows) - flag |= PATH_RAY_MIS_SKIP; - - INTEGRATOR_STATE_WRITE(state, path, flag) = flag; - INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; - /* Random number generator next bounce. 
*/ - INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; - return; - } - - uint32_t bounce = INTEGRATOR_STATE(state, path, bounce) + 1; - if (bounce >= kernel_data.integrator.max_bounce) { - flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; - } - - flag &= ~(PATH_RAY_ALL_VISIBILITY | PATH_RAY_MIS_SKIP); - -#ifdef __VOLUME__ - if (label & LABEL_VOLUME_SCATTER) { - /* volume scatter */ - flag |= PATH_RAY_VOLUME_SCATTER; - flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; - if (bounce == 1) { - flag |= PATH_RAY_VOLUME_PASS; - } - - const int volume_bounce = INTEGRATOR_STATE(state, path, volume_bounce) + 1; - INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = volume_bounce; - if (volume_bounce >= kernel_data.integrator.max_volume_bounce) { - flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; - } - } - else -#endif - { - /* surface reflection/transmission */ - if (label & LABEL_REFLECT) { - flag |= PATH_RAY_REFLECT; - flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; - - if (label & LABEL_DIFFUSE) { - const int diffuse_bounce = INTEGRATOR_STATE(state, path, diffuse_bounce) + 1; - INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = diffuse_bounce; - if (diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) { - flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; - } - } - else { - const int glossy_bounce = INTEGRATOR_STATE(state, path, glossy_bounce) + 1; - INTEGRATOR_STATE_WRITE(state, path, glossy_bounce) = glossy_bounce; - if (glossy_bounce >= kernel_data.integrator.max_glossy_bounce) { - flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; - } - } - } - else { - kernel_assert(label & LABEL_TRANSMIT); - - flag |= PATH_RAY_TRANSMIT; - - if (!(label & LABEL_TRANSMIT_TRANSPARENT)) { - flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; - } - - const int transmission_bounce = INTEGRATOR_STATE(state, path, transmission_bounce) + 1; - INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = transmission_bounce; - if (transmission_bounce >= kernel_data.integrator.max_transmission_bounce) { - flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; - } - } - - /* diffuse/glossy/singular */ - if (label & LABEL_DIFFUSE) { - flag |= PATH_RAY_DIFFUSE | PATH_RAY_DIFFUSE_ANCESTOR; - } - else if (label & LABEL_GLOSSY) { - flag |= PATH_RAY_GLOSSY; - } - else { - kernel_assert(label & LABEL_SINGULAR); - flag |= PATH_RAY_GLOSSY | PATH_RAY_SINGULAR | PATH_RAY_MIS_SKIP; - } - - /* Render pass categories. */ - if (bounce == 1) { - flag |= (label & LABEL_TRANSMIT) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS; - } - } - - INTEGRATOR_STATE_WRITE(state, path, flag) = flag; - INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce; - - /* Random number generator next bounce. */ - INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; -} - -#ifdef __VOLUME__ -ccl_device_inline bool path_state_volume_next(IntegratorState state) -{ - /* For volume bounding meshes we pass through without counting transparent - * bounces, only sanity check in case self intersection gets us stuck. */ - uint32_t volume_bounds_bounce = INTEGRATOR_STATE(state, path, volume_bounds_bounce) + 1; - INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = volume_bounds_bounce; - if (volume_bounds_bounce > VOLUME_BOUNDS_MAX) { - return false; - } - - /* Random number generator next bounce. 
*/ - if (volume_bounds_bounce > 1) { - INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; - } - - return true; -} -#endif - -ccl_device_inline uint path_state_ray_visibility(ConstIntegratorState state) -{ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - uint32_t visibility = path_flag & PATH_RAY_ALL_VISIBILITY; - - /* For visibility, diffuse/glossy are for reflection only. */ - if (visibility & PATH_RAY_TRANSMIT) { - visibility &= ~(PATH_RAY_DIFFUSE | PATH_RAY_GLOSSY); - } - - /* todo: this is not supported as its own ray visibility yet. */ - if (path_flag & PATH_RAY_VOLUME_SCATTER) { - visibility |= PATH_RAY_DIFFUSE; - } - - visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility); - - return visibility; -} - -ccl_device_inline float path_state_continuation_probability(KernelGlobals kg, - ConstIntegratorState state, - const uint32_t path_flag) -{ - if (path_flag & PATH_RAY_TRANSPARENT) { - const uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); - /* Do at least specified number of bounces without RR. */ - if (transparent_bounce <= kernel_data.integrator.transparent_min_bounce) { - return 1.0f; - } - } - else { - const uint32_t bounce = INTEGRATOR_STATE(state, path, bounce); - /* Do at least specified number of bounces without RR. */ - if (bounce <= kernel_data.integrator.min_bounce) { - return 1.0f; - } - } - - /* Probabilistic termination: use sqrt() to roughly match typical view - * transform and do path termination a bit later on average. */ - return min(sqrtf(max3(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f); -} - -ccl_device_inline bool path_state_ao_bounce(KernelGlobals kg, ConstIntegratorState state) -{ - if (!kernel_data.integrator.ao_bounces) { - return false; - } - - const int bounce = INTEGRATOR_STATE(state, path, bounce) - - INTEGRATOR_STATE(state, path, transmission_bounce) - - (INTEGRATOR_STATE(state, path, glossy_bounce) > 0) + 1; - return (bounce > kernel_data.integrator.ao_bounces); -} - -/* Random Number Sampling Utility Functions - * - * For each random number in each step of the path we must have a unique - * dimension to avoid using the same sequence twice. - * - * For branches in the path we must be careful not to reuse the same number - * in a sequence and offset accordingly. - */ - -/* RNG State loaded onto stack. 
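The continuation-probability logic above skips Russian roulette below a configured minimum bounce count and otherwise continues with probability sqrt(max throughput channel), clamped to one; the square root keeps paths alive a little longer than the raw throughput would, which matches a typical view transform better. A standalone sketch of that rule together with the compensating throughput division that keeps the estimator unbiased (names are illustrative, the real kernel reads these values from path state):

// Sketch: Russian-roulette continuation with the sqrt(throughput) heuristic.
#include <algorithm>
#include <cmath>

struct Color3 { float x, y, z; };

float continuation_probability(const Color3 &throughput, int bounce, int min_bounce)
{
  if (bounce <= min_bounce) {
    return 1.0f; /* Always continue for the first few bounces. */
  }
  const float max_channel = std::max({std::fabs(throughput.x),
                                      std::fabs(throughput.y),
                                      std::fabs(throughput.z)});
  /* sqrt() terminates a little later on average than the raw throughput. */
  return std::min(std::sqrt(max_channel), 1.0f);
}

/* Returns false when the path is terminated; otherwise rescales throughput
 * so that surviving paths compensate for the terminated ones. */
bool apply_roulette(Color3 &throughput, int bounce, int min_bounce, float rand01)
{
  const float p = continuation_probability(throughput, bounce, min_bounce);
  if (rand01 >= p) {
    return false;
  }
  throughput.x /= p;
  throughput.y /= p;
  throughput.z /= p;
  return true;
}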
*/ -typedef struct RNGState { - uint rng_hash; - uint rng_offset; - int sample; -} RNGState; - -ccl_device_inline void path_state_rng_load(ConstIntegratorState state, - ccl_private RNGState *rng_state) -{ - rng_state->rng_hash = INTEGRATOR_STATE(state, path, rng_hash); - rng_state->rng_offset = INTEGRATOR_STATE(state, path, rng_offset); - rng_state->sample = INTEGRATOR_STATE(state, path, sample); -} - -ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState state, - ccl_private RNGState *rng_state) -{ - rng_state->rng_hash = INTEGRATOR_STATE(state, shadow_path, rng_hash); - rng_state->rng_offset = INTEGRATOR_STATE(state, shadow_path, rng_offset); - rng_state->sample = INTEGRATOR_STATE(state, shadow_path, sample); -} - -ccl_device_inline float path_state_rng_1D(KernelGlobals kg, - ccl_private const RNGState *rng_state, - int dimension) -{ - return path_rng_1D( - kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension); -} - -ccl_device_inline void path_state_rng_2D(KernelGlobals kg, - ccl_private const RNGState *rng_state, - int dimension, - ccl_private float *fx, - ccl_private float *fy) -{ - path_rng_2D( - kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy); -} - -ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg, - ccl_private const RNGState *rng_state, - uint hash) -{ - /* Use a hash instead of dimension, this is not great but avoids adding - * more dimensions to each bounce which reduces quality of dimensions we - * are already using. */ - return path_rng_1D( - kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset); -} - -ccl_device_inline float path_branched_rng_1D(KernelGlobals kg, - ccl_private const RNGState *rng_state, - int branch, - int num_branches, - int dimension) -{ - return path_rng_1D(kg, - rng_state->rng_hash, - rng_state->sample * num_branches + branch, - rng_state->rng_offset + dimension); -} - -ccl_device_inline void path_branched_rng_2D(KernelGlobals kg, - ccl_private const RNGState *rng_state, - int branch, - int num_branches, - int dimension, - ccl_private float *fx, - ccl_private float *fy) -{ - path_rng_2D(kg, - rng_state->rng_hash, - rng_state->sample * num_branches + branch, - rng_state->rng_offset + dimension, - fx, - fy); -} - -/* Utility functions to get light termination value, - * since it might not be needed in many cases. - */ -ccl_device_inline float path_state_rng_light_termination(KernelGlobals kg, - ccl_private const RNGState *state) -{ - if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) { - return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE); - } - return 0.0f; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h deleted file mode 100644 index b3bef9a234e..00000000000 --- a/intern/cycles/kernel/integrator/integrator_shade_background.h +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
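The RNG helpers above keep samples decorrelated by giving every bounce a fresh block of dimensions (the rng_offset advanced by PRNG_BOUNCE_NUM) and by remapping branched samples to sample * num_branches + branch so sibling branches never reuse a sequence. A standalone sketch of that indexing scheme; the hash below is only a stand-in for the real low-discrepancy sampler, the indexing is the point:

// Sketch: unique (sample, dimension) indexing for path and branched sampling.
#include <cstdint>

static inline uint32_t hash_u32(uint32_t x)
{
  x ^= x >> 16; x *= 0x7feb352du;
  x ^= x >> 15; x *= 0x846ca68bu;
  x ^= x >> 16;
  return x;
}

static inline float rng_hash_to_float(uint32_t seed, uint32_t sample, uint32_t dimension)
{
  const uint32_t h = hash_u32(seed ^ hash_u32(sample ^ hash_u32(dimension)));
  return (h >> 8) * (1.0f / 16777216.0f); /* 24-bit mantissa in [0, 1). */
}

struct RNGStateSketch {
  uint32_t rng_hash;   /* Per-pixel seed. */
  uint32_t rng_offset; /* Advanced by a fixed amount every bounce. */
  uint32_t sample;     /* Sample index within the pixel. */
};

float path_rng_1d(const RNGStateSketch &s, uint32_t dimension)
{
  return rng_hash_to_float(s.rng_hash, s.sample, s.rng_offset + dimension);
}

float branched_rng_1d(const RNGStateSketch &s, uint32_t branch, uint32_t num_branches,
                      uint32_t dimension)
{
  /* Each branch behaves like its own sample so sequences never overlap. */
  return rng_hash_to_float(s.rng_hash, s.sample * num_branches + branch,
                           s.rng_offset + dimension);
}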
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/film/film_accumulate.h" -#include "kernel/integrator/integrator_shader_eval.h" -#include "kernel/light/light.h" -#include "kernel/light/light_sample.h" -#include "kernel/sample/sample_mis.h" - -CCL_NAMESPACE_BEGIN - -ccl_device float3 integrator_eval_background_shader(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) -{ -#ifdef __BACKGROUND__ - const int shader = kernel_data.background.surface_shader; - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - /* Use visibility flag to skip lights. */ - if (shader & SHADER_EXCLUDE_ANY) { - if (((shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || - ((shader & SHADER_EXCLUDE_GLOSSY) && ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) == - (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) || - ((shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || - ((shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) || - ((shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) - return zero_float3(); - } - - /* Use fast constant background color if available. */ - float3 L = zero_float3(); - if (!shader_constant_emission_eval(kg, shader, &L)) { - /* Evaluate background shader. */ - - /* TODO: does aliasing like this break automatic SoA in CUDA? - * Should we instead store closures separate from ShaderData? */ - ShaderDataTinyStorage emission_sd_storage; - ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - - PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); - shader_setup_from_background(kg, - emission_sd, - INTEGRATOR_STATE(state, ray, P), - INTEGRATOR_STATE(state, ray, D), - INTEGRATOR_STATE(state, ray, time)); - - PROFILING_SHADER(emission_sd->object, emission_sd->shader); - PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); - shader_eval_surface( - kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); - - L = shader_background_eval(emission_sd); - } - - /* Background MIS weights. */ -# ifdef __BACKGROUND_MIS__ - /* Check if background light exists or if we should skip pdf. */ - if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) && - kernel_data.background.use_mis) { - const float3 ray_P = INTEGRATOR_STATE(state, ray, P); - const float3 ray_D = INTEGRATOR_STATE(state, ray, D); - const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); - - /* multiple importance sampling, get background light pdf for ray - * direction, and compute weight with respect to BSDF pdf */ - const float pdf = background_light_pdf(kg, ray_P - ray_D * mis_ray_t, ray_D); - const float mis_weight = power_heuristic(mis_ray_pdf, pdf); - - L *= mis_weight; - } -# endif - - return L; -#else - return make_float3(0.8f, 0.8f, 0.8f); -#endif -} - -ccl_device_inline void integrate_background(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) -{ - /* Accumulate transparency for transparent background. We can skip background - * shader evaluation unless a background pass is used. 
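The background MIS above weights a BSDF-sampled hit of the world by power_heuristic(pdf_bsdf, pdf_light), where the light pdf is evaluated for the same direction from the point where the BSDF was sampled (hence stepping back along the ray by mis_ray_t). A standalone sketch of the weight itself, assuming the usual power heuristic with exponent two and at least one non-zero pdf:

// Sketch: power heuristic for combining two sampling strategies. A sample
// drawn from strategy A with density pdf_a is weighted by
// pdf_a^2 / (pdf_a^2 + pdf_b^2); the two strategies' weights for the same
// direction sum to one, which keeps the combined estimator unbiased.
inline float power_heuristic(float pdf_a, float pdf_b)
{
  const float a2 = pdf_a * pdf_a;
  const float b2 = pdf_b * pdf_b;
  return a2 / (a2 + b2); /* Caller guarantees at least one pdf is non-zero. */
}

// Example: a background hit generated by BSDF sampling. Its radiance is
// scaled by the BSDF strategy's weight; explicit light sampling contributes
// the complementary weight for the same direction elsewhere.
inline float weighted_background(float L_background, float pdf_bsdf, float pdf_light)
{
  return L_background * power_heuristic(pdf_bsdf, pdf_light);
}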
*/ - bool eval_background = true; - float transparent = 0.0f; - - const bool is_transparent_background_ray = kernel_data.background.transparent && - (INTEGRATOR_STATE(state, path, flag) & - PATH_RAY_TRANSPARENT_BACKGROUND); - - if (is_transparent_background_ray) { - transparent = average(INTEGRATOR_STATE(state, path, throughput)); - -#ifdef __PASSES__ - eval_background = (kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND)); -#else - eval_background = false; -#endif - } - - /* Evaluate background shader. */ - float3 L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) : - zero_float3(); - - /* When using the ao bounces approximation, adjust background - * shader intensity with ao factor. */ - if (path_state_ao_bounce(kg, state)) { - L *= kernel_data.integrator.ao_bounces_factor; - } - - /* Write to render buffer. */ - kernel_accum_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer); -} - -ccl_device_inline void integrate_distant_lights(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) -{ - const float3 ray_D = INTEGRATOR_STATE(state, ray, D); - const float ray_time = INTEGRATOR_STATE(state, ray, time); - LightSample ls ccl_optional_struct_init; - for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) { - if (light_sample_from_distant_ray(kg, ray_D, lamp, &ls)) { - /* Use visibility flag to skip lights. */ -#ifdef __PASSES__ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - if (ls.shader & SHADER_EXCLUDE_ANY) { - if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || - ((ls.shader & SHADER_EXCLUDE_GLOSSY) && - ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) == - (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) || - ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || - ((ls.shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) || - ((ls.shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) - return; - } -#endif - - /* Evaluate light shader. */ - /* TODO: does aliasing like this break automatic SoA in CUDA? */ - ShaderDataTinyStorage emission_sd_storage; - ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); - if (is_zero(light_eval)) { - return; - } - - /* MIS weighting. */ - if (!(path_flag & PATH_RAY_MIS_SKIP)) { - /* multiple importance sampling, get regular light pdf, - * and compute weight with respect to BSDF pdf */ - const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf); - light_eval *= mis_weight; - } - - /* Write to render buffer. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission(kg, state, throughput, light_eval, render_buffer); - } - } -} - -ccl_device void integrator_shade_background(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) -{ - PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP); - - /* TODO: unify these in a single loop to only have a single shader evaluation call. 
*/ - integrate_distant_lights(kg, state, render_buffer); - integrate_background(kg, state, render_buffer); - -#ifdef __SHADOW_CATCHER__ - if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { - INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND; - - const int isect_prim = INTEGRATOR_STATE(state, isect, prim); - const int isect_type = INTEGRATOR_STATE(state, isect, type); - const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim, isect_type); - const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; - - if (shader_flags & SD_HAS_RAYTRACE) { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND, - DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, - shader); - } - else { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND, - DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, - shader); - } - return; - } -#endif - - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_light.h b/intern/cycles/kernel/integrator/integrator_shade_light.h deleted file mode 100644 index 7d220006322..00000000000 --- a/intern/cycles/kernel/integrator/integrator_shade_light.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/film/film_accumulate.h" -#include "kernel/integrator/integrator_shader_eval.h" -#include "kernel/light/light.h" -#include "kernel/light/light_sample.h" - -CCL_NAMESPACE_BEGIN - -ccl_device_inline void integrate_light(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) -{ - /* Setup light sample. */ - Intersection isect ccl_optional_struct_init; - integrator_state_read_isect(kg, state, &isect); - - float3 ray_P = INTEGRATOR_STATE(state, ray, P); - const float3 ray_D = INTEGRATOR_STATE(state, ray, D); - const float ray_time = INTEGRATOR_STATE(state, ray, time); - - /* Advance ray beyond light. */ - /* TODO: can we make this more numerically robust to avoid reintersecting the - * same light in some cases? */ - const float3 new_ray_P = ray_offset(ray_P + ray_D * isect.t, ray_D); - INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P; - INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t; - - /* Set position to where the BSDF was sampled, for correct MIS PDF. */ - const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); - ray_P -= ray_D * mis_ray_t; - isect.t += mis_ray_t; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = mis_ray_t + isect.t; - - LightSample ls ccl_optional_struct_init; - const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls); - - if (!use_light_sample) { - return; - } - - /* Use visibility flag to skip lights. 
*/ -#ifdef __PASSES__ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - if (ls.shader & SHADER_EXCLUDE_ANY) { - if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || - ((ls.shader & SHADER_EXCLUDE_GLOSSY) && - ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) == - (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) || - ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || - ((ls.shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) - return; - } -#endif - - /* Evaluate light shader. */ - /* TODO: does aliasing like this break automatic SoA in CUDA? */ - ShaderDataTinyStorage emission_sd_storage; - ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); - if (is_zero(light_eval)) { - return; - } - - /* MIS weighting. */ - if (!(path_flag & PATH_RAY_MIS_SKIP)) { - /* multiple importance sampling, get regular light pdf, - * and compute weight with respect to BSDF pdf */ - const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf); - light_eval *= mis_weight; - } - - /* Write to render buffer. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission(kg, state, throughput, light_eval, render_buffer); -} - -ccl_device void integrator_shade_light(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) -{ - PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP); - - integrate_light(kg, state, render_buffer); - - /* TODO: we could get stuck in an infinite loop if there are precision issues - * and the same light is hit again. - * - * As a workaround count this as a transparent bounce. It makes some sense - * to interpret lights as transparent surfaces (and support making them opaque), - * but this needs to be revisited. */ - uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce) + 1; - INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; - - if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); - return; - } - else { - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, - DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); - return; - } - - /* TODO: in some cases we could continue directly to SHADE_BACKGROUND, but - * probably that optimization is probably not practical if we add lights to - * scene geometry. */ -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h deleted file mode 100644 index 0c4eeb8d10d..00000000000 --- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "kernel/integrator/integrator_shade_volume.h" -#include "kernel/integrator/integrator_volume_stack.h" -#include "kernel/integrator/integrator_shader_eval.h" - -CCL_NAMESPACE_BEGIN - -ccl_device_inline bool shadow_intersections_has_remaining(const uint num_hits) -{ - return num_hits >= INTEGRATOR_SHADOW_ISECT_SIZE; -} - -#ifdef __TRANSPARENT_SHADOWS__ -ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, - IntegratorShadowState state, - const int hit) -{ - PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE); - - /* TODO: does aliasing like this break automatic SoA in CUDA? - * Should we instead store closures separate from ShaderData? - * - * TODO: is it better to declare this outside the loop or keep it local - * so the compiler can see there is no dependency between iterations? */ - ShaderDataTinyStorage shadow_sd_storage; - ccl_private ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage); - - /* Setup shader data at surface. */ - Intersection isect ccl_optional_struct_init; - integrator_state_read_shadow_isect(state, &isect, hit); - - Ray ray ccl_optional_struct_init; - integrator_state_read_shadow_ray(kg, state, &ray); - - shader_setup_from_ray(kg, shadow_sd, &ray, &isect); - - /* Evaluate shader. */ - if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { - shader_eval_surface( - kg, state, shadow_sd, NULL, PATH_RAY_SHADOW); - } - -# ifdef __VOLUME__ - /* Exit/enter volume. */ - shadow_volume_stack_enter_exit(kg, state, shadow_sd); -# endif - - /* Compute transparency from closures. */ - return shader_bsdf_transparency(kg, shadow_sd); -} - -# ifdef __VOLUME__ -ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, - IntegratorShadowState state, - const int hit, - const int num_recorded_hits, - ccl_private float3 *ccl_restrict - throughput) -{ - PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME); - - /* TODO: deduplicate with surface, or does it not matter for memory usage? */ - ShaderDataTinyStorage shadow_sd_storage; - ccl_private ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage); - - /* Setup shader data. */ - Ray ray ccl_optional_struct_init; - integrator_state_read_shadow_ray(kg, state, &ray); - - /* Modify ray position and length to match current segment. */ - const float start_t = (hit == 0) ? 0.0f : - INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t); - const float end_t = (hit < num_recorded_hits) ? - INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) : - ray.t; - ray.P += start_t * ray.D; - ray.t = end_t - start_t; - - shader_setup_from_volume(kg, shadow_sd, &ray); - - const float step_size = volume_stack_step_size( - kg, [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); }); - - volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size); -} -# endif - -ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, - IntegratorShadowState state, - const uint num_hits) -{ - /* Accumulate shadow for transparent surfaces. */ - const uint num_recorded_hits = min(num_hits, INTEGRATOR_SHADOW_ISECT_SIZE); - - for (uint hit = 0; hit < num_recorded_hits + 1; hit++) { - /* Volume shaders. 
*/ - if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) { -# ifdef __VOLUME__ - if (!integrator_state_shadow_volume_stack_is_empty(kg, state)) { - float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput); - integrate_transparent_volume_shadow(kg, state, hit, num_recorded_hits, &throughput); - if (is_zero(throughput)) { - return true; - } - - INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput; - } -# endif - } - - /* Surface shaders. */ - if (hit < num_recorded_hits) { - const float3 shadow = integrate_transparent_surface_shadow(kg, state, hit); - const float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow; - if (is_zero(throughput)) { - return true; - } - - INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput; - INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) += 1; - INTEGRATOR_STATE_WRITE(state, shadow_path, rng_offset) += PRNG_BOUNCE_NUM; - } - - /* Note we do not need to check max_transparent_bounce here, the number - * of intersections is already limited and made opaque in the - * INTERSECT_SHADOW kernel. */ - } - - if (shadow_intersections_has_remaining(num_hits)) { - /* There are more hits that we could not recorded due to memory usage, - * adjust ray to intersect again from the last hit. */ - const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t); - const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P); - const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D); - INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D); - INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t; - } - - return false; -} -#endif /* __TRANSPARENT_SHADOWS__ */ - -ccl_device void integrator_shade_shadow(KernelGlobals kg, - IntegratorShadowState state, - ccl_global float *ccl_restrict render_buffer) -{ - PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SETUP); - const uint num_hits = INTEGRATOR_STATE(state, shadow_path, num_hits); - -#ifdef __TRANSPARENT_SHADOWS__ - /* Evaluate transparent shadows. */ - const bool opaque = integrate_transparent_shadow(kg, state, num_hits); - if (opaque) { - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); - return; - } -#endif - - if (shadow_intersections_has_remaining(num_hits)) { - /* More intersections to find, continue shadow ray. */ - INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, - DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); - return; - } - else { - kernel_accum_light(kg, state, render_buffer); - INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); - return; - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_surface.h b/intern/cycles/kernel/integrator/integrator_shade_surface.h deleted file mode 100644 index 70dce1c4913..00000000000 --- a/intern/cycles/kernel/integrator/integrator_shade_surface.h +++ /dev/null @@ -1,557 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
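The transparent-shadow loop above multiplies the shadow throughput by each recorded surface's transparency (and by any volume segment between hits), bails out as soon as the throughput reaches zero, and, when more hits exist than the fixed record buffer can hold, restarts the shadow ray just past the furthest recorded hit. A standalone sketch of that accumulation pattern with illustrative types, ignoring the volume segments:

// Sketch: accumulate transparent-shadow throughput over a limited number of
// recorded hits, signalling when the ray must be re-cast to find more hits.
#include <cstddef>
#include <vector>

struct ShadowHit {
  float t;            /* Distance along the shadow ray. */
  float transparency; /* Fraction of light passing through this surface. */
};

struct ShadowResult {
  float throughput;   /* Remaining light after the recorded hits. */
  bool fully_blocked; /* Throughput hit zero, the shadow path can terminate. */
  bool needs_restart; /* More hits exist than could be recorded. */
  float restart_t;    /* Re-cast the ray from just past this distance. */
};

ShadowResult accumulate_transparent_shadow(const std::vector<ShadowHit> &recorded_hits,
                                           std::size_t total_num_hits,
                                           float throughput)
{
  ShadowResult result = {throughput, false, false, 0.0f};

  for (const ShadowHit &hit : recorded_hits) {
    result.throughput *= hit.transparency;
    if (result.throughput == 0.0f) {
      result.fully_blocked = true;
      return result;
    }
  }

  if (total_num_hits > recorded_hits.size() && !recorded_hits.empty()) {
    /* The record buffer overflowed: continue from the last recorded hit. */
    result.needs_restart = true;
    result.restart_t = recorded_hits.back().t;
  }
  return result;
}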
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/film/film_accumulate.h" -#include "kernel/film/film_passes.h" - -#include "kernel/integrator/integrator_path_state.h" -#include "kernel/integrator/integrator_shader_eval.h" -#include "kernel/integrator/integrator_subsurface.h" -#include "kernel/integrator/integrator_volume_stack.h" - -#include "kernel/light/light.h" -#include "kernel/light/light_sample.h" - -#include "kernel/sample/sample_mis.h" - -CCL_NAMESPACE_BEGIN - -ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg, - ConstIntegratorState state, - ccl_private ShaderData *sd) -{ - Intersection isect ccl_optional_struct_init; - integrator_state_read_isect(kg, state, &isect); - - Ray ray ccl_optional_struct_init; - integrator_state_read_ray(kg, state, &ray); - - shader_setup_from_ray(kg, sd, &ray, &isect); -} - -#ifdef __HOLDOUT__ -ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg, - ConstIntegratorState state, - ccl_private ShaderData *sd, - ccl_global float *ccl_restrict render_buffer) -{ - /* Write holdout transparency to render buffer and stop if fully holdout. */ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && - (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { - const float3 holdout_weight = shader_holdout_apply(kg, sd); - if (kernel_data.background.transparent) { - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - const float transparent = average(holdout_weight * throughput); - kernel_accum_holdout(kg, state, path_flag, transparent, render_buffer); - } - if (isequal_float3(holdout_weight, one_float3())) { - return false; - } - } - - return true; -} -#endif /* __HOLDOUT__ */ - -#ifdef __EMISSION__ -ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, - ConstIntegratorState state, - ccl_private const ShaderData *sd, - ccl_global float *ccl_restrict - render_buffer) -{ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - /* Evaluate emissive closure. */ - float3 L = shader_emissive_eval(sd); - -# ifdef __HAIR__ - if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && - (sd->type & PRIMITIVE_ALL_TRIANGLE)) -# else - if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) -# endif - { - const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); - const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t); - - /* Multiple importance sampling, get triangle light pdf, - * and compute weight with respect to BSDF pdf. */ - float pdf = triangle_light_pdf(kg, sd, t); - float mis_weight = power_heuristic(bsdf_pdf, pdf); - - L *= mis_weight; - } - - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_accum_emission(kg, state, throughput, L, render_buffer); -} -#endif /* __EMISSION__ */ - -#ifdef __EMISSION__ -/* Path tracing: sample point on light and evaluate light shader, then - * queue shadow ray to be traced. */ -ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, - IntegratorState state, - ccl_private ShaderData *sd, - ccl_private const RNGState *rng_state) -{ - /* Test if there is a light or BSDF that needs direct light. */ - if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) { - return; - } - - /* Sample position on a light. 
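The direct-light code that follows samples a point on a light, evaluates the surface BSDF toward it, divides by the light pdf, and applies an MIS weight against the BSDF pdf before a shadow ray is queued; visibility is multiplied in later by the shadow kernels. A standalone sketch of the unshadowed weighting it applies, assuming the standard single-sample next-event-estimation estimator (types and names are illustrative):

// Sketch: unshadowed contribution of one light sample in next-event
// estimation. bsdf_eval and light_eval are assumed to already include any
// closure weights; the shadow ray later multiplies in visibility.
struct RGB { float r, g, b; };

inline float power_heuristic_w(float pdf_a, float pdf_b)
{
  return (pdf_a * pdf_a) / (pdf_a * pdf_a + pdf_b * pdf_b);
}

inline RGB direct_light_contribution(const RGB &bsdf_eval,  /* f(x, wo, wi) */
                                     float bsdf_pdf,        /* pdf of wi via BSDF sampling */
                                     const RGB &light_eval, /* emitted radiance toward x */
                                     float light_pdf,       /* pdf of this light sample */
                                     bool light_supports_mis)
{
  const float mis = light_supports_mis ? power_heuristic_w(light_pdf, bsdf_pdf) : 1.0f;
  const float scale = mis / light_pdf;
  return {bsdf_eval.r * light_eval.r * scale,
          bsdf_eval.g * light_eval.g * scale,
          bsdf_eval.b * light_eval.b * scale};
}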
*/ - LightSample ls ccl_optional_struct_init; - { - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); - - if (!light_distribution_sample_from_position( - kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, &ls)) { - return; - } - } - - kernel_assert(ls.pdf != 0.0f); - - /* Evaluate light shader. - * - * TODO: can we reuse sd memory? In theory we can move this after - * integrate_surface_bounce, evaluate the BSDF, and only then evaluate - * the light shader. This could also move to its own kernel, for - * non-constant light sources. */ - ShaderDataTinyStorage emission_sd_storage; - ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time); - if (is_zero(light_eval)) { - return; - } - - /* Evaluate BSDF. */ - const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D); - - BsdfEval bsdf_eval ccl_optional_struct_init; - const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader); - bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf); - - if (ls.shader & SHADER_USE_MIS) { - const float mis_weight = power_heuristic(ls.pdf, bsdf_pdf); - bsdf_eval_mul(&bsdf_eval, mis_weight); - } - - /* Path termination. */ - const float terminate = path_state_rng_light_termination(kg, rng_state); - if (light_sample_terminate(kg, &ls, &bsdf_eval, terminate)) { - return; - } - - /* Create shadow ray. */ - Ray ray ccl_optional_struct_init; - light_sample_to_surface_shadow_ray(kg, sd, &ls, &ray); - const bool is_light = light_sample_is_light(&ls); - - /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT( - shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); - - /* Copy volume stack and enter/exit volume. */ - integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); - - if (is_transmission) { -# ifdef __VOLUME__ - shadow_volume_stack_enter_exit(kg, shadow_state, sd); -# endif - } - - /* Write shadow ray and associated state to global memory. */ - integrator_state_write_shadow_ray(kg, shadow_state, &ray); - - /* Copy state from main path to shadow path. */ - const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); - const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); - uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); - shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; - shadow_flag |= (is_transmission) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS; - const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval); - - if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - const float3 diffuse_glossy_ratio = (bounce == 0) ? 
- bsdf_eval_diffuse_glossy_ratio(&bsdf_eval) : - INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; - } - - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( - state, path, render_pixel_index); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE( - state, path, rng_offset); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE( - state, path, rng_hash); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE( - state, path, sample); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, flag) = shadow_flag; - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, bounce) = bounce; - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transparent_bounce) = transparent_bounce; - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_bounce) = INTEGRATOR_STATE( - state, path, diffuse_bounce); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE( - state, path, glossy_bounce); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE( - state, path, transmission_bounce); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, throughput) = throughput; - - if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) { - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unshadowed_throughput) = throughput; - } -} -#endif - -/* Path tracing: bounce off or through surface with new direction. */ -ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( - KernelGlobals kg, - IntegratorState state, - ccl_private ShaderData *sd, - ccl_private const RNGState *rng_state) -{ - /* Sample BSDF or BSSRDF. */ - if (!(sd->flag & (SD_BSDF | SD_BSSRDF))) { - return LABEL_NONE; - } - - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u); - -#ifdef __SUBSURFACE__ - /* BSSRDF closure, we schedule subsurface intersection kernel. */ - if (CLOSURE_IS_BSSRDF(sc->type)) { - return subsurface_bounce(kg, state, sd, sc); - } -#endif - - /* BSDF closure, sample direction. */ - float bsdf_pdf; - BsdfEval bsdf_eval ccl_optional_struct_init; - float3 bsdf_omega_in ccl_optional_struct_init; - differential3 bsdf_domega_in ccl_optional_struct_init; - int label; - - label = shader_bsdf_sample_closure( - kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); - - if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) { - return LABEL_NONE; - } - - /* Setup ray. Note that clipping works through transparent bounces. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P, - (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng); - INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in); - INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ? - INTEGRATOR_STATE(state, ray, t) - sd->ray_length : - FLT_MAX; - -#ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in); -#endif - - /* Update throughput. 
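The statements that follow apply the standard importance-sampling update: the path throughput is multiplied by the sampled closure's value divided by its pdf, and the MIS bookkeeping (mis_ray_pdf, mis_ray_t, min_ray_pdf) is refreshed for the next hit. A minimal sketch of the throughput part, with an early-out mirroring the kernel's zero-energy check (illustrative types, not the Cycles API):

// Sketch: importance-sampling throughput update after sampling a BSDF
// direction. Returning false means the sample carries no energy and the
// path can stop here.
struct Spectrum3 { float r, g, b; };

inline bool update_throughput(Spectrum3 &throughput, const Spectrum3 &bsdf_eval, float bsdf_pdf)
{
  if (bsdf_pdf == 0.0f ||
      (bsdf_eval.r == 0.0f && bsdf_eval.g == 0.0f && bsdf_eval.b == 0.0f)) {
    return false; /* The sampled direction contributes nothing. */
  }
  const float inv_pdf = 1.0f / bsdf_pdf;
  throughput.r *= bsdf_eval.r * inv_pdf;
  throughput.g *= bsdf_eval.g * inv_pdf;
  throughput.b *= bsdf_eval.b * inv_pdf;
  return true;
}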
*/ - float3 throughput = INTEGRATOR_STATE(state, path, throughput); - throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf; - INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; - - if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - if (INTEGRATOR_STATE(state, path, bounce) == 0) { - INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio( - &bsdf_eval); - } - } - - /* Update path state */ - if (label & LABEL_TRANSPARENT) { - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; - } - else { - INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; - INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( - bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); - } - - path_state_next(kg, state, label); - return label; -} - -#ifdef __VOLUME__ -ccl_device_forceinline bool integrate_surface_volume_only_bounce(IntegratorState state, - ccl_private ShaderData *sd) -{ - if (!path_state_volume_next(state)) { - return LABEL_NONE; - } - - /* Setup ray position, direction stays unchanged. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P, -sd->Ng); - - /* Clipping works through transparent. */ - INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length; - -# ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); -# endif - - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; - - return LABEL_TRANSMIT | LABEL_TRANSPARENT; -} -#endif - -#if defined(__AO__) -ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *ccl_restrict sd, - ccl_private const RNGState *ccl_restrict - rng_state, - ccl_global float *ccl_restrict render_buffer) -{ - if (!(kernel_data.kernel_features & KERNEL_FEATURE_AO_ADDITIVE) && - !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_CAMERA)) { - return; - } - - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - - float3 ao_N; - const float3 ao_weight = shader_bsdf_ao( - kg, sd, kernel_data.integrator.ao_additive_factor, &ao_N); - - float3 ao_D; - float ao_pdf; - sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); - - if (!(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f)) { - return; - } - - Ray ray ccl_optional_struct_init; - ray.P = ray_offset(sd->P, sd->Ng); - ray.D = ao_D; - ray.t = kernel_data.integrator.ao_bounces_distance; - ray.time = sd->time; - ray.dP = differential_zero_compact(); - ray.dD = differential_zero_compact(); - - /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao); - - /* Copy volume stack and enter/exit volume. */ - integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); - - /* Write shadow ray and associated state to global memory. */ - integrator_state_write_shadow_ray(kg, shadow_state, &ray); - - /* Copy state from main path to shadow path. 
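The AO code above draws a direction with a cosine-weighted hemisphere sample, whose pdf is cos(theta)/pi, so the cosine term of the AO integral cancels against the pdf and only the shadow ray's visibility remains to be averaged. A standalone sketch of cosine-weighted hemisphere sampling about a unit normal; the branchless frame construction is a common variant and all names are illustrative:

// Sketch: cosine-weighted hemisphere sampling around a unit normal N,
// with pdf(w) = cos(theta) / pi.
#include <cmath>

constexpr float kPi = 3.14159265358979323846f;

struct Vec3 { float x, y, z; };

void sample_cos_hemisphere(const Vec3 &N, float u1, float u2, Vec3 *omega_in, float *pdf)
{
  /* Build an orthonormal frame (T, B, N) around the normal. */
  const float sign = N.z >= 0.0f ? 1.0f : -1.0f;
  const float a = -1.0f / (sign + N.z);
  const float b = N.x * N.y * a;
  const Vec3 T = {1.0f + sign * N.x * N.x * a, sign * b, -sign * N.x};
  const Vec3 B = {b, sign + N.y * N.y * a, -N.y};

  /* Map two uniform numbers to a cosine-weighted direction in that frame. */
  const float r = std::sqrt(u1);
  const float phi = 2.0f * kPi * u2;
  const float x = r * std::cos(phi);
  const float y = r * std::sin(phi);
  const float z = std::sqrt(std::fmax(0.0f, 1.0f - u1)); /* cos(theta) */

  *omega_in = {T.x * x + B.x * y + N.x * z,
               T.y * x + B.y * y + N.y * z,
               T.z * x + B.z * y + N.z * z};
  *pdf = z / kPi;
}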
*/ - const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); - const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); - uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag) | PATH_RAY_SHADOW_FOR_AO; - const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * shader_bsdf_alpha(kg, sd); - - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( - state, path, render_pixel_index); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE( - state, path, rng_offset); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE( - state, path, rng_hash); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE( - state, path, sample); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, flag) = shadow_flag; - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, bounce) = bounce; - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transparent_bounce) = transparent_bounce; - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, throughput) = throughput; - - if (kernel_data.kernel_features & KERNEL_FEATURE_AO_ADDITIVE) { - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unshadowed_throughput) = ao_weight; - } -} -#endif /* defined(__AO__) */ - -template -ccl_device bool integrate_surface(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) - -{ - PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_SURFACE_SETUP); - - /* Setup shader data. */ - ShaderData sd; - integrate_surface_shader_setup(kg, state, &sd); - PROFILING_SHADER(sd.object, sd.shader); - - int continue_path_label = 0; - - /* Skip most work for volume bounding surface. */ -#ifdef __VOLUME__ - if (!(sd.flag & SD_HAS_ONLY_VOLUME)) { -#endif - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - -#ifdef __SUBSURFACE__ - /* Can skip shader evaluation for BSSRDF exit point without bump mapping. */ - if (!(path_flag & PATH_RAY_SUBSURFACE) || ((sd.flag & SD_HAS_BSSRDF_BUMP))) -#endif - { - /* Evaluate shader. */ - PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL); - shader_eval_surface(kg, state, &sd, render_buffer, path_flag); - - /* Initialize additional RNG for BSDFs. */ - if (sd.flag & SD_BSDF_NEEDS_LCG) { - sd.lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_hash), - INTEGRATOR_STATE(state, path, rng_offset), - INTEGRATOR_STATE(state, path, sample), - 0xb4bc3953); - } - } - -#ifdef __SUBSURFACE__ - if (path_flag & PATH_RAY_SUBSURFACE) { - /* When coming from inside subsurface scattering, setup a diffuse - * closure to perform lighting at the exit point. */ - subsurface_shader_data_setup(kg, state, &sd, path_flag); - INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SUBSURFACE; - } -#endif - - shader_prepare_surface_closures(kg, state, &sd); - -#ifdef __HOLDOUT__ - /* Evaluate holdout. */ - if (!integrate_surface_holdout(kg, state, &sd, render_buffer)) { - return false; - } -#endif - -#ifdef __EMISSION__ - /* Write emission. */ - if (sd.flag & SD_EMISSION) { - integrate_surface_emission(kg, state, &sd, render_buffer); - } -#endif - -#ifdef __PASSES__ - /* Write render passes. */ - PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES); - kernel_write_data_passes(kg, state, &sd, render_buffer); -#endif - - /* Load random number state. */ - RNGState rng_state; - path_state_rng_load(state, &rng_state); - - /* Perform path termination. 
Most paths have already been terminated in - * the intersect_closest kernel, this is just for emission and for dividing - * throughput by the probability at the right moment. - * - * Also ensure we don't do it twice for SSS at both the entry and exit point. */ - if (!(path_flag & PATH_RAY_SUBSURFACE)) { - const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ? - 0.0f : - path_state_continuation_probability(kg, state, path_flag); - if (probability == 0.0f) { - return false; - } - else if (probability != 1.0f) { - INTEGRATOR_STATE_WRITE(state, path, throughput) /= probability; - } - } - -#ifdef __DENOISING_FEATURES__ - kernel_write_denoising_features_surface(kg, state, &sd, render_buffer); -#endif - -#ifdef __SHADOW_CATCHER__ - kernel_write_shadow_catcher_bounce_data(kg, state, &sd, render_buffer); -#endif - - /* Direct light. */ - PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT); - integrate_surface_direct_light(kg, state, &sd, &rng_state); - -#if defined(__AO__) - /* Ambient occlusion pass. */ - if (kernel_data.kernel_features & KERNEL_FEATURE_AO) { - PROFILING_EVENT(PROFILING_SHADE_SURFACE_AO); - integrate_surface_ao(kg, state, &sd, &rng_state, render_buffer); - } -#endif - - PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT); - continue_path_label = integrate_surface_bsdf_bssrdf_bounce(kg, state, &sd, &rng_state); -#ifdef __VOLUME__ - } - else { - PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT); - continue_path_label = integrate_surface_volume_only_bounce(state, &sd); - } - - if (continue_path_label & LABEL_TRANSMIT) { - /* Enter/Exit volume. */ - volume_stack_enter_exit(kg, state, &sd); - } -#endif - - return continue_path_label != 0; -} - -template -ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) -{ - if (integrate_surface(kg, state, render_buffer)) { - if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) { - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); - } - else { - kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f); - INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); - } - } - else { - INTEGRATOR_PATH_TERMINATE(current_kernel); - } -} - -ccl_device_forceinline void integrator_shade_surface_raytrace( - KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) -{ - integrator_shade_surface( - kg, state, render_buffer); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shade_volume.h b/intern/cycles/kernel/integrator/integrator_shade_volume.h deleted file mode 100644 index 44ef4803575..00000000000 --- a/intern/cycles/kernel/integrator/integrator_shade_volume.h +++ /dev/null @@ -1,1049 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "kernel/film/film_accumulate.h" -#include "kernel/film/film_passes.h" - -#include "kernel/integrator/integrator_path_state.h" -#include "kernel/integrator/integrator_shader_eval.h" -#include "kernel/integrator/integrator_intersect_closest.h" -#include "kernel/integrator/integrator_volume_stack.h" - -#include "kernel/light/light.h" -#include "kernel/light/light_sample.h" - -#include "kernel/sample/sample_mis.h" - -CCL_NAMESPACE_BEGIN - -#ifdef __VOLUME__ - -/* Events for probabilistic scattering. */ - -typedef enum VolumeIntegrateEvent { - VOLUME_PATH_SCATTERED = 0, - VOLUME_PATH_ATTENUATED = 1, - VOLUME_PATH_MISSED = 2 -} VolumeIntegrateEvent; - -typedef struct VolumeIntegrateResult { - /* Throughput and offset for direct light scattering. */ - bool direct_scatter; - float3 direct_throughput; - float direct_t; - ShaderVolumePhases direct_phases; - - /* Throughput and offset for indirect light scattering. */ - bool indirect_scatter; - float3 indirect_throughput; - float indirect_t; - ShaderVolumePhases indirect_phases; -} VolumeIntegrateResult; - -/* Ignore paths that have volume throughput below this value, to avoid unnecessary work - * and precision issues. - * todo: this value could be tweaked or turned into a probability to avoid unnecessary - * work in volumes and subsurface scattering. */ -# define VOLUME_THROUGHPUT_EPSILON 1e-6f - -/* Volume shader properties - * - * extinction coefficient = absorption coefficient + scattering coefficient - * sigma_t = sigma_a + sigma_s */ - -typedef struct VolumeShaderCoefficients { - float3 sigma_t; - float3 sigma_s; - float3 emission; -} VolumeShaderCoefficients; - -/* Evaluate shader to get extinction coefficient at P. */ -ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg, - IntegratorShadowState state, - ccl_private ShaderData *ccl_restrict sd, - ccl_private float3 *ccl_restrict extinction) -{ - shader_eval_volume(kg, state, sd, PATH_RAY_SHADOW, [=](const int i) { - return integrator_state_read_shadow_volume_stack(state, i); - }); - - if (!(sd->flag & SD_EXTINCTION)) { - return false; - } - - const float density = object_volume_density(kg, sd->object); - *extinction = sd->closure_transparent_extinction * density; - return true; -} - -/* Evaluate shader to get absorption, scattering and emission at P. */ -ccl_device_inline bool volume_shader_sample(KernelGlobals kg, - IntegratorState state, - ccl_private ShaderData *ccl_restrict sd, - ccl_private VolumeShaderCoefficients *coeff) -{ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - shader_eval_volume(kg, state, sd, path_flag, [=](const int i) { - return integrator_state_read_volume_stack(state, i); - }); - - if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) { - return false; - } - - coeff->sigma_s = zero_float3(); - coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : zero_float3(); - coeff->emission = (sd->flag & SD_EMISSION) ? 
sd->closure_emission_background : zero_float3(); - - if (sd->flag & SD_SCATTER) { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_VOLUME(sc->type)) { - coeff->sigma_s += sc->weight; - } - } - } - - const float density = object_volume_density(kg, sd->object); - coeff->sigma_s *= density; - coeff->sigma_t *= density; - coeff->emission *= density; - - return true; -} - -ccl_device_forceinline void volume_step_init(KernelGlobals kg, - ccl_private const RNGState *rng_state, - const float object_step_size, - float t, - ccl_private float *step_size, - ccl_private float *step_shade_offset, - ccl_private float *steps_offset, - ccl_private int *max_steps) -{ - if (object_step_size == FLT_MAX) { - /* Homogeneous volume. */ - *step_size = t; - *step_shade_offset = 0.0f; - *steps_offset = 1.0f; - *max_steps = 1; - } - else { - /* Heterogeneous volume. */ - *max_steps = kernel_data.integrator.volume_max_steps; - float step = min(object_step_size, t); - - /* compute exact steps in advance for malloc */ - if (t > *max_steps * step) { - step = t / (float)*max_steps; - } - - *step_size = step; - - /* Perform shading at this offset within a step, to integrate over - * over the entire step segment. */ - *step_shade_offset = path_state_rng_1D_hash(kg, rng_state, 0x1e31d8a4); - - /* Shift starting point of all segment by this random amount to avoid - * banding artifacts from the volume bounding shape. */ - *steps_offset = path_state_rng_1D_hash(kg, rng_state, 0x3d22c7b3); - } -} - -/* Volume Shadows - * - * These functions are used to attenuate shadow rays to lights. Both absorption - * and scattering will block light, represented by the extinction coefficient. */ - -# if 0 -/* homogeneous volume: assume shader evaluation at the starts gives - * the extinction coefficient for the entire line segment */ -ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState state, - ccl_private Ray *ccl_restrict ray, - ccl_private ShaderData *ccl_restrict sd, - ccl_global float3 *ccl_restrict throughput) -{ - float3 sigma_t = zero_float3(); - - if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) { - *throughput *= volume_color_transmittance(sigma_t, ray->t); - } -} -# endif - -/* heterogeneous volume: integrate stepping through the volume until we - * reach the end, get absorbed entirely, or run out of iterations */ -ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, - IntegratorShadowState state, - ccl_private Ray *ccl_restrict ray, - ccl_private ShaderData *ccl_restrict sd, - ccl_private float3 *ccl_restrict throughput, - const float object_step_size) -{ - /* Load random number state. */ - RNGState rng_state; - shadow_path_state_rng_load(state, &rng_state); - - float3 tp = *throughput; - - /* Prepare for stepping. - * For shadows we do not offset all segments, since the starting point is - * already a random distance inside the volume. It also appears to create - * banding artifacts for unknown reasons. 
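A quick aside on the math used here: the accumulated attenuation is plain Beer-Lambert transmittance, T = exp(-sigma_t * t), and the identity exp(a) * exp(b) == exp(a + b) is what lets the loop sum optical depth and call expf only every few steps. A minimal standalone C++ sketch of that batching, with a made-up toy density field standing in for the volume shader (this is not the Cycles kernel API):

#include <algorithm>
#include <cmath>

struct Color {
  float x, y, z;
};

/* Toy extinction field, purely illustrative, in place of the per-step volume
 * shader evaluation. */
static Color sample_sigma_t(float t)
{
  const float d = 0.5f + 0.5f * std::sin(t);
  return {0.8f * d, 1.0f * d, 1.2f * d};
}

/* Accumulate optical depth and convert it to transmittance only every 8th
 * step, saving expf calls. */
static Color shadow_transmittance(float ray_t, float step_size, int max_steps)
{
  Color sum = {0.0f, 0.0f, 0.0f};
  Color tp = {1.0f, 1.0f, 1.0f};
  float t = 0.0f;

  for (int i = 0; i < max_steps; i++) {
    const float new_t = std::min(ray_t, (i + 1.0f) * step_size);
    const float dt = new_t - t;
    const Color sigma_t = sample_sigma_t(t + 0.5f * dt);

    sum.x -= sigma_t.x * dt;
    sum.y -= sigma_t.y * dt;
    sum.z -= sigma_t.z * dt;

    if ((i & 0x07) == 0 || new_t == ray_t) {
      tp = {std::exp(sum.x), std::exp(sum.y), std::exp(sum.z)};
      if (tp.x < 1e-6f && tp.y < 1e-6f && tp.z < 1e-6f) {
        break; /* nearly all light blocked */
      }
    }

    t = new_t;
    if (t == ray_t) {
      break;
    }
  }
  return tp;
}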
*/ - int max_steps; - float step_size, step_shade_offset, unused; - volume_step_init(kg, - &rng_state, - object_step_size, - ray->t, - &step_size, - &step_shade_offset, - &unused, - &max_steps); - const float steps_offset = 1.0f; - - /* compute extinction at the start */ - float t = 0.0f; - - float3 sum = zero_float3(); - - for (int i = 0; i < max_steps; i++) { - /* advance to new position */ - float new_t = min(ray->t, (i + steps_offset) * step_size); - float dt = new_t - t; - - float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset); - float3 sigma_t = zero_float3(); - - /* compute attenuation over segment */ - sd->P = new_P; - if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) { - /* Compute `expf()` only for every Nth step, to save some calculations - * because `exp(a)*exp(b) = exp(a+b)`, also do a quick #VOLUME_THROUGHPUT_EPSILON - * check then. */ - sum += (-sigma_t * dt); - if ((i & 0x07) == 0) { /* TODO: Other interval? */ - tp = *throughput * exp3(sum); - - /* stop if nearly all light is blocked */ - if (tp.x < VOLUME_THROUGHPUT_EPSILON && tp.y < VOLUME_THROUGHPUT_EPSILON && - tp.z < VOLUME_THROUGHPUT_EPSILON) - break; - } - } - - /* stop if at the end of the volume */ - t = new_t; - if (t == ray->t) { - /* Update throughput in case we haven't done it above */ - tp = *throughput * exp3(sum); - break; - } - } - - *throughput = tp; -} - -/* Equi-angular sampling as in: - * "Importance Sampling Techniques for Path Tracing in Participating Media" */ - -ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict ray, - const float3 light_P, - const float xi, - ccl_private float *pdf) -{ - const float t = ray->t; - const float delta = dot((light_P - ray->P), ray->D); - const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); - if (UNLIKELY(D == 0.0f)) { - *pdf = 0.0f; - return 0.0f; - } - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); - const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a); - if (UNLIKELY(theta_b == theta_a)) { - *pdf = 0.0f; - return 0.0f; - } - *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); - - return min(t, delta + t_); /* min is only for float precision errors */ -} - -ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray, - const float3 light_P, - const float sample_t) -{ - const float delta = dot((light_P - ray->P), ray->D); - const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); - if (UNLIKELY(D == 0.0f)) { - return 0.0f; - } - - const float t = ray->t; - const float t_ = sample_t - delta; - - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); - if (UNLIKELY(theta_b == theta_a)) { - return 0.0f; - } - - const float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); - - return pdf; -} - -ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray, - const float3 light_P, - const float sample_t) -{ - float delta = dot((light_P - ray->P), ray->D); - float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); - if (UNLIKELY(D == 0.0f)) { - return 0.0f; - } - - const float t = ray->t; - const float t_ = sample_t - delta; - - const float theta_a = -atan2f(delta, D); - const float theta_b = atan2f(t - delta, D); - if (UNLIKELY(theta_b == theta_a)) { - return 0.0f; - } - - const float theta_sample = atan2f(t_, D); - const float cdf = (theta_sample - theta_a) / (theta_b - theta_a); - - return cdf; -} - -/* Distance sampling */ - -ccl_device float 
volume_distance_sample(float max_t, - float3 sigma_t, - int channel, - float xi, - ccl_private float3 *transmittance, - ccl_private float3 *pdf) -{ - /* xi is [0, 1[ so log(0) should never happen, division by zero is - * avoided because sample_sigma_t > 0 when SD_SCATTER is set */ - float sample_sigma_t = volume_channel_get(sigma_t, channel); - float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); - float sample_transmittance = volume_channel_get(full_transmittance, channel); - - float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t); - - *transmittance = volume_color_transmittance(sigma_t, sample_t); - *pdf = safe_divide_color(sigma_t * *transmittance, one_float3() - full_transmittance); - - /* todo: optimization: when taken together with hit/miss decision, - * the full_transmittance cancels out drops out and xi does not - * need to be remapped */ - - return sample_t; -} - -ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_t) -{ - float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); - float3 transmittance = volume_color_transmittance(sigma_t, sample_t); - - return safe_divide_color(sigma_t * transmittance, one_float3() - full_transmittance); -} - -/* Emission */ - -ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff, - int closure_flag, - float3 transmittance, - float t) -{ - /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t - * this goes to E * t as sigma_t goes to zero - * - * todo: we should use an epsilon to avoid precision issues near zero sigma_t */ - float3 emission = coeff->emission; - - if (closure_flag & SD_EXTINCTION) { - float3 sigma_t = coeff->sigma_t; - - emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t; - emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t; - emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t; - } - else - emission *= t; - - return emission; -} - -/* Volume Integration */ - -typedef struct VolumeIntegrateState { - /* Volume segment extents. */ - float start_t; - float end_t; - - /* If volume is absorption-only up to this point, and no probabilistic - * scattering or termination has been used yet. */ - bool absorption_only; - - /* Random numbers for scattering. */ - float rscatter; - float rphase; - - /* Multiple importance sampling. */ - VolumeSampleMethod direct_sample_method; - bool use_mis; - float distance_pdf; - float equiangular_pdf; -} VolumeIntegrateState; - -ccl_device_forceinline void volume_integrate_step_scattering( - ccl_private const ShaderData *sd, - ccl_private const Ray *ray, - const float3 equiangular_light_P, - ccl_private const VolumeShaderCoefficients &ccl_restrict coeff, - const float3 transmittance, - ccl_private VolumeIntegrateState &ccl_restrict vstate, - ccl_private VolumeIntegrateResult &ccl_restrict result) -{ - /* Pick random color channel, we use the Veach one-sample - * model with balance heuristic for the channels. */ - const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); - float3 channel_pdf; - const int channel = volume_sample_channel( - albedo, result.indirect_throughput, vstate.rphase, &channel_pdf); - - /* Equiangular sampling for direct lighting. 
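The equiangular technique referred to here is the one implemented by volume_equiangular_sample above. Restated as a standalone C++ sketch, with delta (signed distance of the light position along the ray) and D (perpendicular distance from the light to the ray) assumed precomputed:

#include <algorithm>
#include <cmath>

/* Sample a distance along the ray segment [0, t_max] with pdf proportional to
 * 1 / (squared distance to the light). */
static float equiangular_sample(float t_max, float delta, float D, float xi, float *pdf)
{
  if (D == 0.0f) {
    *pdf = 0.0f;
    return 0.0f;
  }
  const float theta_a = -std::atan2(delta, D);
  const float theta_b = std::atan2(t_max - delta, D);
  if (theta_a == theta_b) {
    *pdf = 0.0f;
    return 0.0f;
  }
  const float t_ = D * std::tan(xi * theta_b + (1.0f - xi) * theta_a);
  *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
  return std::min(t_max, delta + t_); /* min only guards against float precision */
}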
*/ - if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) { - if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t) { - const float new_dt = result.direct_t - vstate.start_t; - const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); - - result.direct_scatter = true; - result.direct_throughput *= coeff.sigma_s * new_transmittance / vstate.equiangular_pdf; - shader_copy_volume_phases(&result.direct_phases, sd); - - /* Multiple importance sampling. */ - if (vstate.use_mis) { - const float distance_pdf = vstate.distance_pdf * - dot(channel_pdf, coeff.sigma_t * new_transmittance); - const float mis_weight = 2.0f * power_heuristic(vstate.equiangular_pdf, distance_pdf); - result.direct_throughput *= mis_weight; - } - } - else { - result.direct_throughput *= transmittance; - vstate.distance_pdf *= dot(channel_pdf, transmittance); - } - } - - /* Distance sampling for indirect and optional direct lighting. */ - if (!result.indirect_scatter) { - /* decide if we will scatter or continue */ - const float sample_transmittance = volume_channel_get(transmittance, channel); - - if (1.0f - vstate.rscatter >= sample_transmittance) { - /* compute sampling distance */ - const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel); - const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t; - const float new_t = vstate.start_t + new_dt; - - /* transmittance and pdf */ - const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); - const float distance_pdf = dot(channel_pdf, coeff.sigma_t * new_transmittance); - - /* throughput */ - result.indirect_scatter = true; - result.indirect_t = new_t; - result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf; - shader_copy_volume_phases(&result.indirect_phases, sd); - - if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) { - /* If using distance sampling for direct light, just copy parameters - * of indirect light since we scatter at the same point then. */ - result.direct_scatter = true; - result.direct_t = result.indirect_t; - result.direct_throughput = result.indirect_throughput; - shader_copy_volume_phases(&result.direct_phases, sd); - - /* Multiple importance sampling. */ - if (vstate.use_mis) { - const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_light_P, new_t); - const float mis_weight = power_heuristic(vstate.distance_pdf * distance_pdf, - equiangular_pdf); - result.direct_throughput *= 2.0f * mis_weight; - } - } - } - else { - /* throughput */ - const float pdf = dot(channel_pdf, transmittance); - result.indirect_throughput *= transmittance / pdf; - if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) { - vstate.distance_pdf *= pdf; - } - - /* remap rscatter so we can reuse it and keep thing stratified */ - vstate.rscatter = 1.0f - (1.0f - vstate.rscatter) / sample_transmittance; - } - } -} - -/* heterogeneous volume distance sampling: integrate stepping through the - * volume until we reach the end, get absorbed entirely, or run out of - * iterations. this does probabilistically scatter or get transmitted through - * for path tracing where we don't want to branch. 
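Per color channel, the scatter-or-continue decision and the random-number remapping used above reduce to the following sketch (plain C++, assuming sigma_t > 0 whenever scattering is possible, as the kernel code also assumes):

#include <cmath>

/* Decide whether to scatter within a step of length dt given extinction
 * sigma_t. On a scatter, write the distance into the step (always <= dt);
 * otherwise remap the random number so it stays stratified for the next step. */
static bool step_scatter_decision(float sigma_t, float dt, float *rscatter, float *scatter_t)
{
  const float T = std::exp(-sigma_t * dt); /* transmittance over this step */

  if (1.0f - *rscatter >= T) {
    /* Scatter: invert the exponential CDF restricted to this step. */
    *scatter_t = -std::log(1.0f - *rscatter) / sigma_t;
    return true;
  }

  /* Continue: condition the random number on having survived the step. */
  *rscatter = 1.0f - (1.0f - *rscatter) / T;
  return false;
}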
*/ -ccl_device_forceinline void volume_integrate_heterogeneous( - KernelGlobals kg, - IntegratorState state, - ccl_private Ray *ccl_restrict ray, - ccl_private ShaderData *ccl_restrict sd, - ccl_private const RNGState *rng_state, - ccl_global float *ccl_restrict render_buffer, - const float object_step_size, - const VolumeSampleMethod direct_sample_method, - const float3 equiangular_light_P, - ccl_private VolumeIntegrateResult &result) -{ - PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE); - - /* Prepare for stepping. - * Using a different step offset for the first step avoids banding artifacts. */ - int max_steps; - float step_size, step_shade_offset, steps_offset; - volume_step_init(kg, - rng_state, - object_step_size, - ray->t, - &step_size, - &step_shade_offset, - &steps_offset, - &max_steps); - - /* Initialize volume integration state. */ - VolumeIntegrateState vstate ccl_optional_struct_init; - vstate.start_t = 0.0f; - vstate.end_t = 0.0f; - vstate.absorption_only = true; - vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE); - vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL); - - /* Multiple importance sampling: pick between equiangular and distance sampling strategy. */ - vstate.direct_sample_method = direct_sample_method; - vstate.use_mis = (direct_sample_method == VOLUME_SAMPLE_MIS); - if (vstate.use_mis) { - if (vstate.rscatter < 0.5f) { - vstate.rscatter *= 2.0f; - vstate.direct_sample_method = VOLUME_SAMPLE_DISTANCE; - } - else { - vstate.rscatter = (vstate.rscatter - 0.5f) * 2.0f; - vstate.direct_sample_method = VOLUME_SAMPLE_EQUIANGULAR; - } - } - vstate.equiangular_pdf = 0.0f; - vstate.distance_pdf = 1.0f; - - /* Initialize volume integration result. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - result.direct_throughput = throughput; - result.indirect_throughput = throughput; - - /* Equiangular sampling: compute distance and PDF in advance. */ - if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) { - result.direct_t = volume_equiangular_sample( - ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf); - } - -# ifdef __DENOISING_FEATURES__ - const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) & - PATH_RAY_DENOISING_FEATURES); - float3 accum_albedo = zero_float3(); -# endif - float3 accum_emission = zero_float3(); - - for (int i = 0; i < max_steps; i++) { - /* Advance to new position */ - vstate.end_t = min(ray->t, (i + steps_offset) * step_size); - const float shade_t = vstate.start_t + (vstate.end_t - vstate.start_t) * step_shade_offset; - sd->P = ray->P + ray->D * shade_t; - - /* compute segment */ - VolumeShaderCoefficients coeff ccl_optional_struct_init; - if (volume_shader_sample(kg, state, sd, &coeff)) { - const int closure_flag = sd->flag; - - /* Evaluate transmittance over segment. */ - const float dt = (vstate.end_t - vstate.start_t); - const float3 transmittance = (closure_flag & SD_EXTINCTION) ? - volume_color_transmittance(coeff.sigma_t, dt) : - one_float3(); - - /* Emission. */ - if (closure_flag & SD_EMISSION) { - /* Only write emission before indirect light scatter position, since we terminate - * stepping at that point if we have already found a direct light scatter position. 
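The 50/50 strategy pick in the MIS branch above reuses a single random number: the value both selects the strategy and, rescaled back to [0, 1), drives the selected technique. A plain C++ sketch of just that pick (the factor 2.0f in the MIS weights elsewhere compensates for the 1/2 selection probability):

enum class SampleMethod { Distance, Equiangular };

/* Pick the direct-light sampling strategy with probability 1/2 each, rescaling
 * the random number so it can be reused by the chosen technique. */
static SampleMethod pick_direct_sample_method(float *rscatter)
{
  if (*rscatter < 0.5f) {
    *rscatter *= 2.0f;
    return SampleMethod::Distance;
  }
  *rscatter = (*rscatter - 0.5f) * 2.0f;
  return SampleMethod::Equiangular;
}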
*/ - if (!result.indirect_scatter) { - const float3 emission = volume_emission_integrate( - &coeff, closure_flag, transmittance, dt); - accum_emission += emission; - } - } - - if (closure_flag & SD_EXTINCTION) { - if ((closure_flag & SD_SCATTER) || !vstate.absorption_only) { -# ifdef __DENOISING_FEATURES__ - /* Accumulate albedo for denoising features. */ - if (write_denoising_features && (closure_flag & SD_SCATTER)) { - const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); - accum_albedo += result.indirect_throughput * albedo * (one_float3() - transmittance); - } -# endif - - /* Scattering and absorption. */ - volume_integrate_step_scattering( - sd, ray, equiangular_light_P, coeff, transmittance, vstate, result); - } - else { - /* Absorption only. */ - result.indirect_throughput *= transmittance; - result.direct_throughput *= transmittance; - } - - /* Stop if nearly all light blocked. */ - if (!result.indirect_scatter) { - if (max3(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) { - result.indirect_throughput = zero_float3(); - break; - } - } - else if (!result.direct_scatter) { - if (max3(result.direct_throughput) < VOLUME_THROUGHPUT_EPSILON) { - break; - } - } - } - - /* If we have scattering data for both direct and indirect, we're done. */ - if (result.direct_scatter && result.indirect_scatter) { - break; - } - } - - /* Stop if at the end of the volume. */ - vstate.start_t = vstate.end_t; - if (vstate.start_t == ray->t) { - break; - } - } - - /* Write accumulated emission. */ - if (!is_zero(accum_emission)) { - kernel_accum_emission(kg, state, result.indirect_throughput, accum_emission, render_buffer); - } - -# ifdef __DENOISING_FEATURES__ - /* Write denoising features. */ - if (write_denoising_features) { - kernel_write_denoising_features_volume( - kg, state, accum_albedo, result.indirect_scatter, render_buffer); - } -# endif /* __DENOISING_FEATURES__ */ -} - -# ifdef __EMISSION__ -/* Path tracing: sample point on light and evaluate light shader, then - * queue shadow ray to be traced. */ -ccl_device_forceinline bool integrate_volume_sample_light( - KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *ccl_restrict sd, - ccl_private const RNGState *ccl_restrict rng_state, - ccl_private LightSample *ccl_restrict ls) -{ - /* Test if there is a light or BSDF that needs direct light. */ - if (!kernel_data.integrator.use_direct_light) { - return false; - } - - /* Sample position on a light. */ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); - - light_distribution_sample_from_volume_segment( - kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls); - - if (ls->shader & SHADER_EXCLUDE_SCATTER) { - return false; - } - - return true; -} - -/* Path tracing: sample point on light and evaluate light shader, then - * queue shadow ray to be traced. 
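The MIS weighting applied below balances the light-sampling pdf against the phase-function pdf. Assuming the kernel's power_heuristic is the standard Veach power heuristic with beta = 2 (a reasonable assumption, not verified from this hunk), the weighting reduces to this plain C++ sketch with scalar stand-ins for the color evaluations:

/* Veach power heuristic with beta = 2: weight for a sample drawn from the
 * technique with pdf 'a' when the competing technique has pdf 'b'. */
static float power_heuristic(float a, float b)
{
  const float a2 = a * a;
  const float b2 = b * b;
  return (a2 + b2 > 0.0f) ? a2 / (a2 + b2) : 0.0f;
}

/* Usage sketch: contribution of a light sample, weighted against the phase pdf. */
static float weighted_light_contribution(float light_eval, float light_pdf, float phase_eval, float phase_pdf)
{
  const float mis_weight = power_heuristic(light_pdf, phase_pdf);
  return mis_weight * phase_eval * light_eval / light_pdf;
}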
*/ -ccl_device_forceinline void integrate_volume_direct_light( - KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *ccl_restrict sd, - ccl_private const RNGState *ccl_restrict rng_state, - const float3 P, - ccl_private const ShaderVolumePhases *ccl_restrict phases, - ccl_private const float3 throughput, - ccl_private LightSample *ccl_restrict ls) -{ - PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT); - - if (!kernel_data.integrator.use_direct_light) { - return; - } - - /* Sample position on the same light again, now from the shading - * point where we scattered. - * - * TODO: decorrelate random numbers and use light_sample_new_position to - * avoid resampling the CDF. */ - { - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - const uint bounce = INTEGRATOR_STATE(state, path, bounce); - float light_u, light_v; - path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); - - if (!light_distribution_sample_from_position( - kg, light_u, light_v, sd->time, P, bounce, path_flag, ls)) { - return; - } - } - - if (ls->shader & SHADER_EXCLUDE_SCATTER) { - return; - } - - /* Evaluate light shader. - * - * TODO: can we reuse sd memory? In theory we can move this after - * integrate_surface_bounce, evaluate the BSDF, and only then evaluate - * the light shader. This could also move to its own kernel, for - * non-constant light sources. */ - ShaderDataTinyStorage emission_sd_storage; - ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time); - if (is_zero(light_eval)) { - return; - } - - /* Evaluate BSDF. */ - BsdfEval phase_eval ccl_optional_struct_init; - const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls->D, &phase_eval); - - if (ls->shader & SHADER_USE_MIS) { - float mis_weight = power_heuristic(ls->pdf, phase_pdf); - bsdf_eval_mul(&phase_eval, mis_weight); - } - - bsdf_eval_mul3(&phase_eval, light_eval / ls->pdf); - - /* Path termination. */ - const float terminate = path_state_rng_light_termination(kg, rng_state); - if (light_sample_terminate(kg, ls, &phase_eval, terminate)) { - return; - } - - /* Create shadow ray. */ - Ray ray ccl_optional_struct_init; - light_sample_to_volume_shadow_ray(kg, sd, ls, P, &ray); - const bool is_light = light_sample_is_light(ls); - - /* Branch off shadow kernel. */ - INTEGRATOR_SHADOW_PATH_INIT( - shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); - - /* Write shadow ray and associated state to global memory. */ - integrator_state_write_shadow_ray(kg, shadow_state, &ray); - - /* Copy state from main path to shadow path. */ - const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); - const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); - uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); - shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; - shadow_flag |= PATH_RAY_VOLUME_PASS; - const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval); - - if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - const float3 diffuse_glossy_ratio = (bounce == 0) ? 
- one_float3() : - INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; - } - - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( - state, path, render_pixel_index); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE( - state, path, rng_offset); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE( - state, path, rng_hash); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE( - state, path, sample); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, flag) = shadow_flag; - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, bounce) = bounce; - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transparent_bounce) = transparent_bounce; - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_bounce) = INTEGRATOR_STATE( - state, path, diffuse_bounce); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE( - state, path, glossy_bounce); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE( - state, path, transmission_bounce); - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, throughput) = throughput_phase; - - if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) { - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unshadowed_throughput) = throughput; - } - - integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); -} -# endif - -/* Path tracing: scatter in new direction using phase function */ -ccl_device_forceinline bool integrate_volume_phase_scatter( - KernelGlobals kg, - IntegratorState state, - ccl_private ShaderData *sd, - ccl_private const RNGState *rng_state, - ccl_private const ShaderVolumePhases *phases) -{ - PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT); - - float phase_u, phase_v; - path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &phase_u, &phase_v); - - /* Phase closure, sample direction. */ - float phase_pdf; - BsdfEval phase_eval ccl_optional_struct_init; - float3 phase_omega_in ccl_optional_struct_init; - differential3 phase_domega_in ccl_optional_struct_init; - - const int label = shader_volume_phase_sample(kg, - sd, - phases, - phase_u, - phase_v, - &phase_eval, - &phase_omega_in, - &phase_domega_in, - &phase_pdf); - - if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) { - return false; - } - - /* Setup ray. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; - INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in); - INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; - -# ifdef __RAY_DIFFERENTIALS__ - INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in); -# endif - - /* Update throughput. 
*/ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf; - INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase; - - if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3(); - } - - /* Update path state */ - INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf; - INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; - INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( - phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); - - path_state_next(kg, state, label); - return true; -} - -/* get the volume attenuation and emission over line segment defined by - * ray, with the assumption that there are no surfaces blocking light - * between the endpoints. distance sampling is used to decide if we will - * scatter or not. */ -ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, - IntegratorState state, - ccl_private Ray *ccl_restrict ray, - ccl_global float *ccl_restrict render_buffer) -{ - ShaderData sd; - shader_setup_from_volume(kg, &sd, ray); - - /* Load random number state. */ - RNGState rng_state; - path_state_rng_load(state, &rng_state); - - /* Sample light ahead of volume stepping, for equiangular sampling. */ - /* TODO: distant lights are ignored now, but could instead use even distribution. */ - LightSample ls ccl_optional_struct_init; - const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE); - const bool have_equiangular_sample = need_light_sample && - integrate_volume_sample_light( - kg, state, &sd, &rng_state, &ls) && - (ls.t != FLT_MAX); - - VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ? - volume_stack_sample_method(kg, state) : - VOLUME_SAMPLE_DISTANCE; - - /* Step through volume. */ - const float step_size = volume_stack_step_size( - kg, [=](const int i) { return integrator_state_read_volume_stack(state, i); }); - - /* TODO: expensive to zero closures? */ - VolumeIntegrateResult result = {}; - volume_integrate_heterogeneous(kg, - state, - ray, - &sd, - &rng_state, - render_buffer, - step_size, - direct_sample_method, - ls.P, - result); - - /* Perform path termination. The intersect_closest will have already marked this path - * to be terminated. That will shading evaluating to leave out any scattering closures, - * but emission and absorption are still handled for multiple importance sampling. */ - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - const float probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ? - 0.0f : - path_state_continuation_probability(kg, state, path_flag); - if (probability == 0.0f) { - return VOLUME_PATH_MISSED; - } - - /* Direct light. */ - if (result.direct_scatter) { - const float3 direct_P = ray->P + result.direct_t * ray->D; - result.direct_throughput /= probability; - integrate_volume_direct_light(kg, - state, - &sd, - &rng_state, - direct_P, - &result.direct_phases, - result.direct_throughput, - &ls); - } - - /* Indirect light. - * - * Only divide throughput by probability if we scatter. For the attenuation - * case the next surface will already do this division. 
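The bookkeeping around 'probability' here is ordinary Russian roulette: the survive/kill decision was already made in the intersect_closest kernel, and this code only applies the compensating division at the right moment so the estimator stays unbiased. The general pattern, as a plain C++ sketch with the random number passed in:

/* Russian roulette: terminate with probability 1 - p, and divide a survivor's
 * throughput by p so the expected contribution is unchanged. 'rand' is a
 * uniform random number in [0, 1). */
static bool russian_roulette(float p, float rand, float *throughput)
{
  if (p == 0.0f || rand >= p) {
    return false; /* terminate the path */
  }
  if (p < 1.0f) {
    *throughput /= p; /* compensate surviving paths */
  }
  return true;
}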
*/ - if (result.indirect_scatter) { - result.indirect_throughput /= probability; - } - INTEGRATOR_STATE_WRITE(state, path, throughput) = result.indirect_throughput; - - if (result.indirect_scatter) { - sd.P = ray->P + result.indirect_t * ray->D; - - if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) { - return VOLUME_PATH_SCATTERED; - } - else { - return VOLUME_PATH_MISSED; - } - } - else { - return VOLUME_PATH_ATTENUATED; - } -} - -#endif - -ccl_device void integrator_shade_volume(KernelGlobals kg, - IntegratorState state, - ccl_global float *ccl_restrict render_buffer) -{ - PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_SETUP); - -#ifdef __VOLUME__ - /* Setup shader data. */ - Ray ray ccl_optional_struct_init; - integrator_state_read_ray(kg, state, &ray); - - Intersection isect ccl_optional_struct_init; - integrator_state_read_isect(kg, state, &isect); - - /* Set ray length to current segment. */ - ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX; - - /* Clean volume stack for background rays. */ - if (isect.prim == PRIM_NONE) { - volume_stack_clean(kg, state); - } - - VolumeIntegrateEvent event = volume_integrate(kg, state, &ray, render_buffer); - - if (event == VOLUME_PATH_SCATTERED) { - /* Queue intersect_closest kernel. */ - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, - DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); - return; - } - else if (event == VOLUME_PATH_MISSED) { - /* End path. */ - INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); - return; - } - else { - /* Continue to background, light or surface. */ - if (isect.prim == PRIM_NONE) { - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, - DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); - return; - } - else if (isect.type & PRIMITIVE_LAMP) { - INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, - DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); - return; - } - else { - /* Hit a surface, continue with surface kernel unless terminated. */ - const int shader = intersection_get_shader(kg, &isect); - const int flags = kernel_tex_fetch(__shaders, shader).flags; - - integrator_intersect_shader_next_kernel( - kg, state, &isect, shader, flags); - return; - } - } -#endif /* __VOLUME__ */ -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shader_eval.h b/intern/cycles/kernel/integrator/integrator_shader_eval.h deleted file mode 100644 index 04a3a965fd3..00000000000 --- a/intern/cycles/kernel/integrator/integrator_shader_eval.h +++ /dev/null @@ -1,869 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Functions to evaluate shaders and use the resulting shader closures. 
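For reference, the kernel scheduling done by integrator_shade_volume above boils down to a small mapping from the volume integration event to the next step. Sketched here with plain enums; the real code uses the INTEGRATOR_PATH_NEXT / INTEGRATOR_PATH_TERMINATE macros and further dispatches on whether the ray hit the background, a lamp or a surface:

enum class VolumeEvent { Scattered, Attenuated, Missed };
enum class NextStep { IntersectClosest, ShadeHit, Terminate };

/* Scattered rays trace a new phase-sampled direction, missed rays end, and
 * attenuated rays continue to whatever the ray hit behind the volume. */
static NextStep next_step_after_volume(VolumeEvent event)
{
  switch (event) {
    case VolumeEvent::Scattered:
      return NextStep::IntersectClosest;
    case VolumeEvent::Missed:
      return NextStep::Terminate;
    case VolumeEvent::Attenuated:
    default:
      return NextStep::ShadeHit;
  }
}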
*/ - -#pragma once - -#include "kernel/closure/alloc.h" -#include "kernel/closure/bsdf_util.h" -#include "kernel/closure/bsdf.h" -#include "kernel/closure/emissive.h" - -#include "kernel/film/film_accumulate.h" - -#include "kernel/svm/svm.h" - -#ifdef __OSL__ -# include "kernel/osl/osl_shader.h" -#endif - -CCL_NAMESPACE_BEGIN - -/* Merging */ - -#if defined(__VOLUME__) -ccl_device_inline void shader_merge_volume_closures(ccl_private ShaderData *sd) -{ - /* Merge identical closures to save closure space with stacked volumes. */ - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sci = &sd->closure[i]; - - if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { - continue; - } - - for (int j = i + 1; j < sd->num_closure; j++) { - ccl_private ShaderClosure *scj = &sd->closure[j]; - if (sci->type != scj->type) { - continue; - } - - ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *) - sci; - ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *) - scj; - if (!(hgi->g == hgj->g)) { - continue; - } - - sci->weight += scj->weight; - sci->sample_weight += scj->sample_weight; - - int size = sd->num_closure - (j + 1); - if (size > 0) { - for (int k = 0; k < size; k++) { - scj[k] = scj[k + 1]; - } - } - - sd->num_closure--; - kernel_assert(sd->num_closure >= 0); - j--; - } - } -} - -ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases *ccl_restrict - phases, - ccl_private const ShaderData *ccl_restrict sd) -{ - phases->num_closure = 0; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *from_sc = &sd->closure[i]; - ccl_private const HenyeyGreensteinVolume *from_hg = - (ccl_private const HenyeyGreensteinVolume *)from_sc; - - if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { - ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure]; - - to_sc->weight = from_sc->weight; - to_sc->sample_weight = from_sc->sample_weight; - to_sc->g = from_hg->g; - phases->num_closure++; - if (phases->num_closure >= MAX_VOLUME_CLOSURE) { - break; - } - } - } -} -#endif /* __VOLUME__ */ - -ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg, - ConstIntegratorState state, - ccl_private ShaderData *sd) -{ - /* Defensive sampling. - * - * We can likely also do defensive sampling at deeper bounces, particularly - * for cases like a perfect mirror but possibly also others. This will need - * a good heuristic. */ - if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) == - 0 && - sd->num_closure > 1) { - float sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sc->sample_weight = max(sc->sample_weight, 0.125f * sum); - } - } - } - - /* Filter glossy. 
- * - * Blurring of bsdf after bounces, for rays that have a small likelihood - * of following this particular path (diffuse, rough glossy) */ - if (kernel_data.integrator.filter_glossy != FLT_MAX) { - float blur_pdf = kernel_data.integrator.filter_glossy * - INTEGRATOR_STATE(state, path, min_ray_pdf); - - if (blur_pdf < 1.0f) { - float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF(sc->type)) { - bsdf_blur(kg, sc, blur_roughness); - } - } - } - } -} - -/* BSDF */ - -ccl_device_inline bool shader_bsdf_is_transmission(ccl_private const ShaderData *sd, - const float3 omega_in) -{ - return dot(sd->N, omega_in) < 0.0f; -} - -ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_shader_flags) -{ - if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) { - return false; - } - if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) { - if (CLOSURE_IS_BSDF_DIFFUSE(type)) { - return true; - } - } - if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) { - if (CLOSURE_IS_BSDF_GLOSSY(type)) { - return true; - } - } - if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) { - if (CLOSURE_IS_BSDF_TRANSMISSION(type)) { - return true; - } - } - return false; -} - -ccl_device_inline float _shader_bsdf_multi_eval(KernelGlobals kg, - ccl_private ShaderData *sd, - const float3 omega_in, - const bool is_transmission, - ccl_private const ShaderClosure *skip_sc, - ccl_private BsdfEval *result_eval, - float sum_pdf, - float sum_sample_weight, - const uint light_shader_flags) -{ - /* This is the veach one-sample model with balance heuristic, - * some PDF factors drop out when using balance heuristic weighting. */ - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (sc == skip_sc) { - continue; - } - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - if (CLOSURE_IS_BSDF(sc->type) && !_shader_bsdf_exclude(sc->type, light_shader_flags)) { - float bsdf_pdf = 0.0f; - float3 eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf); - - if (bsdf_pdf != 0.0f) { - const bool is_diffuse = CLOSURE_IS_BSDF_DIFFUSE(sc->type); - bsdf_eval_accum(result_eval, is_diffuse, eval * sc->weight, 1.0f); - sum_pdf += bsdf_pdf * sc->sample_weight; - } - } - - sum_sample_weight += sc->sample_weight; - } - } - - return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; -} - -#ifndef __KERNEL_CUDA__ -ccl_device -#else -ccl_device_inline -#endif - float - shader_bsdf_eval(KernelGlobals kg, - ccl_private ShaderData *sd, - const float3 omega_in, - const bool is_transmission, - ccl_private BsdfEval *bsdf_eval, - const uint light_shader_flags) -{ - bsdf_eval_init(bsdf_eval, false, zero_float3()); - - return _shader_bsdf_multi_eval( - kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags); -} - -/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */ -ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick( - ccl_private const ShaderData *ccl_restrict sd, ccl_private float *randu) -{ - int sampled = 0; - - if (sd->num_closure > 1) { - /* Pick a BSDF or based on sample weights. 
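The closure pick below is a weighted discrete choice followed by a rescale of the random number, so the same value can later drive the direction sample without losing stratification. In generic form, over a plain weight array rather than the kernel's closure structs:

/* Choose index i with probability weights[i] / sum(weights), then rescale the
 * random number to [0, 1) within the chosen slot for reuse. */
static int pick_weighted(const float *weights, int n, float *randu)
{
  float sum = 0.0f;
  for (int i = 0; i < n; i++) {
    sum += weights[i];
  }

  float r = (*randu) * sum;
  float partial = 0.0f;
  for (int i = 0; i < n; i++) {
    const float next = partial + weights[i];
    if (r < next) {
      *randu = (r - partial) / weights[i]; /* rescale for reuse */
      return i;
    }
    partial = next;
  }
  return n - 1; /* guard against float round-off */
}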
*/ - float sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - - float r = (*randu) * sum; - float partial_sum = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - float next_sum = partial_sum + sc->sample_weight; - - if (r < next_sum) { - sampled = i; - - /* Rescale to reuse for direction sample, to better preserve stratification. */ - *randu = (r - partial_sum) / sc->sample_weight; - break; - } - - partial_sum = next_sum; - } - } - } - - return &sd->closure[sampled]; -} - -/* Return weight for picked BSSRDF. */ -ccl_device_inline float3 -shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd, - ccl_private const ShaderClosure *ccl_restrict bssrdf_sc) -{ - float3 weight = bssrdf_sc->weight; - - if (sd->num_closure > 1) { - float sum = 0.0f; - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - weight *= sum / bssrdf_sc->sample_weight; - } - - return weight; -} - -/* Sample direction for picked BSDF, and return evaluation and pdf for all - * BSDFs combined using MIS. */ -ccl_device int shader_bsdf_sample_closure(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private const ShaderClosure *sc, - float randu, - float randv, - ccl_private BsdfEval *bsdf_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - /* BSSRDF should already have been handled elsewhere. */ - kernel_assert(CLOSURE_IS_BSDF(sc->type)); - - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) { - const bool is_diffuse = CLOSURE_IS_BSDF_DIFFUSE(sc->type); - bsdf_eval_init(bsdf_eval, is_diffuse, eval * sc->weight); - - if (sd->num_closure > 1) { - const bool is_transmission = shader_bsdf_is_transmission(sd, *omega_in); - float sweight = sc->sample_weight; - *pdf = _shader_bsdf_multi_eval( - kg, sd, *omega_in, is_transmission, sc, bsdf_eval, *pdf * sweight, sweight, 0); - } - } - - return label; -} - -ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd) -{ - float roughness = 0.0f; - float sum_weight = 0.0f; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF(sc->type)) { - /* sqrt once to undo the squaring from multiplying roughness on the - * two axes, and once for the squared roughness convention. */ - float weight = fabsf(average(sc->weight)); - roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc))); - sum_weight += weight; - } - } - - return (sum_weight > 0.0f) ? 
roughness / sum_weight : 0.0f; -} - -ccl_device float3 shader_bsdf_transparency(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_HAS_ONLY_VOLUME) { - return one_float3(); - } - else if (sd->flag & SD_TRANSPARENT) { - return sd->closure_transparent_extinction; - } - else { - return zero_float3(); - } -} - -ccl_device void shader_bsdf_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd) -{ - if (sd->flag & SD_TRANSPARENT) { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - - if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { - sc->sample_weight = 0.0f; - sc->weight = zero_float3(); - } - } - - sd->flag &= ~SD_TRANSPARENT; - } -} - -ccl_device float3 shader_bsdf_alpha(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd); - - alpha = max(alpha, zero_float3()); - alpha = min(alpha, one_float3()); - - return alpha; -} - -ccl_device float3 shader_bsdf_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 shader_bsdf_glossy(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 shader_bsdf_transmission(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 eval = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) - eval += sc->weight; - } - - return eval; -} - -ccl_device float3 shader_bsdf_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd) -{ - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) - N += sc->N * fabsf(average(sc->weight)); - } - - return (is_zero(N)) ? sd->N : normalize(N); -} - -ccl_device float3 shader_bsdf_ao(KernelGlobals kg, - ccl_private const ShaderData *sd, - const float ao_factor, - ccl_private float3 *N_) -{ - float3 eval = zero_float3(); - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { - ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; - eval += sc->weight * ao_factor; - N += bsdf->N * fabsf(average(sc->weight)); - } - } - - *N_ = (is_zero(N)) ? sd->N : normalize(N); - return eval; -} - -#ifdef __SUBSURFACE__ -ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd) -{ - float3 N = zero_float3(); - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - - if (CLOSURE_IS_BSSRDF(sc->type)) { - ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; - float avg_weight = fabsf(average(sc->weight)); - - N += bssrdf->N * avg_weight; - } - } - - return (is_zero(N)) ? 
sd->N : normalize(N); -} -#endif /* __SUBSURFACE__ */ - -/* Constant emission optimization */ - -ccl_device bool shader_constant_emission_eval(KernelGlobals kg, - int shader, - ccl_private float3 *eval) -{ - int shader_index = shader & SHADER_MASK; - int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags; - - if (shader_flag & SD_HAS_CONSTANT_EMISSION) { - *eval = make_float3(kernel_tex_fetch(__shaders, shader_index).constant_emission[0], - kernel_tex_fetch(__shaders, shader_index).constant_emission[1], - kernel_tex_fetch(__shaders, shader_index).constant_emission[2]); - - return true; - } - - return false; -} - -/* Background */ - -ccl_device float3 shader_background_eval(ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_EMISSION) { - return sd->closure_emission_background; - } - else { - return zero_float3(); - } -} - -/* Emission */ - -ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd) -{ - if (sd->flag & SD_EMISSION) { - return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; - } - else { - return zero_float3(); - } -} - -/* Holdout */ - -ccl_device float3 shader_holdout_apply(KernelGlobals kg, ccl_private ShaderData *sd) -{ - float3 weight = zero_float3(); - - /* For objects marked as holdout, preserve transparency and remove all other - * closures, replacing them with a holdout weight. */ - if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { - if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) { - weight = one_float3() - sd->closure_transparent_extinction; - - for (int i = 0; i < sd->num_closure; i++) { - ccl_private ShaderClosure *sc = &sd->closure[i]; - if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { - sc->type = NBUILTIN_CLOSURES; - } - } - - sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF)); - } - else { - weight = one_float3(); - } - } - else { - for (int i = 0; i < sd->num_closure; i++) { - ccl_private const ShaderClosure *sc = &sd->closure[i]; - if (CLOSURE_IS_HOLDOUT(sc->type)) { - weight += sc->weight; - } - } - } - - return weight; -} - -/* Surface Evaluation */ - -template -ccl_device void shader_eval_surface(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *ccl_restrict sd, - ccl_global float *ccl_restrict buffer, - uint32_t path_flag) -{ - /* If path is being terminated, we are tracing a shadow ray or evaluating - * emission, then we don't need to store closures. The emission and shadow - * shader data also do not have a closure array to save GPU memory. 
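A compact restatement of the rule described here, using placeholder flag bits (the real PATH_RAY_* bit layout differs and lives in the kernel types header):

#include <cstdint>

enum : uint32_t {
  RAY_TERMINATE = 1u << 0, /* placeholder values, not the kernel's */
  RAY_SHADOW = 1u << 1,
  RAY_EMISSION = 1u << 2,
};

/* Shadow, emission and about-to-terminate paths never sample closures, so no
 * closure storage is reserved for them. */
static int closure_budget(uint32_t path_flag, int max_closures_from_scene)
{
  return (path_flag & (RAY_TERMINATE | RAY_SHADOW | RAY_EMISSION)) ? 0 : max_closures_from_scene;
}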
*/ - int max_closures; - if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { - max_closures = 0; - } - else { - max_closures = kernel_data.max_closures; - } - - sd->num_closure = 0; - sd->num_closure_left = max_closures; - -#ifdef __OSL__ - if (kg->osl) { - if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { - OSLShader::eval_background(kg, state, sd, path_flag); - } - else { - OSLShader::eval_surface(kg, state, sd, path_flag); - } - } - else -#endif - { -#ifdef __SVM__ - svm_eval_nodes(kg, state, sd, buffer, path_flag); -#else - if (sd->object == OBJECT_NONE) { - sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f); - sd->flag |= SD_EMISSION; - } - else { - ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( - sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f)); - if (bsdf != NULL) { - bsdf->N = sd->N; - sd->flag |= bsdf_diffuse_setup(bsdf); - } - } -#endif - } -} - -/* Volume */ - -#ifdef __VOLUME__ - -ccl_device_inline float _shader_volume_phase_multi_eval( - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - const float3 omega_in, - int skip_phase, - ccl_private BsdfEval *result_eval, - float sum_pdf, - float sum_sample_weight) -{ - for (int i = 0; i < phases->num_closure; i++) { - if (i == skip_phase) - continue; - - ccl_private const ShaderVolumeClosure *svc = &phases->closure[i]; - float phase_pdf = 0.0f; - float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf); - - if (phase_pdf != 0.0f) { - bsdf_eval_accum(result_eval, false, eval, 1.0f); - sum_pdf += phase_pdf * svc->sample_weight; - } - - sum_sample_weight += svc->sample_weight; - } - - return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; -} - -ccl_device float shader_volume_phase_eval(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - const float3 omega_in, - ccl_private BsdfEval *phase_eval) -{ - bsdf_eval_init(phase_eval, false, zero_float3()); - - return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f); -} - -ccl_device int shader_volume_phase_sample(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumePhases *phases, - float randu, - float randv, - ccl_private BsdfEval *phase_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - int sampled = 0; - - if (phases->num_closure > 1) { - /* pick a phase closure based on sample weights */ - float sum = 0.0f; - - for (sampled = 0; sampled < phases->num_closure; sampled++) { - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - sum += svc->sample_weight; - } - - float r = randu * sum; - float partial_sum = 0.0f; - - for (sampled = 0; sampled < phases->num_closure; sampled++) { - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - float next_sum = partial_sum + svc->sample_weight; - - if (r <= next_sum) { - /* Rescale to reuse for BSDF direction sample. 
*/ - randu = (r - partial_sum) / svc->sample_weight; - break; - } - - partial_sum = next_sum; - } - - if (sampled == phases->num_closure) { - *pdf = 0.0f; - return LABEL_NONE; - } - } - - /* todo: this isn't quite correct, we don't weight anisotropy properly - * depending on color channels, even if this is perhaps not a common case */ - ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) { - bsdf_eval_init(phase_eval, false, eval); - } - - return label; -} - -ccl_device int shader_phase_sample_closure(KernelGlobals kg, - ccl_private const ShaderData *sd, - ccl_private const ShaderVolumeClosure *sc, - float randu, - float randv, - ccl_private BsdfEval *phase_eval, - ccl_private float3 *omega_in, - ccl_private differential3 *domega_in, - ccl_private float *pdf) -{ - int label; - float3 eval = zero_float3(); - - *pdf = 0.0f; - label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - - if (*pdf != 0.0f) - bsdf_eval_init(phase_eval, false, eval); - - return label; -} - -/* Volume Evaluation */ - -template -ccl_device_inline void shader_eval_volume(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *ccl_restrict sd, - const uint32_t path_flag, - StackReadOp stack_read) -{ - /* If path is being terminated, we are tracing a shadow ray or evaluating - * emission, then we don't need to store closures. The emission and shadow - * shader data also do not have a closure array to save GPU memory. */ - int max_closures; - if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { - max_closures = 0; - } - else { - max_closures = kernel_data.max_closures; - } - - /* reset closures once at the start, we will be accumulating the closures - * for all volumes in the stack into a single array of closures */ - sd->num_closure = 0; - sd->num_closure_left = max_closures; - sd->flag = 0; - sd->object_flag = 0; - - for (int i = 0;; i++) { - const VolumeStack entry = stack_read(i); - if (entry.shader == SHADER_NONE) { - break; - } - - /* Setup shader-data from stack. it's mostly setup already in - * shader_setup_from_volume, this switching should be quick. */ - sd->object = entry.object; - sd->lamp = LAMP_NONE; - sd->shader = entry.shader; - - sd->flag &= ~SD_SHADER_FLAGS; - sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; - sd->object_flag &= ~SD_OBJECT_FLAGS; - - if (sd->object != OBJECT_NONE) { - sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object); - -# ifdef __OBJECT_MOTION__ - /* todo: this is inefficient for motion blur, we should be - * caching matrices instead of recomputing them each step */ - shader_setup_object_transforms(kg, sd, sd->time); -# endif - } - - /* evaluate shader */ -# ifdef __SVM__ -# ifdef __OSL__ - if (kg->osl) { - OSLShader::eval_volume(kg, state, sd, path_flag); - } - else -# endif - { - svm_eval_nodes( - kg, state, sd, NULL, path_flag); - } -# endif - - /* Merge closures to avoid exceeding number of closures limit. 
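The merge performed by shader_merge_volume_closures earlier in this file relies on the fact that Henyey-Greenstein closures with the same anisotropy g can simply have their weights summed into one closure, freeing a slot. A standalone C++ sketch over a simplified closure struct (scalar weight instead of a color, purely illustrative):

#include <vector>

struct PhaseClosure {
  float weight;        /* color weight collapsed to a scalar for brevity */
  float sample_weight;
  float g;             /* Henyey-Greenstein anisotropy */
};

static void merge_identical_phases(std::vector<PhaseClosure> &closures)
{
  for (size_t i = 0; i < closures.size(); i++) {
    for (size_t j = i + 1; j < closures.size();) {
      if (closures[j].g == closures[i].g) {
        closures[i].weight += closures[j].weight;
        closures[i].sample_weight += closures[j].sample_weight;
        closures.erase(closures.begin() + j); /* drop the merged closure */
      }
      else {
        j++;
      }
    }
  }
}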
*/ - if (!shadow) { - if (i > 0) { - shader_merge_volume_closures(sd); - } - } - } -} - -#endif /* __VOLUME__ */ - -/* Displacement Evaluation */ - -template -ccl_device void shader_eval_displacement(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *sd) -{ - sd->num_closure = 0; - sd->num_closure_left = 0; - - /* this will modify sd->P */ -#ifdef __SVM__ -# ifdef __OSL__ - if (kg->osl) - OSLShader::eval_displacement(kg, state, sd); - else -# endif - { - svm_eval_nodes( - kg, state, sd, NULL, 0); - } -#endif -} - -/* Cryptomatte */ - -ccl_device float shader_cryptomatte_id(KernelGlobals kg, int shader) -{ - return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shadow_catcher.h b/intern/cycles/kernel/integrator/integrator_shadow_catcher.h deleted file mode 100644 index 24d03466393..00000000000 --- a/intern/cycles/kernel/integrator/integrator_shadow_catcher.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/integrator/integrator_path_state.h" -#include "kernel/integrator/integrator_state_util.h" - -CCL_NAMESPACE_BEGIN - -/* Check whether current surface bounce is where path is to be split for the shadow catcher. */ -ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals kg, - IntegratorState state, - const int object_flag) -{ -#ifdef __SHADOW_CATCHER__ - if (!kernel_data.integrator.has_shadow_catcher) { - return false; - } - - /* Check the flag first, avoiding fetches form global memory. */ - if ((object_flag & SD_OBJECT_SHADOW_CATCHER) == 0) { - return false; - } - if (object_flag & SD_OBJECT_HOLDOUT_MASK) { - return false; - } - - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - if ((path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) == 0) { - /* Split only on primary rays, secondary bounces are to treat shadow catcher as a regular - * object. */ - return false; - } - - if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) { - return false; - } - - return true; -#else - (void)object_flag; - return false; -#endif -} - -/* Check whether the current path can still split. */ -ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg, - ConstIntegratorState state) -{ - if (INTEGRATOR_PATH_IS_TERMINATED) { - return false; - } - - const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); - - if (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) { - /* Shadow catcher was already hit and the state was split. No further split is allowed. */ - return false; - } - - return (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) != 0; -} - -/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths - * after this function. 
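The split test in kernel_shadow_catcher_is_path_split_bounce above condenses to a conjunction of flag checks; written out as plain booleans, one per flag test:

/* Split only when the scene has a shadow catcher, the hit object is a catcher
 * but not a holdout, the ray is still a primary ray (transparent-background
 * flag set), and the path has not already been split into the catcher pass. */
static bool should_split_for_shadow_catcher(bool scene_has_catcher,
                                            bool object_is_catcher,
                                            bool object_is_holdout,
                                            bool is_primary_ray,
                                            bool already_catcher_pass)
{
  return scene_has_catcher && object_is_catcher && !object_is_holdout && is_primary_ray &&
         !already_catcher_pass;
}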
*/ -ccl_device_inline bool kernel_shadow_catcher_split(KernelGlobals kg, - IntegratorState state, - const int object_flags) -{ -#ifdef __SHADOW_CATCHER__ - - if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, object_flags)) { - return false; - } - - /* The split is to be done. Mark the current state as such, so that it stops contributing to the - * shadow catcher matte pass, but keeps contributing to the combined pass. */ - INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT; - - /* Split new state from the current one. This new state will only track contribution of shadow - * catcher objects ignoring non-catcher objects. */ - integrator_state_shadow_catcher_split(kg, state); - - return true; -#else - (void)object_flags; - return false; -#endif -} - -#ifdef __SHADOW_CATCHER__ - -ccl_device_forceinline bool kernel_shadow_catcher_is_matte_path(const uint32_t path_flag) -{ - return (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) == 0; -} - -ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t path_flag) -{ - return path_flag & PATH_RAY_SHADOW_CATCHER_PASS; -} - -#endif /* __SHADOW_CATCHER__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_shadow_state_template.h b/intern/cycles/kernel/integrator/integrator_shadow_state_template.h deleted file mode 100644 index 1fbadde2642..00000000000 --- a/intern/cycles/kernel/integrator/integrator_shadow_state_template.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/********************************* Shadow Path State **************************/ - -KERNEL_STRUCT_BEGIN(shadow_path) -/* Index of a pixel within the device render buffer. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRACING) -/* Current sample number. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, sample, KERNEL_FEATURE_PATH_TRACING) -/* Random number generator seed. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING) -/* Random number dimension offset. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING) -/* Current ray bounce depth. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current transparent ray bounce depth. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current diffuse ray bounce depth. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, diffuse_bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current glossy ray bounce depth. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, glossy_bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current transmission ray bounce depth. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transmission_bounce, KERNEL_FEATURE_PATH_TRACING) -/* DeviceKernel bit indicating queued kernels. 
*/ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) -/* enum PathRayFlag */ -KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) -/* Throughput. */ -KERNEL_STRUCT_MEMBER(shadow_path, float3, throughput, KERNEL_FEATURE_PATH_TRACING) -/* Throughput for shadow pass. */ -KERNEL_STRUCT_MEMBER(shadow_path, - float3, - unshadowed_throughput, - KERNEL_FEATURE_SHADOW_PASS | KERNEL_FEATURE_AO_ADDITIVE) -/* Ratio of throughput to distinguish diffuse and glossy render passes. */ -KERNEL_STRUCT_MEMBER(shadow_path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES) -/* Number of intersections found by ray-tracing. */ -KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_END(shadow_path) - -/********************************** Shadow Ray *******************************/ - -KERNEL_STRUCT_BEGIN(shadow_ray) -KERNEL_STRUCT_MEMBER(shadow_ray, float3, P, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float3, D, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_END(shadow_ray) - -/*********************** Shadow Intersection result **************************/ - -/* Result from scene intersection. */ -KERNEL_STRUCT_BEGIN(shadow_isect) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, t, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, u, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, v, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, prim, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_END_ARRAY(shadow_isect, - INTEGRATOR_SHADOW_ISECT_SIZE_CPU, - INTEGRATOR_SHADOW_ISECT_SIZE_GPU) - -/**************************** Shadow Volume Stack *****************************/ - -KERNEL_STRUCT_BEGIN(shadow_volume_stack) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, object, KERNEL_FEATURE_VOLUME) -KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, shader, KERNEL_FEATURE_VOLUME) -KERNEL_STRUCT_END_ARRAY(shadow_volume_stack, - KERNEL_STRUCT_VOLUME_STACK_SIZE, - KERNEL_STRUCT_VOLUME_STACK_SIZE) diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h deleted file mode 100644 index 09b399ff1b8..00000000000 --- a/intern/cycles/kernel/integrator/integrator_state.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Integrator State - * - * This file defines the data structures that define the state of a path. 
Any state that is - * preserved and passed between kernel executions is part of this. - * - * The size of this state must be kept as small as possible, to reduce cache misses and keep memory - * usage under control on GPUs that may execute millions of kernels. - * - * Memory may be allocated and passed along in different ways depending on the device. There may - * be a scalar layout, or AoS or SoA layout for batches. The state may be passed along as a pointer - * to every kernel, or the pointer may exist at program scope or in constant memory. To abstract - * these differences between devices and experiment with different layouts, macros are used. - * - * Use IntegratorState to pass a reference to the integrator state for the current path. These are - * defined differently on the CPU and GPU. Use ConstIntegratorState instead of const - * IntegratorState for passing state as read-only, to avoid oddities in typedef behavior. - * - * INTEGRATOR_STATE(state, x, y): read nested struct member x.y of IntegratorState - * INTEGRATOR_STATE_WRITE(state, x, y): write to nested struct member x.y of IntegratorState - * - * INTEGRATOR_STATE_ARRAY(state, x, index, y): read x[index].y - * INTEGRATOR_STATE_ARRAY_WRITE(state, x, index, y): write x[index].y - * - * INTEGRATOR_STATE_NULL: use to pass empty state to other functions. - */ - -#include "kernel/kernel_types.h" - -#include "util/util_types.h" - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Data structures */ - -/* Integrator State - * - * CPU rendering path state with AoS layout. */ -typedef struct IntegratorShadowStateCPU { -#define KERNEL_STRUCT_BEGIN(name) struct { -#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name; -#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER -#define KERNEL_STRUCT_END(name) \ - } \ - name; -#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \ - } \ - name[cpu_size]; -#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE -#include "kernel/integrator/integrator_shadow_state_template.h" -#undef KERNEL_STRUCT_BEGIN -#undef KERNEL_STRUCT_MEMBER -#undef KERNEL_STRUCT_ARRAY_MEMBER -#undef KERNEL_STRUCT_END -#undef KERNEL_STRUCT_END_ARRAY -} IntegratorShadowStateCPU; - -typedef struct IntegratorStateCPU { -#define KERNEL_STRUCT_BEGIN(name) struct { -#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name; -#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER -#define KERNEL_STRUCT_END(name) \ - } \ - name; -#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \ - } \ - name[cpu_size]; -#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE -#include "kernel/integrator/integrator_state_template.h" -#undef KERNEL_STRUCT_BEGIN -#undef KERNEL_STRUCT_MEMBER -#undef KERNEL_STRUCT_ARRAY_MEMBER -#undef KERNEL_STRUCT_END -#undef KERNEL_STRUCT_END_ARRAY -#undef KERNEL_STRUCT_VOLUME_STACK_SIZE - - IntegratorShadowStateCPU shadow; - IntegratorShadowStateCPU ao; -} IntegratorStateCPU; - -/* Path Queue - * - * Keep track of which kernels are queued to be executed next in the path - * for GPU rendering. */ -typedef struct IntegratorQueueCounter { - int num_queued[DEVICE_KERNEL_INTEGRATOR_NUM]; -} IntegratorQueueCounter; - -/* Integrator State GPU - * - * GPU rendering path state with SoA layout. 
*/ -typedef struct IntegratorStateGPU { -#define KERNEL_STRUCT_BEGIN(name) struct { -#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) ccl_global type *name; -#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER -#define KERNEL_STRUCT_END(name) \ - } \ - name; -#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \ - } \ - name[gpu_size]; -#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE - -#include "kernel/integrator/integrator_state_template.h" - -#include "kernel/integrator/integrator_shadow_state_template.h" - -#undef KERNEL_STRUCT_BEGIN -#undef KERNEL_STRUCT_MEMBER -#undef KERNEL_STRUCT_ARRAY_MEMBER -#undef KERNEL_STRUCT_END -#undef KERNEL_STRUCT_END_ARRAY -#undef KERNEL_STRUCT_VOLUME_STACK_SIZE - - /* Count number of queued kernels. */ - ccl_global IntegratorQueueCounter *queue_counter; - - /* Count number of kernels queued for specific shaders. */ - ccl_global int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM]; - - /* Index of shadow path which will be used by a next shadow path. */ - ccl_global int *next_shadow_path_index; - - /* Index of main path which will be used by a next shadow catcher split. */ - ccl_global int *next_main_path_index; -} IntegratorStateGPU; - -/* Abstraction - * - * Macros to access data structures on different devices. - * - * Note that there is a special access function for the shadow catcher state. This access is to - * happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors - * from a kernel which operates on a shadow catcher state will cause bad memory access. */ - -#ifdef __KERNEL_CPU__ - -/* Scalar access on CPU. */ - -typedef IntegratorStateCPU *ccl_restrict IntegratorState; -typedef const IntegratorStateCPU *ccl_restrict ConstIntegratorState; -typedef IntegratorShadowStateCPU *ccl_restrict IntegratorShadowState; -typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState; - -# define INTEGRATOR_STATE_NULL nullptr - -# define INTEGRATOR_STATE(state, nested_struct, member) ((state)->nested_struct.member) -# define INTEGRATOR_STATE_WRITE(state, nested_struct, member) ((state)->nested_struct.member) - -# define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \ - ((state)->nested_struct[array_index].member) -# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ - ((state)->nested_struct[array_index].member) - -#else /* __KERNEL_CPU__ */ - -/* Array access on GPU with Structure-of-Arrays. 
*/ - -typedef const int IntegratorState; -typedef const int ConstIntegratorState; -typedef const int IntegratorShadowState; -typedef const int ConstIntegratorShadowState; - -# define INTEGRATOR_STATE_NULL -1 - -# define INTEGRATOR_STATE(state, nested_struct, member) \ - kernel_integrator_state.nested_struct.member[state] -# define INTEGRATOR_STATE_WRITE(state, nested_struct, member) \ - INTEGRATOR_STATE(state, nested_struct, member) - -# define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \ - kernel_integrator_state.nested_struct[array_index].member[state] -# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ - INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) - -#endif /* __KERNEL_CPU__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_state_flow.h b/intern/cycles/kernel/integrator/integrator_state_flow.h deleted file mode 100644 index 1569bf68e24..00000000000 --- a/intern/cycles/kernel/integrator/integrator_state_flow.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/kernel_types.h" -#include "util/util_atomic.h" - -CCL_NAMESPACE_BEGIN - -/* Control Flow - * - * Utilities for control flow between kernels. The implementation may differ per device - * or even be handled on the host side. To abstract such differences, experiment with - * different implementations and for debugging, this is abstracted using macros. - * - * There is a main path for regular path tracing camera for path tracing. Shadows for next - * event estimation branch off from this into their own path, that may be computed in - * parallel while the main path continues. - * - * Each kernel on the main path must call one of these functions. These may not be called - * multiple times from the same kernel. - * - * INTEGRATOR_PATH_INIT(next_kernel) - * INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) - * INTEGRATOR_PATH_TERMINATE(current_kernel) - * - * For the shadow path similar functions are used, and again each shadow kernel must call - * one of them, and only once. 
- */ - -#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0) -#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \ - (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0) - -#ifdef __KERNEL_GPU__ - -# define INTEGRATOR_PATH_INIT(next_kernel) \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; - -# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ - IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.next_shadow_path_index[0], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ - 1); \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; - -# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ - { \ - const int key_ = key; \ - atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ - 1); \ - } -# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ - { \ - const int key_ = key; \ - atomic_fetch_and_sub_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ - atomic_fetch_and_add_uint32( \ - &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ - atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ - 1); \ - } - -#else - -# define INTEGRATOR_PATH_INIT(next_kernel) \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; -# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)key; \ - } -# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)current_kernel; \ - } -# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ - { \ - 
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \ - (void)current_kernel; \ - } -# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ - { \ - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ - (void)key; \ - (void)current_kernel; \ - } - -# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ - IntegratorShadowState shadow_state = &state->shadow_type; \ - INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; -# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \ - (void)current_kernel; \ - } -# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ - { \ - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \ - (void)current_kernel; \ - } - -#endif - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_state_template.h b/intern/cycles/kernel/integrator/integrator_state_template.h deleted file mode 100644 index b1a6fd36fae..00000000000 --- a/intern/cycles/kernel/integrator/integrator_state_template.h +++ /dev/null @@ -1,109 +0,0 @@ - -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/************************************ Path State *****************************/ - -KERNEL_STRUCT_BEGIN(path) -/* Index of a pixel within the device render buffer where this path will write its result. - * To get an actual offset within the buffer the value needs to be multiplied by the - * `kernel_data.film.pass_stride`. - * - * The multiplication is delayed for later, so that state can use 32bit integer. */ -KERNEL_STRUCT_MEMBER(path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRACING) -/* Current sample number. */ -KERNEL_STRUCT_MEMBER(path, uint16_t, sample, KERNEL_FEATURE_PATH_TRACING) -/* Current ray bounce depth. */ -KERNEL_STRUCT_MEMBER(path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current transparent ray bounce depth. */ -KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current diffuse ray bounce depth. */ -KERNEL_STRUCT_MEMBER(path, uint16_t, diffuse_bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current glossy ray bounce depth. */ -KERNEL_STRUCT_MEMBER(path, uint16_t, glossy_bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current transmission ray bounce depth. */ -KERNEL_STRUCT_MEMBER(path, uint16_t, transmission_bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current volume ray bounce depth. */ -KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounce, KERNEL_FEATURE_PATH_TRACING) -/* Current volume bounds ray bounce depth. */ -KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounds_bounce, KERNEL_FEATURE_PATH_TRACING) -/* DeviceKernel bit indicating queued kernels. */ -KERNEL_STRUCT_MEMBER(path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) -/* Random number generator seed. 
*/ -KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING) -/* Random number dimension offset. */ -KERNEL_STRUCT_MEMBER(path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING) -/* enum PathRayFlag */ -KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) -/* Multiple importance sampling - * The PDF of BSDF sampling at the last scatter point, and distance to the - * last scatter point minus the last ray segment. This distance lets us - * compute the complete distance through transparent surfaces and volumes. */ -KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING) -/* Filter glossy. */ -KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING) -/* Throughput. */ -KERNEL_STRUCT_MEMBER(path, float3, throughput, KERNEL_FEATURE_PATH_TRACING) -/* Ratio of throughput to distinguish diffuse and glossy render passes. */ -KERNEL_STRUCT_MEMBER(path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES) -/* Denoising. */ -KERNEL_STRUCT_MEMBER(path, float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING) -/* Shader sorting. */ -/* TODO: compress as uint16? or leave out entirely and recompute key in sorting code? */ -KERNEL_STRUCT_MEMBER(path, uint32_t, shader_sort_key, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_END(path) - -/************************************** Ray ***********************************/ - -KERNEL_STRUCT_BEGIN(ray) -KERNEL_STRUCT_MEMBER(ray, float3, P, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(ray, float3, D, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_END(ray) - -/*************************** Intersection result ******************************/ - -/* Result from scene intersection. 
*/ -KERNEL_STRUCT_BEGIN(isect) -KERNEL_STRUCT_MEMBER(isect, float, t, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(isect, float, u, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(isect, float, v, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(isect, int, prim, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(isect, int, object, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_MEMBER(isect, int, type, KERNEL_FEATURE_PATH_TRACING) -KERNEL_STRUCT_END(isect) - -/*************** Subsurface closure state for subsurface kernel ***************/ - -KERNEL_STRUCT_BEGIN(subsurface) -KERNEL_STRUCT_MEMBER(subsurface, float3, albedo, KERNEL_FEATURE_SUBSURFACE) -KERNEL_STRUCT_MEMBER(subsurface, float3, radius, KERNEL_FEATURE_SUBSURFACE) -KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE) -KERNEL_STRUCT_MEMBER(subsurface, float3, Ng, KERNEL_FEATURE_SUBSURFACE) -KERNEL_STRUCT_END(subsurface) - -/********************************** Volume Stack ******************************/ - -KERNEL_STRUCT_BEGIN(volume_stack) -KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, object, KERNEL_FEATURE_VOLUME) -KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, shader, KERNEL_FEATURE_VOLUME) -KERNEL_STRUCT_END_ARRAY(volume_stack, - KERNEL_STRUCT_VOLUME_STACK_SIZE, - KERNEL_STRUCT_VOLUME_STACK_SIZE) diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h deleted file mode 100644 index 0b1f67daa92..00000000000 --- a/intern/cycles/kernel/integrator/integrator_state_util.h +++ /dev/null @@ -1,440 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "kernel/integrator/integrator_state.h" - -#include "kernel/util/util_differential.h" - -CCL_NAMESPACE_BEGIN - -/* Ray */ - -ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg, - IntegratorState state, - ccl_private const Ray *ccl_restrict ray) -{ - INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P; - INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D; - INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t; - INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time; - INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP; - INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD; -} - -ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg, - ConstIntegratorState state, - ccl_private Ray *ccl_restrict ray) -{ - ray->P = INTEGRATOR_STATE(state, ray, P); - ray->D = INTEGRATOR_STATE(state, ray, D); - ray->t = INTEGRATOR_STATE(state, ray, t); - ray->time = INTEGRATOR_STATE(state, ray, time); - ray->dP = INTEGRATOR_STATE(state, ray, dP); - ray->dD = INTEGRATOR_STATE(state, ray, dD); -} - -/* Shadow Ray */ - -ccl_device_forceinline void integrator_state_write_shadow_ray( - KernelGlobals kg, IntegratorShadowState state, ccl_private const Ray *ccl_restrict ray) -{ - INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P; - INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D; - INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t; - INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time; - INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP; -} - -ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg, - ConstIntegratorShadowState state, - ccl_private Ray *ccl_restrict ray) -{ - ray->P = INTEGRATOR_STATE(state, shadow_ray, P); - ray->D = INTEGRATOR_STATE(state, shadow_ray, D); - ray->t = INTEGRATOR_STATE(state, shadow_ray, t); - ray->time = INTEGRATOR_STATE(state, shadow_ray, time); - ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP); - ray->dD = differential_zero_compact(); -} - -/* Intersection */ - -ccl_device_forceinline void integrator_state_write_isect( - KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect) -{ - INTEGRATOR_STATE_WRITE(state, isect, t) = isect->t; - INTEGRATOR_STATE_WRITE(state, isect, u) = isect->u; - INTEGRATOR_STATE_WRITE(state, isect, v) = isect->v; - INTEGRATOR_STATE_WRITE(state, isect, object) = isect->object; - INTEGRATOR_STATE_WRITE(state, isect, prim) = isect->prim; - INTEGRATOR_STATE_WRITE(state, isect, type) = isect->type; -} - -ccl_device_forceinline void integrator_state_read_isect( - KernelGlobals kg, ConstIntegratorState state, ccl_private Intersection *ccl_restrict isect) -{ - isect->prim = INTEGRATOR_STATE(state, isect, prim); - isect->object = INTEGRATOR_STATE(state, isect, object); - isect->type = INTEGRATOR_STATE(state, isect, type); - isect->u = INTEGRATOR_STATE(state, isect, u); - isect->v = INTEGRATOR_STATE(state, isect, v); - isect->t = INTEGRATOR_STATE(state, isect, t); -} - -ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(ConstIntegratorState state, - int i) -{ - VolumeStack entry = {INTEGRATOR_STATE_ARRAY(state, volume_stack, i, object), - INTEGRATOR_STATE_ARRAY(state, volume_stack, i, shader)}; - return entry; -} - -ccl_device_forceinline void integrator_state_write_volume_stack(IntegratorState state, - int i, - VolumeStack entry) -{ - INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, i, object) = entry.object; - INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, i, shader) = entry.shader; -} - 
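
As a minimal standalone sketch of the accessor pattern used by the helpers above and defined in integrator_state.h: the same macro can resolve to a plain struct member on a per-path CPU state (AoS, state is a pointer) or to a structure-of-arrays slot indexed by an integer path id on the GPU (SoA, state is an index). The names below (ToyState, TOY_STATE, toy_integrator_state, TOY_KERNEL_CPU) are invented for this illustration only and do not use the real Cycles headers or macros.

// Editor's sketch: simplified stand-in for the INTEGRATOR_STATE abstraction.
// Build with -DTOY_KERNEL_CPU for the AoS/pointer variant, without it for SoA/index.
#include <cstdint>
#include <cstdio>
#include <vector>

/* CPU-style layout: one struct per path (AoS). */
struct ToyPathCPU {
  struct {
    uint32_t flag;
    float throughput;
  } path;
};

/* GPU-style layout: one array per member (SoA), shared by all paths. */
struct ToyPathSoA {
  struct {
    std::vector<uint32_t> flag;
    std::vector<float> throughput;
  } path;
};

#ifdef TOY_KERNEL_CPU
typedef ToyPathCPU *ToyState;
#  define TOY_STATE(state, nested, member) ((state)->nested.member)
#else
static ToyPathSoA toy_integrator_state; /* Would live in global/constant memory. */
typedef int ToyState;
#  define TOY_STATE(state, nested, member) (toy_integrator_state.nested.member[state])
#endif

/* Kernel-style code written once against the macro works for both layouts. */
static void toy_scale_throughput(ToyState state, float factor)
{
  TOY_STATE(state, path, throughput) *= factor;
  TOY_STATE(state, path, flag) |= 1u;
}

int main()
{
#ifdef TOY_KERNEL_CPU
  ToyPathCPU path = {{0u, 1.0f}};
  toy_scale_throughput(&path, 0.5f);
  std::printf("cpu throughput: %f\n", path.path.throughput);
#else
  toy_integrator_state.path.flag.assign(4, 0u);
  toy_integrator_state.path.throughput.assign(4, 1.0f);
  toy_scale_throughput(/*path index*/ 2, 0.5f);
  std::printf("soa throughput[2]: %f\n", toy_integrator_state.path.throughput[2]);
#endif
  return 0;
}

The real headers additionally wrap writes in a separate INTEGRATOR_STATE_WRITE macro and provide array variants, but the CPU/GPU split follows the same shape as this sketch.
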
-ccl_device_forceinline bool integrator_state_volume_stack_is_empty(KernelGlobals kg, - ConstIntegratorState state) -{ - return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ? - INTEGRATOR_STATE_ARRAY(state, volume_stack, 0, shader) == SHADER_NONE : - true; -} - -/* Shadow Intersection */ - -ccl_device_forceinline void integrator_state_write_shadow_isect( - IntegratorShadowState state, - ccl_private const Intersection *ccl_restrict isect, - const int index) -{ - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, t) = isect->t; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, u) = isect->u; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, v) = isect->v; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, object) = isect->object; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, prim) = isect->prim; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, type) = isect->type; -} - -ccl_device_forceinline void integrator_state_read_shadow_isect( - ConstIntegratorShadowState state, - ccl_private Intersection *ccl_restrict isect, - const int index) -{ - isect->prim = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, prim); - isect->object = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, object); - isect->type = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, type); - isect->u = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, u); - isect->v = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, v); - isect->t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, t); -} - -ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow( - KernelGlobals kg, IntegratorShadowState shadow_state, ConstIntegratorState state) -{ - if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { - int index = 0; - int shader; - do { - shader = INTEGRATOR_STATE_ARRAY(state, volume_stack, index, shader); - - INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_volume_stack, index, object) = - INTEGRATOR_STATE_ARRAY(state, volume_stack, index, object); - INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_volume_stack, index, shader) = shader; - - ++index; - } while (shader != OBJECT_NONE); - } -} - -ccl_device_forceinline void integrator_state_copy_volume_stack(KernelGlobals kg, - IntegratorState to_state, - ConstIntegratorState state) -{ - if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { - int index = 0; - int shader; - do { - shader = INTEGRATOR_STATE_ARRAY(state, volume_stack, index, shader); - - INTEGRATOR_STATE_ARRAY_WRITE(to_state, volume_stack, index, object) = INTEGRATOR_STATE_ARRAY( - state, volume_stack, index, object); - INTEGRATOR_STATE_ARRAY_WRITE(to_state, volume_stack, index, shader) = shader; - - ++index; - } while (shader != OBJECT_NONE); - } -} - -ccl_device_forceinline VolumeStack -integrator_state_read_shadow_volume_stack(ConstIntegratorShadowState state, int i) -{ - VolumeStack entry = {INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, object), - INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, shader)}; - return entry; -} - -ccl_device_forceinline bool integrator_state_shadow_volume_stack_is_empty( - KernelGlobals kg, ConstIntegratorShadowState state) -{ - return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ? 
- INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, 0, shader) == SHADER_NONE : - true; -} - -ccl_device_forceinline void integrator_state_write_shadow_volume_stack(IntegratorShadowState state, - int i, - VolumeStack entry) -{ - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, i, object) = entry.object; - INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, i, shader) = entry.shader; -} - -#if defined(__KERNEL_GPU__) -ccl_device_inline void integrator_state_copy_only(KernelGlobals kg, - ConstIntegratorState to_state, - ConstIntegratorState state) -{ - int index; - - /* Rely on the compiler to optimize out unused assignments and `while(false)`'s. */ - -# define KERNEL_STRUCT_BEGIN(name) \ - index = 0; \ - do { - -# define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) \ - if (kernel_integrator_state.parent_struct.name != nullptr) { \ - kernel_integrator_state.parent_struct.name[to_state] = \ - kernel_integrator_state.parent_struct.name[state]; \ - } - -# define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \ - if (kernel_integrator_state.parent_struct[index].name != nullptr) { \ - kernel_integrator_state.parent_struct[index].name[to_state] = \ - kernel_integrator_state.parent_struct[index].name[state]; \ - } - -# define KERNEL_STRUCT_END(name) \ - } \ - while (false) \ - ; - -# define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \ - ++index; \ - } \ - while (index < gpu_array_size) \ - ; - -# define KERNEL_STRUCT_VOLUME_STACK_SIZE kernel_data.volume_stack_size - -# include "kernel/integrator/integrator_state_template.h" - -# undef KERNEL_STRUCT_BEGIN -# undef KERNEL_STRUCT_MEMBER -# undef KERNEL_STRUCT_ARRAY_MEMBER -# undef KERNEL_STRUCT_END -# undef KERNEL_STRUCT_END_ARRAY -# undef KERNEL_STRUCT_VOLUME_STACK_SIZE -} - -ccl_device_inline void integrator_state_move(KernelGlobals kg, - ConstIntegratorState to_state, - ConstIntegratorState state) -{ - integrator_state_copy_only(kg, to_state, state); - - INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; -} - -ccl_device_inline void integrator_shadow_state_copy_only(KernelGlobals kg, - ConstIntegratorShadowState to_state, - ConstIntegratorShadowState state) -{ - int index; - - /* Rely on the compiler to optimize out unused assignments and `while(false)`'s. 
*/ - -# define KERNEL_STRUCT_BEGIN(name) \ - index = 0; \ - do { - -# define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) \ - if (kernel_integrator_state.parent_struct.name != nullptr) { \ - kernel_integrator_state.parent_struct.name[to_state] = \ - kernel_integrator_state.parent_struct.name[state]; \ - } - -# define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \ - if (kernel_integrator_state.parent_struct[index].name != nullptr) { \ - kernel_integrator_state.parent_struct[index].name[to_state] = \ - kernel_integrator_state.parent_struct[index].name[state]; \ - } - -# define KERNEL_STRUCT_END(name) \ - } \ - while (false) \ - ; - -# define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \ - ++index; \ - } \ - while (index < gpu_array_size) \ - ; - -# define KERNEL_STRUCT_VOLUME_STACK_SIZE kernel_data.volume_stack_size - -# include "kernel/integrator/integrator_shadow_state_template.h" - -# undef KERNEL_STRUCT_BEGIN -# undef KERNEL_STRUCT_MEMBER -# undef KERNEL_STRUCT_ARRAY_MEMBER -# undef KERNEL_STRUCT_END -# undef KERNEL_STRUCT_END_ARRAY -# undef KERNEL_STRUCT_VOLUME_STACK_SIZE -} - -ccl_device_inline void integrator_shadow_state_move(KernelGlobals kg, - ConstIntegratorState to_state, - ConstIntegratorState state) -{ - integrator_shadow_state_copy_only(kg, to_state, state); - - INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; -} - -#endif - -/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths - * after this function. */ -ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg, - IntegratorState state) -{ -#if defined(__KERNEL_GPU__) - ConstIntegratorState to_state = atomic_fetch_and_add_uint32( - &kernel_integrator_state.next_main_path_index[0], 1); - - integrator_state_copy_only(kg, to_state, state); -#else - IntegratorStateCPU *ccl_restrict to_state = state + 1; - - /* Only copy the required subset, since shadow intersections are big and irrelevant here. 
*/ - to_state->path = state->path; - to_state->ray = state->ray; - to_state->isect = state->isect; - integrator_state_copy_volume_stack(kg, to_state, state); -#endif - - INTEGRATOR_STATE_WRITE(to_state, path, flag) |= PATH_RAY_SHADOW_CATCHER_PASS; -} - -#ifdef __KERNEL_CPU__ -ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int) -{ - return INTEGRATOR_STATE(state, path, bounce); -} - -ccl_device_inline int integrator_state_bounce(ConstIntegratorShadowState state, const int) -{ - return INTEGRATOR_STATE(state, shadow_path, bounce); -} - -ccl_device_inline int integrator_state_diffuse_bounce(ConstIntegratorState state, const int) -{ - return INTEGRATOR_STATE(state, path, diffuse_bounce); -} - -ccl_device_inline int integrator_state_diffuse_bounce(ConstIntegratorShadowState state, const int) -{ - return INTEGRATOR_STATE(state, shadow_path, diffuse_bounce); -} - -ccl_device_inline int integrator_state_glossy_bounce(ConstIntegratorState state, const int) -{ - return INTEGRATOR_STATE(state, path, glossy_bounce); -} - -ccl_device_inline int integrator_state_glossy_bounce(ConstIntegratorShadowState state, const int) -{ - return INTEGRATOR_STATE(state, shadow_path, glossy_bounce); -} - -ccl_device_inline int integrator_state_transmission_bounce(ConstIntegratorState state, const int) -{ - return INTEGRATOR_STATE(state, path, transmission_bounce); -} - -ccl_device_inline int integrator_state_transmission_bounce(ConstIntegratorShadowState state, - const int) -{ - return INTEGRATOR_STATE(state, shadow_path, transmission_bounce); -} - -ccl_device_inline int integrator_state_transparent_bounce(ConstIntegratorState state, const int) -{ - return INTEGRATOR_STATE(state, path, transparent_bounce); -} - -ccl_device_inline int integrator_state_transparent_bounce(ConstIntegratorShadowState state, - const int) -{ - return INTEGRATOR_STATE(state, shadow_path, transparent_bounce); -} -#else -ccl_device_inline int integrator_state_bounce(ConstIntegratorShadowState state, - const uint32_t path_flag) -{ - return (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, bounce) : - INTEGRATOR_STATE(state, path, bounce); -} - -ccl_device_inline int integrator_state_diffuse_bounce(ConstIntegratorShadowState state, - const uint32_t path_flag) -{ - return (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, diffuse_bounce) : - INTEGRATOR_STATE(state, path, diffuse_bounce); -} - -ccl_device_inline int integrator_state_glossy_bounce(ConstIntegratorShadowState state, - const uint32_t path_flag) -{ - return (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, glossy_bounce) : - INTEGRATOR_STATE(state, path, glossy_bounce); -} - -ccl_device_inline int integrator_state_transmission_bounce(ConstIntegratorShadowState state, - const uint32_t path_flag) -{ - return (path_flag & PATH_RAY_SHADOW) ? - INTEGRATOR_STATE(state, shadow_path, transmission_bounce) : - INTEGRATOR_STATE(state, path, transmission_bounce); -} - -ccl_device_inline int integrator_state_transparent_bounce(ConstIntegratorShadowState state, - const uint32_t path_flag) -{ - return (path_flag & PATH_RAY_SHADOW) ? 
INTEGRATOR_STATE(state, shadow_path, transparent_bounce) : - INTEGRATOR_STATE(state, path, transparent_bounce); -} -#endif - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h deleted file mode 100644 index 9560641c460..00000000000 --- a/intern/cycles/kernel/integrator/integrator_subsurface.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/camera/camera_projection.h" - -#include "kernel/bvh/bvh.h" - -#include "kernel/closure/alloc.h" -#include "kernel/closure/bsdf_diffuse.h" -#include "kernel/closure/bsdf_principled_diffuse.h" -#include "kernel/closure/bssrdf.h" -#include "kernel/closure/volume.h" - -#include "kernel/integrator/integrator_intersect_volume_stack.h" -#include "kernel/integrator/integrator_path_state.h" -#include "kernel/integrator/integrator_shader_eval.h" -#include "kernel/integrator/integrator_subsurface_disk.h" -#include "kernel/integrator/integrator_subsurface_random_walk.h" - -CCL_NAMESPACE_BEGIN - -#ifdef __SUBSURFACE__ - -ccl_device int subsurface_bounce(KernelGlobals kg, - IntegratorState state, - ccl_private ShaderData *sd, - ccl_private const ShaderClosure *sc) -{ - /* We should never have two consecutive BSSRDF bounces, the second one should - * be converted to a diffuse BSDF to avoid this. */ - kernel_assert(!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DIFFUSE_ANCESTOR)); - - /* Setup path state for intersect_subsurface kernel. */ - ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; - - /* Setup ray into surface. */ - INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; - INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N; - INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; - INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); - INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact(); - - /* Pass along object info, reusing isect to save memory. */ - INTEGRATOR_STATE_WRITE(state, subsurface, Ng) = sd->Ng; - INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object; - - uint32_t path_flag = (INTEGRATOR_STATE(state, path, flag) & ~PATH_RAY_CAMERA) | - ((sc->type == CLOSURE_BSSRDF_BURLEY_ID) ? PATH_RAY_SUBSURFACE_DISK : - PATH_RAY_SUBSURFACE_RANDOM_WALK); - - /* Compute weight, optionally including Fresnel from entry point. */ - float3 weight = shader_bssrdf_sample_weight(sd, sc); -# ifdef __PRINCIPLED__ - if (bssrdf->roughness != FLT_MAX) { - path_flag |= PATH_RAY_SUBSURFACE_USE_FRESNEL; - } -# endif - - INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight; - INTEGRATOR_STATE_WRITE(state, path, flag) = path_flag; - - /* Advance random number offset for bounce. 
*/ - INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; - - if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { - if (INTEGRATOR_STATE(state, path, bounce) == 0) { - INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3(); - } - } - - /* Pass BSSRDF parameters. */ - INTEGRATOR_STATE_WRITE(state, subsurface, albedo) = bssrdf->albedo; - INTEGRATOR_STATE_WRITE(state, subsurface, radius) = bssrdf->radius; - INTEGRATOR_STATE_WRITE(state, subsurface, anisotropy) = bssrdf->anisotropy; - - return LABEL_SUBSURFACE_SCATTER; -} - -ccl_device void subsurface_shader_data_setup(KernelGlobals kg, - IntegratorState state, - ccl_private ShaderData *sd, - const uint32_t path_flag) -{ - /* Get bump mapped normal from shader evaluation at exit point. */ - float3 N = sd->N; - if (sd->flag & SD_HAS_BSSRDF_BUMP) { - N = shader_bssrdf_normal(sd); - } - - /* Setup diffuse BSDF at the exit point. This replaces shader_eval_surface. */ - sd->flag &= ~SD_CLOSURE_FLAGS; - sd->num_closure = 0; - sd->num_closure_left = kernel_data.max_closures; - - const float3 weight = one_float3(); - -# ifdef __PRINCIPLED__ - if (path_flag & PATH_RAY_SUBSURFACE_USE_FRESNEL) { - ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc( - sd, sizeof(PrincipledDiffuseBsdf), weight); - - if (bsdf) { - bsdf->N = N; - bsdf->roughness = FLT_MAX; - sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_LAMBERT_EXIT); - } - } - else -# endif /* __PRINCIPLED__ */ - { - ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( - sd, sizeof(DiffuseBsdf), weight); - - if (bsdf) { - bsdf->N = N; - sd->flag |= bsdf_diffuse_setup(bsdf); - } - } -} - -ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState state) -{ - RNGState rng_state; - path_state_rng_load(state, &rng_state); - - Ray ray ccl_optional_struct_init; - LocalIntersection ss_isect ccl_optional_struct_init; - - if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE_RANDOM_WALK) { - if (!subsurface_random_walk(kg, state, rng_state, ray, ss_isect)) { - return false; - } - } - else { - if (!subsurface_disk(kg, state, rng_state, ray, ss_isect)) { - return false; - } - } - -# ifdef __VOLUME__ - /* Update volume stack if needed. */ - if (kernel_data.integrator.use_volumes) { - const int object = ss_isect.hits[0].object; - const int object_flag = kernel_tex_fetch(__object_flag, object); - - if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) { - float3 P = INTEGRATOR_STATE(state, ray, P); - const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng); - const float3 offset_P = ray_offset(P, -Ng); - - integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P); - } - } -# endif /* __VOLUME__ */ - - /* Pretend ray is coming from the outside towards the exit point. This ensures - * correct front/back facing normals. - * TODO: find a more elegant solution? */ - ray.P += ray.D * ray.t * 2.0f; - ray.D = -ray.D; - - integrator_state_write_isect(kg, state, &ss_isect.hits[0]); - integrator_state_write_ray(kg, state, &ray); - - /* Advance random number offset for bounce. 
*/ - INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; - - const int shader = intersection_get_shader(kg, &ss_isect.hits[0]); - const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; - if (shader_flags & SD_HAS_RAYTRACE) { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, - DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, - shader); - } - else { - INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, - DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, - shader); - } - - return true; -} - -#endif /* __SUBSURFACE__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h deleted file mode 100644 index e1cce13fb30..00000000000 --- a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* BSSRDF using disk based importance sampling. - * - * BSSRDF Importance Sampling, SIGGRAPH 2013 - * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf - */ - -ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r, float r) -{ - const float3 eval = bssrdf_eval(radius, r); - const float pdf = bssrdf_pdf(radius, disk_r); - return (pdf > 0.0f) ? eval / pdf : zero_float3(); -} - -/* Subsurface scattering step, from a point on the surface to other - * nearby points on the same object. */ -ccl_device_inline bool subsurface_disk(KernelGlobals kg, - IntegratorState state, - RNGState rng_state, - ccl_private Ray &ray, - ccl_private LocalIntersection &ss_isect) - -{ - float disk_u, disk_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &disk_u, &disk_v); - - /* Read shading point info from integrator state. */ - const float3 P = INTEGRATOR_STATE(state, ray, P); - const float ray_dP = INTEGRATOR_STATE(state, ray, dP); - const float time = INTEGRATOR_STATE(state, ray, time); - const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng); - const int object = INTEGRATOR_STATE(state, isect, object); - - /* Read subsurface scattering parameters. */ - const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); - - /* Pick random axis in local frame and point on disk. */ - float3 disk_N, disk_T, disk_B; - float pick_pdf_N, pick_pdf_T, pick_pdf_B; - - disk_N = Ng; - make_orthonormals(disk_N, &disk_T, &disk_B); - - if (disk_v < 0.5f) { - pick_pdf_N = 0.5f; - pick_pdf_T = 0.25f; - pick_pdf_B = 0.25f; - disk_v *= 2.0f; - } - else if (disk_v < 0.75f) { - float3 tmp = disk_N; - disk_N = disk_T; - disk_T = tmp; - pick_pdf_N = 0.25f; - pick_pdf_T = 0.5f; - pick_pdf_B = 0.25f; - disk_v = (disk_v - 0.5f) * 4.0f; - } - else { - float3 tmp = disk_N; - disk_N = disk_B; - disk_B = tmp; - pick_pdf_N = 0.25f; - pick_pdf_T = 0.25f; - pick_pdf_B = 0.5f; - disk_v = (disk_v - 0.75f) * 4.0f; - } - - /* Sample point on disk. 
*/ - float phi = M_2PI_F * disk_v; - float disk_height, disk_r; - - bssrdf_sample(radius, disk_u, &disk_r, &disk_height); - - float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B; - - /* Create ray. */ - ray.P = P + disk_N * disk_height + disk_P; - ray.D = -disk_N; - ray.t = 2.0f * disk_height; - ray.dP = ray_dP; - ray.dD = differential_zero_compact(); - ray.time = time; - - /* Intersect with the same object. if multiple intersections are found it - * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits. */ - uint lcg_state = lcg_state_init( - rng_state.rng_hash, rng_state.rng_offset, rng_state.sample, 0x68bc21eb); - const int max_hits = BSSRDF_MAX_HITS; - - scene_intersect_local(kg, &ray, &ss_isect, object, &lcg_state, max_hits); - const int num_eval_hits = min(ss_isect.num_hits, max_hits); - if (num_eval_hits == 0) { - return false; - } - - /* Sort for consistent renders between CPU and GPU, independent of the BVH - * traversal algorithm. */ - sort_intersections_and_normals(ss_isect.hits, ss_isect.Ng, num_eval_hits); - - float3 weights[BSSRDF_MAX_HITS]; /* TODO: zero? */ - float sum_weights = 0.0f; - - for (int hit = 0; hit < num_eval_hits; hit++) { - /* Quickly retrieve P and Ng without setting up ShaderData. */ - const float3 hit_P = ray.P + ray.D * ss_isect.hits[hit].t; - - /* Get geometric normal. */ - const int object = ss_isect.hits[hit].object; - const int object_flag = kernel_tex_fetch(__object_flag, object); - float3 hit_Ng = ss_isect.Ng[hit]; - if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - hit_Ng = -hit_Ng; - } - - if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - Transform itfm; - object_fetch_transform_motion_test(kg, object, time, &itfm); - hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng)); - } - - /* Probability densities for local frame axes. */ - const float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng)); - const float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng)); - const float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng)); - - /* Multiple importance sample between 3 axes, power heuristic - * found to be slightly better than balance heuristic. pdf_N - * in the MIS weight and denominator cancelled out. */ - float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B)); - if (ss_isect.num_hits > max_hits) { - w *= ss_isect.num_hits / (float)max_hits; - } - - /* Real distance to sampled point. */ - const float r = len(hit_P - P); - - /* Evaluate profiles. */ - const float3 weight = subsurface_disk_eval(radius, disk_r, r) * w; - - /* Store result. */ - ss_isect.Ng[hit] = hit_Ng; - weights[hit] = weight; - sum_weights += average(fabs(weight)); - } - - if (sum_weights == 0.0f) { - return false; - } - - /* Use importance resampling, sampling one of the hits proportional to weight. */ - const float r = lcg_step_float(&lcg_state) * sum_weights; - float partial_sum = 0.0f; - - for (int hit = 0; hit < num_eval_hits; hit++) { - const float3 weight = weights[hit]; - const float sample_weight = average(fabs(weight)); - float next_sum = partial_sum + sample_weight; - - if (r < next_sum) { - /* Return exit point. 
*/ - INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight * sum_weights / sample_weight; - - ss_isect.hits[0] = ss_isect.hits[hit]; - ss_isect.Ng[0] = ss_isect.Ng[hit]; - - ray.P = ray.P + ray.D * ss_isect.hits[hit].t; - ray.D = ss_isect.Ng[hit]; - ray.t = 1.0f; - return true; - } - - partial_sum = next_sum; - } - - return false; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h deleted file mode 100644 index b98acda1f4d..00000000000 --- a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h +++ /dev/null @@ -1,469 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/camera/camera_projection.h" - -#include "kernel/bvh/bvh.h" - -CCL_NAMESPACE_BEGIN - -/* Random walk subsurface scattering. - * - * "Practical and Controllable Subsurface Scattering for Production Path - * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */ - -/* Support for anisotropy from: - * "Path Traced Subsurface Scattering using Anisotropic Phase Functions - * and Non-Exponential Free Flights". - * Magnus Wrenninge, Ryusuke Villemin, Christophe Hery. - * https://graphics.pixar.com/library/PathTracedSubsurface/ */ - -ccl_device void subsurface_random_walk_remap(const float albedo, - const float d, - float g, - ccl_private float *sigma_t, - ccl_private float *alpha) -{ - /* Compute attenuation and scattering coefficients from albedo. 
*/ - const float g2 = g * g; - const float g3 = g2 * g; - const float g4 = g3 * g; - const float g5 = g4 * g; - const float g6 = g5 * g; - const float g7 = g6 * g; - - const float A = 1.8260523782f + -1.28451056436f * g + -1.79904629312f * g2 + - 9.19393289202f * g3 + -22.8215585862f * g4 + 32.0234874259f * g5 + - -23.6264803333f * g6 + 7.21067002658f * g7; - const float B = 4.98511194385f + - 0.127355959438f * - expf(31.1491581433f * g + -201.847017512f * g2 + 841.576016723f * g3 + - -2018.09288505f * g4 + 2731.71560286f * g5 + -1935.41424244f * g6 + - 559.009054474f * g7); - const float C = 1.09686102424f + -0.394704063468f * g + 1.05258115941f * g2 + - -8.83963712726f * g3 + 28.8643230661f * g4 + -46.8802913581f * g5 + - 38.5402837518f * g6 + -12.7181042538f * g7; - const float D = 0.496310210422f + 0.360146581622f * g + -2.15139309747f * g2 + - 17.8896899217f * g3 + -55.2984010333f * g4 + 82.065982243f * g5 + - -58.5106008578f * g6 + 15.8478295021f * g7; - const float E = 4.23190299701f + - 0.00310603949088f * - expf(76.7316253952f * g + -594.356773233f * g2 + 2448.8834203f * g3 + - -5576.68528998f * g4 + 7116.60171912f * g5 + -4763.54467887f * g6 + - 1303.5318055f * g7); - const float F = 2.40602999408f + -2.51814844609f * g + 9.18494908356f * g2 + - -79.2191708682f * g3 + 259.082868209f * g4 + -403.613804597f * g5 + - 302.85712436f * g6 + -87.4370473567f * g7; - - const float blend = powf(albedo, 0.25f); - - *alpha = (1.0f - blend) * A * powf(atanf(B * albedo), C) + - blend * D * powf(atanf(E * albedo), F); - *alpha = clamp(*alpha, 0.0f, 0.999999f); // because of numerical precision - - float sigma_t_prime = 1.0f / fmaxf(d, 1e-16f); - *sigma_t = sigma_t_prime / (1.0f - g); -} - -ccl_device void subsurface_random_walk_coefficients(const float3 albedo, - const float3 radius, - const float anisotropy, - ccl_private float3 *sigma_t, - ccl_private float3 *alpha, - ccl_private float3 *throughput) -{ - float sigma_t_x, sigma_t_y, sigma_t_z; - float alpha_x, alpha_y, alpha_z; - - subsurface_random_walk_remap(albedo.x, radius.x, anisotropy, &sigma_t_x, &alpha_x); - subsurface_random_walk_remap(albedo.y, radius.y, anisotropy, &sigma_t_y, &alpha_y); - subsurface_random_walk_remap(albedo.z, radius.z, anisotropy, &sigma_t_z, &alpha_z); - - /* Throughput already contains closure weight at this point, which includes the - * albedo, as well as closure mixing and Fresnel weights. Divide out the albedo - * which will be added through scattering. */ - *throughput = safe_divide_color(*throughput, albedo); - - /* With low albedo values (like 0.025) we get diffusion_length 1.0 and - * infinite phase functions. To avoid a sharp discontinuity as we go from - * such values to 0.0, increase alpha and reduce the throughput to compensate. 
*/ - const float min_alpha = 0.2f; - if (alpha_x < min_alpha) { - (*throughput).x *= alpha_x / min_alpha; - alpha_x = min_alpha; - } - if (alpha_y < min_alpha) { - (*throughput).y *= alpha_y / min_alpha; - alpha_y = min_alpha; - } - if (alpha_z < min_alpha) { - (*throughput).z *= alpha_z / min_alpha; - alpha_z = min_alpha; - } - - *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z); - *alpha = make_float3(alpha_x, alpha_y, alpha_z); -} - -/* References for Dwivedi sampling: - * - * [1] "A Zero-variance-based Sampling Scheme for Monte Carlo Subsurface Scattering" - * by Jaroslav KÅ™ivánek and Eugene d'Eon (SIGGRAPH 2014) - * https://cgg.mff.cuni.cz/~jaroslav/papers/2014-zerovar/ - * - * [2] "Improving the Dwivedi Sampling Scheme" - * by Johannes Meng, Johannes Hanika, and Carsten Dachsbacher (EGSR 2016) - * https://cg.ivd.kit.edu/1951.php - * - * [3] "Zero-Variance Theory for Efficient Subsurface Scattering" - * by Eugene d'Eon and Jaroslav KÅ™ivánek (SIGGRAPH 2020) - * https://iliyan.com/publications/RenderingCourse2020 - */ - -ccl_device_forceinline float eval_phase_dwivedi(float v, float phase_log, float cos_theta) -{ - /* Eq. 9 from [2] using precomputed log((v + 1) / (v - 1)) */ - return 1.0f / ((v - cos_theta) * phase_log); -} - -ccl_device_forceinline float sample_phase_dwivedi(float v, float phase_log, float rand) -{ - /* Based on Eq. 10 from [2]: `v - (v + 1) * pow((v - 1) / (v + 1), rand)` - * Since we're already pre-computing `phase_log = log((v + 1) / (v - 1))` for the evaluation, - * we can implement the power function like this. */ - return v - (v + 1.0f) * expf(-rand * phase_log); -} - -ccl_device_forceinline float diffusion_length_dwivedi(float alpha) -{ - /* Eq. 67 from [3] */ - return 1.0f / sqrtf(1.0f - powf(alpha, 2.44294f - 0.0215813f * alpha + 0.578637f / alpha)); -} - -ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, float randv) -{ - float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta); - float phi = M_2PI_F * randv; - float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta); - - float3 T, B; - make_orthonormals(D, &T, &B); - return dir.x * T + dir.y * B + dir.z * D; -} - -ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t, - float t, - bool hit, - ccl_private float3 *transmittance) -{ - float3 T = volume_color_transmittance(sigma_t, t); - if (transmittance) { - *transmittance = T; - } - return hit ? T : sigma_t * T; -} - -/* Define the below variable to get the similarity code active, - * and the value represents the cutoff level */ -#define SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL 9 - -ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, - IntegratorState state, - RNGState rng_state, - ccl_private Ray &ray, - ccl_private LocalIntersection &ss_isect) -{ - float bssrdf_u, bssrdf_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - - const float3 P = INTEGRATOR_STATE(state, ray, P); - const float3 N = INTEGRATOR_STATE(state, ray, D); - const float ray_dP = INTEGRATOR_STATE(state, ray, dP); - const float time = INTEGRATOR_STATE(state, ray, time); - const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng); - const int object = INTEGRATOR_STATE(state, isect, object); - - /* Sample diffuse surface scatter into the object. */ - float3 D; - float pdf; - sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf); - if (dot(-Ng, D) <= 0.0f) { - return false; - } - - /* Setup ray. 
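The three Dwivedi helpers above amount to inverse-CDF sampling of p(mu) proportional to 1/(v - mu) on mu in [-1, 1], normalized by log((v + 1) / (v - 1)). A small standalone check in plain C++ (illustrative only, not the Cycles API) that mirrors the two functions and confirms the sampled cosine maps back to the input random number through the analytic CDF:

    #include <cassert>
    #include <cmath>
    #include <cstdio>

    /* p(mu) = 1 / ((v - mu) * log((v + 1) / (v - 1))) on mu in [-1, 1]. */
    static double eval_phase(const double v, const double phase_log, const double mu)
    {
      return 1.0 / ((v - mu) * phase_log);
    }

    /* Inverse-CDF sample: mu = v - (v + 1) * exp(-u * phase_log). */
    static double sample_phase(const double v, const double phase_log, const double u)
    {
      return v - (v + 1.0) * std::exp(-u * phase_log);
    }

    int main()
    {
      const double v = 1.2; /* some diffusion length > 1 */
      const double phase_log = std::log((v + 1.0) / (v - 1.0));
      for (double u = 0.05; u < 1.0; u += 0.1) {
        const double mu = sample_phase(v, phase_log, u);
        /* The analytic CDF over [-1, mu] must reproduce u. */
        const double cdf = (std::log(v + 1.0) - std::log(v - mu)) / phase_log;
        assert(std::fabs(cdf - u) < 1e-9);
        std::printf("u = %.2f  mu = %+.4f  pdf = %.4f\n", u, mu, eval_phase(v, phase_log, mu));
      }
      return 0;
    }

This same identity is why a single precomputed phase_log can serve both evaluation and sampling for the whole walk.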
*/ - ray.P = ray_offset(P, -Ng); - ray.D = D; - ray.t = FLT_MAX; - ray.time = time; - ray.dP = ray_dP; - ray.dD = differential_zero_compact(); - -#ifndef __KERNEL_OPTIX__ - /* Compute or fetch object transforms. */ - Transform ob_itfm ccl_optional_struct_init; - Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm); -#endif - - /* Convert subsurface to volume coefficients. - * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */ - const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo); - const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); - const float anisotropy = INTEGRATOR_STATE(state, subsurface, anisotropy); - - float3 sigma_t, alpha; - float3 throughput = INTEGRATOR_STATE_WRITE(state, path, throughput); - subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput); - float3 sigma_s = sigma_t * alpha; - - /* Theoretically it should be better to use the exact alpha for the channel we're sampling at - * each bounce, but in practice there doesn't seem to be a noticeable difference in exchange - * for making the code significantly more complex and slower (if direction sampling depends on - * the sampled channel, we need to compute its PDF per-channel and consider it for MIS later on). - * - * Since the strength of the guided sampling increases as alpha gets lower, using a value that - * is too low results in fireflies while one that's too high just gives a bit more noise. - * Therefore, the code here uses the highest of the three albedos to be safe. */ - const float diffusion_length = diffusion_length_dwivedi(max3(alpha)); - - if (diffusion_length == 1.0f) { - /* With specific values of alpha the length might become 1, which in asymptotic makes phase to - * be infinite. After first bounce it will cause throughput to be 0. Do early output, avoiding - * numerical issues and extra unneeded work. */ - return false; - } - - /* Precompute term for phase sampling. */ - const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f)); - - /* Modify state for RNGs, decorrelated from other paths. */ - rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); - - /* Random walk until we hit the surface again. */ - bool hit = false; - bool have_opposite_interface = false; - float opposite_distance = 0.0f; - - /* TODO: Disable for `alpha > 0.999` or so? */ - /* Our heuristic, a compromise between guiding and classic. */ - const float guided_fraction = 1.0f - fmaxf(0.5f, powf(fabsf(anisotropy), 0.125f)); - -#ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL - float3 sigma_s_star = sigma_s * (1.0f - anisotropy); - float3 sigma_t_star = sigma_t - sigma_s + sigma_s_star; - float3 sigma_t_org = sigma_t; - float3 sigma_s_org = sigma_s; - const float anisotropy_org = anisotropy; - const float guided_fraction_org = guided_fraction; -#endif - - for (int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) { - /* Advance random number offset. 
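The #ifdef block above applies similarity theory: past the cutoff depth the anisotropic medium (sigma_s, g) is swapped for an isotropic one with reduced scattering sigma_s* = sigma_s * (1 - g), keeping absorption unchanged. A minimal sketch of that substitution, using an illustrative struct rather than the kernel's float3 coefficients:

    struct Medium {
      double sigma_t; /* extinction */
      double sigma_s; /* scattering */
      double g;       /* anisotropy */
    };

    /* Similarity theory: keep absorption, reduce scattering to sigma_s * (1 - g),
     * and continue the walk with an isotropic phase function. */
    static Medium similarity_reduce(const Medium &m)
    {
      const double sigma_a = m.sigma_t - m.sigma_s;
      const double sigma_s_star = m.sigma_s * (1.0 - m.g);
      return {sigma_a + sigma_s_star, sigma_s_star, 0.0};
    }

After enough bounces the directional correlation introduced by the phase function has largely washed out, which is why the cheaper isotropic walk is an acceptable stand-in at depth.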
*/ - rng_state.rng_offset += PRNG_BOUNCE_NUM; - -#ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL - // shadow with local variables according to depth - float anisotropy, guided_fraction; - float3 sigma_s, sigma_t; - if (bounce <= SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL) { - anisotropy = anisotropy_org; - guided_fraction = guided_fraction_org; - sigma_t = sigma_t_org; - sigma_s = sigma_s_org; - } - else { - anisotropy = 0.0f; - guided_fraction = 0.75f; // back to isotropic heuristic from Blender - sigma_t = sigma_t_star; - sigma_s = sigma_s_star; - } -#endif - - /* Sample color channel, use MIS with balance heuristic. */ - float rphase = path_state_rng_1D(kg, &rng_state, PRNG_PHASE_CHANNEL); - float3 channel_pdf; - int channel = volume_sample_channel(alpha, throughput, rphase, &channel_pdf); - float sample_sigma_t = volume_channel_get(sigma_t, channel); - float randt = path_state_rng_1D(kg, &rng_state, PRNG_SCATTER_DISTANCE); - - /* We need the result of the ray-cast to compute the full guided PDF, so just remember the - * relevant terms to avoid recomputing them later. */ - float backward_fraction = 0.0f; - float forward_pdf_factor = 0.0f; - float forward_stretching = 1.0f; - float backward_pdf_factor = 0.0f; - float backward_stretching = 1.0f; - - /* For the initial ray, we already know the direction, so just do classic distance sampling. */ - if (bounce > 0) { - /* Decide whether we should use guided or classic sampling. */ - bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_LIGHT_TERMINATE) < guided_fraction); - - /* Determine if we want to sample away from the incoming interface. - * This only happens if we found a nearby opposite interface, and the probability for it - * depends on how close we are to it already. - * This probability term comes from the recorded presentation of [3]. */ - bool guide_backward = false; - if (have_opposite_interface) { - /* Compute distance of the random walk between the tangent plane at the starting point - * and the assumed opposite interface (the parallel plane that contains the point we - * found in our ray query for the opposite side). */ - float x = clamp(dot(ray.P - P, -N), 0.0f, opposite_distance); - backward_fraction = 1.0f / - (1.0f + expf((opposite_distance - 2.0f * x) / diffusion_length)); - guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE) < backward_fraction; - } - - /* Sample scattering direction. */ - float scatter_u, scatter_v; - path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &scatter_u, &scatter_v); - float cos_theta; - float hg_pdf; - if (guided) { - cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u); - /* The backwards guiding distribution is just mirrored along `sd->N`, so swapping the - * sign here is enough to sample from that instead. */ - if (guide_backward) { - cos_theta = -cos_theta; - } - float3 newD = direction_from_cosine(N, cos_theta, scatter_v); - hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy); - ray.D = newD; - } - else { - float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf); - cos_theta = dot(newD, N); - ray.D = newD; - } - - /* Compute PDF factor caused by phase sampling (as the ratio of guided / classic). - * Since phase sampling is channel-independent, we can get away with applying a factor - * to the guided PDF, which implicitly means pulling out the classic PDF term and letting - * it cancel with an equivalent term in the numerator of the full estimator. 
- * For the backward PDF, we again reuse the same probability distribution with a sign swap. - */ - forward_pdf_factor = M_1_2PI_F * eval_phase_dwivedi(diffusion_length, phase_log, cos_theta) / - hg_pdf; - backward_pdf_factor = M_1_2PI_F * - eval_phase_dwivedi(diffusion_length, phase_log, -cos_theta) / hg_pdf; - - /* Prepare distance sampling. - * For the backwards case, this also needs the sign swapped since now directions against - * `sd->N` (and therefore with negative cos_theta) are preferred. */ - forward_stretching = (1.0f - cos_theta / diffusion_length); - backward_stretching = (1.0f + cos_theta / diffusion_length); - if (guided) { - sample_sigma_t *= guide_backward ? backward_stretching : forward_stretching; - } - } - - /* Sample direction along ray. */ - float t = -logf(1.0f - randt) / sample_sigma_t; - - /* On the first bounce, we use the ray-cast to check if the opposite side is nearby. - * If yes, we will later use backwards guided sampling in order to have a decent - * chance of connecting to it. - * TODO: Maybe use less than 10 times the mean free path? */ - ray.t = (bounce == 0) ? max(t, 10.0f / (min3(sigma_t))) : t; - scene_intersect_local(kg, &ray, &ss_isect, object, NULL, 1); - hit = (ss_isect.num_hits > 0); - - if (hit) { -#ifdef __KERNEL_OPTIX__ - /* t is always in world space with OptiX. */ - ray.t = ss_isect.hits[0].t; -#else - /* Compute world space distance to surface hit. */ - float3 D = transform_direction(&ob_itfm, ray.D); - D = normalize(D) * ss_isect.hits[0].t; - ray.t = len(transform_direction(&ob_tfm, D)); -#endif - } - - if (bounce == 0) { - /* Check if we hit the opposite side. */ - if (hit) { - have_opposite_interface = true; - opposite_distance = dot(ray.P + ray.t * ray.D - P, -N); - } - /* Apart from the opposite side check, we were supposed to only trace up to distance t, - * so check if there would have been a hit in that case. */ - hit = ray.t < t; - } - - /* Use the distance to the exit point for the throughput update if we found one. */ - if (hit) { - t = ray.t; - } - else if (bounce == 0) { - /* Restore original position if nothing was hit after the first bounce, - * without the ray_offset() that was added to avoid self-intersection. - * Otherwise if that offset is relatively large compared to the scattering - * radius, we never go back up high enough to exit the surface. */ - ray.P = P; - } - - /* Advance to new scatter location. */ - ray.P += t * ray.D; - - float3 transmittance; - float3 pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance); - if (bounce > 0) { - /* Compute PDF just like we do for classic sampling, but with the stretched sigma_t. */ - float3 guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL); - - if (have_opposite_interface) { - /* First step of MIS: Depending on geometry we might have two methods for guided - * sampling, so perform MIS between them. */ - float3 back_pdf = subsurface_random_walk_pdf(backward_stretching * sigma_t, t, hit, NULL); - guided_pdf = mix( - guided_pdf * forward_pdf_factor, back_pdf * backward_pdf_factor, backward_fraction); - } - else { - /* Just include phase sampling factor otherwise. */ - guided_pdf *= forward_pdf_factor; - } - - /* Now we apply the MIS balance heuristic between the classic and guided sampling. */ - pdf = mix(pdf, guided_pdf, guided_fraction); - } - - /* Finally, we're applying MIS again to combine the three color channels. 
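The estimator here is one-sample MIS with the balance heuristic: the distance t is drawn from one technique, and the contribution is divided by the blended pdf. A toy single-channel version of that blend (a sketch only; the kernel additionally folds in the phase-sampling factors and a backward-guided technique, and repeats this per RGB channel before the channel MIS):

    #include <cmath>

    /* Pdf of free-flight sampling: density sigma * exp(-sigma * t) if we scatter
     * at t, or the probability exp(-sigma * t) of flying past a surface hit at t
     * (mirrors subsurface_random_walk_pdf above). */
    static double distance_pdf(const double sigma, const double t, const bool hit)
    {
      const double T = std::exp(-sigma * t);
      return hit ? T : sigma * T;
    }

    /* Balance-heuristic blend of the classic pdf and a guided pdf whose sigma is
     * stretched by (1 -/+ cos_theta / diffusion_length). */
    static double combined_pdf(const double sigma_t,
                               const double stretch,
                               const double guided_fraction,
                               const double t,
                               const bool hit)
    {
      const double classic = distance_pdf(sigma_t, t, hit);
      const double guided = distance_pdf(stretch * sigma_t, t, hit);
      return (1.0 - guided_fraction) * classic + guided_fraction * guided;
    }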
- * Altogether, the MIS computation combines up to nine different estimators: - * {classic, guided, backward_guided} x {r, g, b} */ - throughput *= (hit ? transmittance : sigma_s * transmittance) / dot(channel_pdf, pdf); - - if (hit) { - /* If we hit the surface, we are done. */ - break; - } - else if (throughput.x < VOLUME_THROUGHPUT_EPSILON && - throughput.y < VOLUME_THROUGHPUT_EPSILON && - throughput.z < VOLUME_THROUGHPUT_EPSILON) { - /* Avoid unnecessary work and precision issue when throughput gets really small. */ - break; - } - } - - if (hit) { - kernel_assert(isfinite3_safe(throughput)); - INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; - } - - return hit; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/integrator_volume_stack.h b/intern/cycles/kernel/integrator/integrator_volume_stack.h deleted file mode 100644 index cf69826ffff..00000000000 --- a/intern/cycles/kernel/integrator/integrator_volume_stack.h +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright 2011-2021 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Volume Stack - * - * This is an array of object/shared ID's that the current segment of the path - * is inside of. */ - -template -ccl_device void volume_stack_enter_exit(KernelGlobals kg, - ccl_private const ShaderData *sd, - StackReadOp stack_read, - StackWriteOp stack_write) -{ - /* todo: we should have some way for objects to indicate if they want the - * world shader to work inside them. excluding it by default is problematic - * because non-volume objects can't be assumed to be closed manifolds */ - if (!(sd->flag & SD_HAS_VOLUME)) { - return; - } - - if (sd->flag & SD_BACKFACING) { - /* Exit volume object: remove from stack. */ - for (int i = 0;; i++) { - VolumeStack entry = stack_read(i); - if (entry.shader == SHADER_NONE) { - break; - } - - if (entry.object == sd->object) { - /* Shift back next stack entries. */ - do { - entry = stack_read(i + 1); - stack_write(i, entry); - i++; - } while (entry.shader != SHADER_NONE); - - return; - } - } - } - else { - /* Enter volume object: add to stack. */ - int i; - for (i = 0;; i++) { - VolumeStack entry = stack_read(i); - if (entry.shader == SHADER_NONE) { - break; - } - - /* Already in the stack? then we have nothing to do. */ - if (entry.object == sd->object) { - return; - } - } - - /* If we exceed the stack limit, ignore. */ - if (i >= kernel_data.volume_stack_size - 1) { - return; - } - - /* Add to the end of the stack. 
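The same enter/exit bookkeeping, restated against a std::vector for clarity; the kernel instead works on a fixed-size array terminated by a SHADER_NONE entry and accessed through the stack_read/stack_write lambdas, so treat this as an illustrative sketch rather than a drop-in:

    #include <algorithm>
    #include <vector>

    struct Entry {
      int object;
      int shader;
    };

    static void stack_enter_exit(std::vector<Entry> &stack,
                                 const Entry &e,
                                 const bool backfacing,
                                 const size_t max_size)
    {
      if (backfacing) {
        /* Exit: remove the matching entry, if any. */
        stack.erase(std::remove_if(stack.begin(),
                                   stack.end(),
                                   [&](const Entry &s) { return s.object == e.object; }),
                    stack.end());
      }
      else {
        /* Enter: skip duplicates and respect the stack limit. */
        for (const Entry &s : stack) {
          if (s.object == e.object) {
            return;
          }
        }
        if (stack.size() < max_size) {
          stack.push_back(e);
        }
      }
    }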
*/ - const VolumeStack new_entry = {sd->object, sd->shader}; - const VolumeStack empty_entry = {OBJECT_NONE, SHADER_NONE}; - stack_write(i, new_entry); - stack_write(i + 1, empty_entry); - } -} - -ccl_device void volume_stack_enter_exit(KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *sd) -{ - volume_stack_enter_exit( - kg, - sd, - [=](const int i) { return integrator_state_read_volume_stack(state, i); }, - [=](const int i, const VolumeStack entry) { - integrator_state_write_volume_stack(state, i, entry); - }); -} - -ccl_device void shadow_volume_stack_enter_exit(KernelGlobals kg, - IntegratorShadowState state, - ccl_private const ShaderData *sd) -{ - volume_stack_enter_exit( - kg, - sd, - [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); }, - [=](const int i, const VolumeStack entry) { - integrator_state_write_shadow_volume_stack(state, i, entry); - }); -} - -/* Clean stack after the last bounce. - * - * It is expected that all volumes are closed manifolds, so at the time when ray - * hits nothing (for example, it is a last bounce which goes to environment) the - * only expected volume in the stack is the world's one. All the rest volume - * entries should have been exited already. - * - * This isn't always true because of ray intersection precision issues, which - * could lead us to an infinite non-world volume in the stack, causing render - * artifacts. - * - * Use this function after the last bounce to get rid of all volumes apart from - * the world's one after the last bounce to avoid render artifacts. - */ -ccl_device_inline void volume_stack_clean(KernelGlobals kg, IntegratorState state) -{ - if (kernel_data.background.volume_shader != SHADER_NONE) { - /* Keep the world's volume in stack. */ - INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, shader) = SHADER_NONE; - } - else { - INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, shader) = SHADER_NONE; - } -} - -template -ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read) -{ - float step_size = FLT_MAX; - - for (int i = 0;; i++) { - VolumeStack entry = stack_read(i); - if (entry.shader == SHADER_NONE) { - break; - } - - int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags; - - bool heterogeneous = false; - - if (shader_flag & SD_HETEROGENEOUS_VOLUME) { - heterogeneous = true; - } - else if (shader_flag & SD_NEED_VOLUME_ATTRIBUTES) { - /* We want to render world or objects without any volume grids - * as homogeneous, but can only verify this at run-time since other - * heterogeneous volume objects may be using the same shader. 
*/ - int object = entry.object; - if (object != OBJECT_NONE) { - int object_flag = kernel_tex_fetch(__object_flag, object); - if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) { - heterogeneous = true; - } - } - } - - if (heterogeneous) { - float object_step_size = object_volume_step_size(kg, entry.object); - object_step_size *= kernel_data.integrator.volume_step_rate; - step_size = fminf(object_step_size, step_size); - } - } - - return step_size; -} - -typedef enum VolumeSampleMethod { - VOLUME_SAMPLE_NONE = 0, - VOLUME_SAMPLE_DISTANCE = (1 << 0), - VOLUME_SAMPLE_EQUIANGULAR = (1 << 1), - VOLUME_SAMPLE_MIS = (VOLUME_SAMPLE_DISTANCE | VOLUME_SAMPLE_EQUIANGULAR), -} VolumeSampleMethod; - -ccl_device VolumeSampleMethod volume_stack_sample_method(KernelGlobals kg, IntegratorState state) -{ - VolumeSampleMethod method = VOLUME_SAMPLE_NONE; - - for (int i = 0;; i++) { - VolumeStack entry = integrator_state_read_volume_stack(state, i); - if (entry.shader == SHADER_NONE) { - break; - } - - int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags; - - if (shader_flag & SD_VOLUME_MIS) { - /* Multiple importance sampling. */ - return VOLUME_SAMPLE_MIS; - } - else if (shader_flag & SD_VOLUME_EQUIANGULAR) { - /* Distance + equiangular sampling -> multiple importance sampling. */ - if (method == VOLUME_SAMPLE_DISTANCE) { - return VOLUME_SAMPLE_MIS; - } - - /* Only equiangular sampling. */ - method = VOLUME_SAMPLE_EQUIANGULAR; - } - else { - /* Distance + equiangular sampling -> multiple importance sampling. */ - if (method == VOLUME_SAMPLE_EQUIANGULAR) { - return VOLUME_SAMPLE_MIS; - } - - /* Distance sampling only. */ - method = VOLUME_SAMPLE_DISTANCE; - } - } - - return method; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h new file mode 100644 index 00000000000..d5a9df9669b --- /dev/null +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -0,0 +1,244 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/camera/projection.h" + +#include "kernel/integrator/path_state.h" +#include "kernel/integrator/shadow_catcher.h" + +#include "kernel/light/light.h" + +#include "kernel/util/differential.h" + +#include "kernel/geom/geom.h" + +#include "kernel/bvh/bvh.h" + +CCL_NAMESPACE_BEGIN + +template +ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg, + IntegratorState state, + const int shader_flags) +{ + + /* Optional AO bounce termination. + * We continue evaluating emissive/transparent surfaces and volumes, similar + * to direct lighting. Only if we know there are none can we terminate the + * path immediately. 
*/ + if (path_state_ao_bounce(kg, state)) { + if (shader_flags & (SD_HAS_TRANSPARENT_SHADOW | SD_HAS_EMISSION)) { + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + else if (!integrator_state_volume_stack_is_empty(kg, state)) { + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_AFTER_VOLUME; + } + else { + return true; + } + } + + /* Load random number state. */ + RNGState rng_state; + path_state_rng_load(state, &rng_state); + + /* We perform path termination in this kernel to avoid launching shade_surface + * and evaluating the shader when not needed. Only for emission and transparent + * surfaces in front of emission do we need to evaluate the shader, since we + * perform MIS as part of indirect rays. */ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + const float probability = path_state_continuation_probability(kg, state, path_flag); + + if (probability != 1.0f) { + const float terminate = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE); + + if (probability == 0.0f || terminate >= probability) { + if (shader_flags & SD_HAS_EMISSION) { + /* Mark path to be terminated right after shader evaluation on the surface. */ + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE; + } + else if (!integrator_state_volume_stack_is_empty(kg, state)) { + /* TODO: only do this for emissive volumes. */ + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_IN_NEXT_VOLUME; + } + else { + return true; + } + } + } + + return false; +} + +/* Note that current_kernel is a template value since making this a variable + * leads to poor performance with CUDA atomics. */ +template +ccl_device_forceinline void integrator_intersect_shader_next_kernel( + KernelGlobals kg, + IntegratorState state, + ccl_private const Intersection *ccl_restrict isect, + const int shader, + const int shader_flags) +{ + /* Note on scheduling. + * + * When there is no shadow catcher split the scheduling is simple: schedule surface shading with + * or without raytrace support, depending on the shader used. + * + * When there is a shadow catcher split the general idea is to have the following configuration: + * + * - Schedule surface shading kernel (with corresponding raytrace support) for the ray which + * will trace shadow catcher object. + * + * - When no alpha-over of approximate shadow catcher is needed, schedule surface shading for + * the matte ray. + * + * - Otherwise schedule background shading kernel, so that we have a background to alpha-over + * on. The background kernel will then schedule surface shading for the matte ray. + * + * Note that the splitting leaves kernel and sorting counters as-is, so use INIT semantic for + * the matte path. 
*/ + + const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); + + if (use_raytrace_kernel) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + } + else { + INTEGRATOR_PATH_NEXT_SORTED(current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + } + +#ifdef __SHADOW_CATCHER__ + const int object_flags = intersection_get_object_flags(kg, isect); + if (kernel_shadow_catcher_split(kg, state, object_flags)) { + if (kernel_data.film.pass_background != PASS_UNUSED && !kernel_data.background.transparent) { + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_BACKGROUND; + + INTEGRATOR_PATH_INIT(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + } + else if (use_raytrace_kernel) { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); + } + else { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, shader); + } + } +#endif +} + +ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state) +{ + PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST); + + /* Read ray from integrator state into local memory. */ + Ray ray ccl_optional_struct_init; + integrator_state_read_ray(kg, state, &ray); + kernel_assert(ray.t != 0.0f); + + const uint visibility = path_state_ray_visibility(state); + const int last_isect_prim = INTEGRATOR_STATE(state, isect, prim); + const int last_isect_object = INTEGRATOR_STATE(state, isect, object); + + /* Trick to use short AO rays to approximate indirect light at the end of the path. */ + if (path_state_ao_bounce(kg, state)) { + ray.t = kernel_data.integrator.ao_bounces_distance; + + const float object_ao_distance = kernel_tex_fetch(__objects, last_isect_object).ao_distance; + if (object_ao_distance != 0.0f) { + ray.t = object_ao_distance; + } + } + + /* Scene Intersection. */ + Intersection isect ccl_optional_struct_init; + bool hit = scene_intersect(kg, &ray, visibility, &isect); + + /* TODO: remove this and do it in the various intersection functions instead. */ + if (!hit) { + isect.prim = PRIM_NONE; + } + + /* Light intersection for MIS. */ + if (kernel_data.integrator.use_lamp_mis) { + /* NOTE: if we make lights visible to camera rays, we'll need to initialize + * these in the path_state_init. */ + const int last_type = INTEGRATOR_STATE(state, isect, type); + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + hit = lights_intersect( + kg, &ray, &isect, last_isect_prim, last_isect_object, last_type, path_flag) || + hit; + } + + /* Write intersection result into global integrator state memory. */ + integrator_state_write_isect(kg, state, &isect); + +#ifdef __VOLUME__ + if (!integrator_state_volume_stack_is_empty(kg, state)) { + const bool hit_surface = hit && !(isect.type & PRIMITIVE_LAMP); + const int shader = (hit_surface) ? intersection_get_shader(kg, &isect) : SHADER_NONE; + const int flags = (hit_surface) ? kernel_tex_fetch(__shaders, shader).flags : 0; + + if (!integrator_intersect_terminate( + kg, state, flags)) { + /* Continue with volume kernel if we are inside a volume, regardless + * if we hit anything. */ + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, + DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + } + else { + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + } + return; + } +#endif + + if (hit) { + /* Hit a surface, continue with light or surface kernel. 
*/ + if (isect.type & PRIMITIVE_LAMP) { + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, + DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + return; + } + else { + /* Hit a surface, continue with surface kernel unless terminated. */ + const int shader = intersection_get_shader(kg, &isect); + const int flags = kernel_tex_fetch(__shaders, shader).flags; + + if (!integrator_intersect_terminate( + kg, state, flags)) { + integrator_intersect_shader_next_kernel( + kg, state, &isect, shader, flags); + return; + } + else { + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + return; + } + } + } + else { + /* Nothing hit, continue with background kernel. */ + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, + DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + return; + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/intersect_shadow.h b/intern/cycles/kernel/integrator/intersect_shadow.h new file mode 100644 index 00000000000..90422445fad --- /dev/null +++ b/intern/cycles/kernel/integrator/intersect_shadow.h @@ -0,0 +1,190 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Visibility for the shadow ray. */ +ccl_device_forceinline uint integrate_intersect_shadow_visibility(KernelGlobals kg, + ConstIntegratorShadowState state) +{ + uint visibility = PATH_RAY_SHADOW; + +#ifdef __SHADOW_CATCHER__ + const uint32_t path_flag = INTEGRATOR_STATE(state, shadow_path, flag); + visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility); +#endif + + return visibility; +} + +ccl_device bool integrate_intersect_shadow_opaque(KernelGlobals kg, + IntegratorShadowState state, + ccl_private const Ray *ray, + const uint visibility) +{ + /* Mask which will pick only opaque visibility bits from the `visibility`. + * Calculate the mask at compile time: the visibility will either be a high bits for the shadow + * catcher objects, or lower bits for the regular objects (there is no need to check the path + * state here again). 
*/ + constexpr const uint opaque_mask = SHADOW_CATCHER_VISIBILITY_SHIFT(PATH_RAY_SHADOW_OPAQUE) | + PATH_RAY_SHADOW_OPAQUE; + + Intersection isect; + const bool opaque_hit = scene_intersect(kg, ray, visibility & opaque_mask, &isect); + + if (!opaque_hit) { + INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0; + } + + return opaque_hit; +} + +ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals kg, + ConstIntegratorShadowState state) +{ + const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; + const int transparent_bounce = INTEGRATOR_STATE(state, shadow_path, transparent_bounce); + + return max(transparent_max_bounce - transparent_bounce - 1, 0); +} + +#ifdef __TRANSPARENT_SHADOWS__ +# if defined(__KERNEL_CPU__) +ccl_device int shadow_intersections_compare(const void *a, const void *b) +{ + const Intersection *isect_a = (const Intersection *)a; + const Intersection *isect_b = (const Intersection *)b; + + if (isect_a->t < isect_b->t) + return -1; + else if (isect_a->t > isect_b->t) + return 1; + else + return 0; +} +# endif + +ccl_device_inline void sort_shadow_intersections(IntegratorShadowState state, uint num_hits) +{ + kernel_assert(num_hits > 0); + +# ifdef __KERNEL_GPU__ + /* Use bubble sort which has more friendly memory pattern on GPU. */ + bool swapped; + do { + swapped = false; + for (int j = 0; j < num_hits - 1; ++j) { + if (INTEGRATOR_STATE_ARRAY(state, shadow_isect, j, t) > + INTEGRATOR_STATE_ARRAY(state, shadow_isect, j + 1, t)) { + struct Intersection tmp_j ccl_optional_struct_init; + struct Intersection tmp_j_1 ccl_optional_struct_init; + integrator_state_read_shadow_isect(state, &tmp_j, j); + integrator_state_read_shadow_isect(state, &tmp_j_1, j + 1); + integrator_state_write_shadow_isect(state, &tmp_j_1, j); + integrator_state_write_shadow_isect(state, &tmp_j, j + 1); + swapped = true; + } + } + --num_hits; + } while (swapped); +# else + Intersection *isect_array = (Intersection *)state->shadow_isect; + qsort(isect_array, num_hits, sizeof(Intersection), shadow_intersections_compare); +# endif +} + +ccl_device bool integrate_intersect_shadow_transparent(KernelGlobals kg, + IntegratorShadowState state, + ccl_private const Ray *ray, + const uint visibility) +{ + /* Limit the number hits to the max transparent bounces allowed and the size that we + * have available in the integrator state. */ + const uint max_hits = integrate_shadow_max_transparent_hits(kg, state); + uint num_hits = 0; + float throughput = 1.0f; + bool opaque_hit = scene_intersect_shadow_all( + kg, state, ray, visibility, max_hits, &num_hits, &throughput); + + /* Computed throughput from baked shadow transparency, where we can bypass recording + * intersections and shader evaluation. */ + if (throughput != 1.0f) { + INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) *= throughput; + } + + /* If number of hits exceed the transparent bounces limit, make opaque. 
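On the CPU path the qsort comparator above could equally be written with std::sort and a lambda; a small equivalent using an illustrative Hit struct rather than the kernel's Intersection:

    #include <algorithm>
    #include <vector>

    struct Hit {
      float t;
      int prim;
    };

    /* Sort recorded transparent-shadow hits front to back. */
    static void sort_hits_by_distance(std::vector<Hit> &hits)
    {
      std::sort(hits.begin(), hits.end(), [](const Hit &a, const Hit &b) { return a.t < b.t; });
    }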
*/ + if (num_hits > max_hits) { + opaque_hit = true; + } + + if (!opaque_hit) { + const uint num_recorded_hits = min(num_hits, min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE)); + + if (num_recorded_hits > 0) { + sort_shadow_intersections(state, num_recorded_hits); + } + + INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = num_hits; + } + else { + INTEGRATOR_STATE_WRITE(state, shadow_path, num_hits) = 0; + } + + return opaque_hit; +} +#endif + +ccl_device void integrator_intersect_shadow(KernelGlobals kg, IntegratorShadowState state) +{ + PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW); + + /* Read ray from integrator state into local memory. */ + Ray ray ccl_optional_struct_init; + integrator_state_read_shadow_ray(kg, state, &ray); + + /* Compute visibility. */ + const uint visibility = integrate_intersect_shadow_visibility(kg, state); + +#ifdef __TRANSPARENT_SHADOWS__ + /* TODO: compile different kernels depending on this? Especially for OptiX + * conditional trace calls are bad. */ + const bool opaque_hit = (kernel_data.integrator.transparent_shadows) ? + integrate_intersect_shadow_transparent(kg, state, &ray, visibility) : + integrate_intersect_shadow_opaque(kg, state, &ray, visibility); +#else + const bool opaque_hit = integrate_intersect_shadow_opaque(kg, state, &ray, visibility); +#endif + + if (opaque_hit) { + /* Hit an opaque surface, shadow path ends here. */ + INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + return; + } + else { + /* Hit nothing or transparent surfaces, continue to shadow kernel + * for shading and render buffer output. + * + * TODO: could also write to render buffer directly if no transparent shadows? + * Could save a kernel execution for the common case. */ + INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, + DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + return; + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/intersect_subsurface.h b/intern/cycles/kernel/integrator/intersect_subsurface.h new file mode 100644 index 00000000000..27b8e1e5f5a --- /dev/null +++ b/intern/cycles/kernel/integrator/intersect_subsurface.h @@ -0,0 +1,36 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "kernel/integrator/subsurface.h" + +CCL_NAMESPACE_BEGIN + +ccl_device void integrator_intersect_subsurface(KernelGlobals kg, IntegratorState state) +{ + PROFILING_INIT(kg, PROFILING_INTERSECT_SUBSURFACE); + +#ifdef __SUBSURFACE__ + if (subsurface_scatter(kg, state)) { + return; + } +#endif + + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h new file mode 100644 index 00000000000..1c91318ff9c --- /dev/null +++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h @@ -0,0 +1,206 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/bvh/bvh.h" +#include "kernel/geom/geom.h" +#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/volume_stack.h" + +CCL_NAMESPACE_BEGIN + +ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg, + IntegratorState state, + const float3 from_P, + const float3 to_P) +{ + PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK); + + ShaderDataTinyStorage stack_sd_storage; + ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage); + + kernel_assert(kernel_data.integrator.use_volumes); + + Ray volume_ray ccl_optional_struct_init; + volume_ray.P = from_P; + volume_ray.D = normalize_len(to_P - from_P, &volume_ray.t); + + /* Store to avoid global fetches on every intersection step. */ + const uint volume_stack_size = kernel_data.volume_stack_size; + +#ifdef __VOLUME_RECORD_ALL__ + Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; + uint num_hits = scene_intersect_volume_all( + kg, &volume_ray, hits, 2 * volume_stack_size, PATH_RAY_ALL_VISIBILITY); + if (num_hits > 0) { + Intersection *isect = hits; + + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); + + for (uint hit = 0; hit < num_hits; ++hit, ++isect) { + shader_setup_from_ray(kg, stack_sd, &volume_ray, isect); + volume_stack_enter_exit(kg, state, stack_sd); + } + } +#else + Intersection isect; + int step = 0; + while (step < 2 * volume_stack_size && + scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) { + shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect); + volume_stack_enter_exit(kg, state, stack_sd); + + /* Move ray forward. 
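normalize_len, as used on both sides of this loop, appears to return the unit direction while writing the original length through its second argument (an assumption based on how it is called here, since the helper is defined elsewhere). A plain C++ equivalent under that assumption:

    #include <cmath>

    struct Vec3 {
      float x, y, z;
    };

    /* Return the normalized vector and write the original length to *t_out. */
    static Vec3 normalize_len(const Vec3 &a, float *t_out)
    {
      const float len = std::sqrt(a.x * a.x + a.y * a.y + a.z * a.z);
      *t_out = len;
      const float inv = (len > 0.0f) ? 1.0f / len : 0.0f;
      return {a.x * inv, a.y * inv, a.z * inv};
    }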
*/ + volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); + if (volume_ray.t != FLT_MAX) { + volume_ray.D = normalize_len(to_P - volume_ray.P, &volume_ray.t); + } + ++step; + } +#endif +} + +ccl_device void integrator_intersect_volume_stack(KernelGlobals kg, IntegratorState state) +{ + PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_STACK); + + ShaderDataTinyStorage stack_sd_storage; + ccl_private ShaderData *stack_sd = AS_SHADER_DATA(&stack_sd_storage); + + Ray volume_ray ccl_optional_struct_init; + integrator_state_read_ray(kg, state, &volume_ray); + volume_ray.t = FLT_MAX; + + const uint visibility = (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_ALL_VISIBILITY); + int stack_index = 0, enclosed_index = 0; + + /* Write background shader. */ + if (kernel_data.background.volume_shader != SHADER_NONE) { + const VolumeStack new_entry = {OBJECT_NONE, kernel_data.background.volume_shader}; + integrator_state_write_volume_stack(state, stack_index, new_entry); + stack_index++; + } + + /* Store to avoid global fetches on every intersection step. */ + const uint volume_stack_size = kernel_data.volume_stack_size; + +#ifdef __VOLUME_RECORD_ALL__ + Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1]; + uint num_hits = scene_intersect_volume_all( + kg, &volume_ray, hits, 2 * volume_stack_size, visibility); + if (num_hits > 0) { + int enclosed_volumes[MAX_VOLUME_STACK_SIZE]; + Intersection *isect = hits; + + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); + + for (uint hit = 0; hit < num_hits; ++hit, ++isect) { + shader_setup_from_ray(kg, stack_sd, &volume_ray, isect); + if (stack_sd->flag & SD_BACKFACING) { + bool need_add = true; + for (int i = 0; i < enclosed_index && need_add; ++i) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. + */ + if (enclosed_volumes[i] == stack_sd->object) { + need_add = false; + } + } + for (int i = 0; i < stack_index && need_add; ++i) { + /* Don't add intersections twice. */ + VolumeStack entry = integrator_state_read_volume_stack(state, i); + if (entry.object == stack_sd->object) { + need_add = false; + break; + } + } + if (need_add && stack_index < volume_stack_size - 1) { + const VolumeStack new_entry = {stack_sd->object, stack_sd->shader}; + integrator_state_write_volume_stack(state, stack_index, new_entry); + ++stack_index; + } + } + else { + /* If ray from camera enters the volume, this volume shouldn't + * be added to the stack on exit. + */ + enclosed_volumes[enclosed_index++] = stack_sd->object; + } + } + } +#else + /* CUDA does not support definition of a variable size arrays, so use the maximum possible. */ + int enclosed_volumes[MAX_VOLUME_STACK_SIZE]; + int step = 0; + + while (stack_index < volume_stack_size - 1 && enclosed_index < volume_stack_size - 1 && + step < 2 * volume_stack_size) { + Intersection isect; + if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) { + break; + } + + shader_setup_from_ray(kg, stack_sd, &volume_ray, &isect); + if (stack_sd->flag & SD_BACKFACING) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. + */ + bool need_add = true; + for (int i = 0; i < enclosed_index && need_add; ++i) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. + */ + if (enclosed_volumes[i] == stack_sd->object) { + need_add = false; + } + } + for (int i = 0; i < stack_index && need_add; ++i) { + /* Don't add intersections twice. 
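The enclosed_volumes bookkeeping implements a simple rule: walking the boundary hits in order along the ray, an exit (backfacing hit) with no earlier matching entry means the ray origin was inside that object. A standalone sketch of just that rule (illustrative types, not the kernel's):

    #include <algorithm>
    #include <set>
    #include <vector>

    struct BoundaryHit {
      int object;
      bool backfacing; /* true: leaving the object, false: entering it */
    };

    /* Hits must already be sorted by distance along the ray. */
    static std::vector<int> containing_volumes(const std::vector<BoundaryHit> &hits)
    {
      std::set<int> entered;
      std::vector<int> inside;
      for (const BoundaryHit &h : hits) {
        if (!h.backfacing) {
          entered.insert(h.object);
        }
        else if (!entered.count(h.object) &&
                 std::find(inside.begin(), inside.end(), h.object) == inside.end()) {
          /* Exited an object that was never entered: the origin started inside it. */
          inside.push_back(h.object);
        }
      }
      return inside;
    }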
*/ + VolumeStack entry = integrator_state_read_volume_stack(state, i); + if (entry.object == stack_sd->object) { + need_add = false; + break; + } + } + if (need_add) { + const VolumeStack new_entry = {stack_sd->object, stack_sd->shader}; + integrator_state_write_volume_stack(state, stack_index, new_entry); + ++stack_index; + } + } + else { + /* If ray from camera enters the volume, this volume shouldn't + * be added to the stack on exit. + */ + enclosed_volumes[enclosed_index++] = stack_sd->object; + } + + /* Move ray forward. */ + volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); + ++step; + } +#endif + + /* Write terminator. */ + const VolumeStack new_entry = {OBJECT_NONE, SHADER_NONE}; + integrator_state_write_volume_stack(state, stack_index, new_entry); + + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/megakernel.h b/intern/cycles/kernel/integrator/megakernel.h new file mode 100644 index 00000000000..d8cc794dc7a --- /dev/null +++ b/intern/cycles/kernel/integrator/megakernel.h @@ -0,0 +1,113 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/integrator/init_from_camera.h" +#include "kernel/integrator/intersect_closest.h" +#include "kernel/integrator/intersect_shadow.h" +#include "kernel/integrator/intersect_subsurface.h" +#include "kernel/integrator/intersect_volume_stack.h" +#include "kernel/integrator/shade_background.h" +#include "kernel/integrator/shade_light.h" +#include "kernel/integrator/shade_shadow.h" +#include "kernel/integrator/shade_surface.h" +#include "kernel/integrator/shade_volume.h" + +CCL_NAMESPACE_BEGIN + +ccl_device void integrator_megakernel(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) +{ + /* Each kernel indicates the next kernel to execute, so here we simply + * have to check what that kernel is and execute it. */ + while (true) { + /* Handle any shadow paths before we potentially create more shadow paths. */ + const uint32_t shadow_queued_kernel = INTEGRATOR_STATE( + &state->shadow, shadow_path, queued_kernel); + if (shadow_queued_kernel) { + switch (shadow_queued_kernel) { + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: + integrator_intersect_shadow(kg, &state->shadow); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: + integrator_shade_shadow(kg, &state->shadow, render_buffer); + break; + default: + kernel_assert(0); + break; + } + continue; + } + + /* Handle any AO paths before we potentially create more AO paths. 
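The megakernel is essentially a dispatch loop over a "queued kernel" field: every kernel records its successor and the loop runs until nothing is queued. A stripped-down illustration of that control flow with a toy enum and transitions, mirroring the switches in this function but not the real kernel list:

    #include <cstdint>

    enum ToyKernel : uint32_t { TOY_KERNEL_NONE = 0, TOY_KERNEL_INTERSECT, TOY_KERNEL_SHADE };

    struct ToyPathState {
      uint32_t queued_kernel = TOY_KERNEL_INTERSECT;
    };

    /* Run whatever is queued until the path terminates (queued_kernel == 0);
     * each "kernel" queues its successor. */
    static void run_path(ToyPathState &state)
    {
      while (state.queued_kernel != TOY_KERNEL_NONE) {
        switch (state.queued_kernel) {
          case TOY_KERNEL_INTERSECT:
            state.queued_kernel = TOY_KERNEL_SHADE; /* pretend the ray hit something */
            break;
          case TOY_KERNEL_SHADE:
          default:
            state.queued_kernel = TOY_KERNEL_NONE; /* pretend the path ends here */
            break;
        }
      }
    }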
*/ + const uint32_t ao_queued_kernel = INTEGRATOR_STATE(&state->ao, shadow_path, queued_kernel); + if (ao_queued_kernel) { + switch (ao_queued_kernel) { + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: + integrator_intersect_shadow(kg, &state->ao); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: + integrator_shade_shadow(kg, &state->ao, render_buffer); + break; + default: + kernel_assert(0); + break; + } + continue; + } + + /* Then handle regular path kernels. */ + const uint32_t queued_kernel = INTEGRATOR_STATE(state, path, queued_kernel); + if (queued_kernel) { + switch (queued_kernel) { + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: + integrator_intersect_closest(kg, state); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND: + integrator_shade_background(kg, state, render_buffer); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: + integrator_shade_surface(kg, state, render_buffer); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: + integrator_shade_volume(kg, state, render_buffer); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: + integrator_shade_surface_raytrace(kg, state, render_buffer); + break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: + integrator_shade_light(kg, state, render_buffer); + break; + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: + integrator_intersect_subsurface(kg, state); + break; + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: + integrator_intersect_volume_stack(kg, state); + break; + default: + kernel_assert(0); + break; + } + continue; + } + + break; + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h new file mode 100644 index 00000000000..8311b97dedb --- /dev/null +++ b/intern/cycles/kernel/integrator/path_state.h @@ -0,0 +1,376 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/sample/pattern.h" + +CCL_NAMESPACE_BEGIN + +/* Initialize queues, so that the this path is considered terminated. + * Used for early outputs in the camera ray initialization, as well as initialization of split + * states for shadow catcher. */ +ccl_device_inline void path_state_init_queues(IntegratorState state) +{ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; +#ifdef __KERNEL_CPU__ + INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0; + INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0; +#endif +} + +/* Minimalistic initialization of the path state, which is needed for early outputs in the + * integrator initialization to work. 
*/ +ccl_device_inline void path_state_init(IntegratorState state, + ccl_global const KernelWorkTile *ccl_restrict tile, + const int x, + const int y) +{ + const uint render_pixel_index = (uint)tile->offset + x + y * tile->stride; + + INTEGRATOR_STATE_WRITE(state, path, render_pixel_index) = render_pixel_index; + + path_state_init_queues(state); +} + +/* Initialize the rest of the path state needed to continue the path integration. */ +ccl_device_inline void path_state_init_integrator(KernelGlobals kg, + IntegratorState state, + const int sample, + const uint rng_hash) +{ + INTEGRATOR_STATE_WRITE(state, path, sample) = sample; + INTEGRATOR_STATE_WRITE(state, path, bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, glossy_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = 0; + INTEGRATOR_STATE_WRITE(state, path, rng_hash) = rng_hash; + INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BASE_NUM; + INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | + PATH_RAY_TRANSPARENT_BACKGROUND; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; + INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f); + + if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, object) = OBJECT_NONE; + INTEGRATOR_STATE_ARRAY_WRITE( + state, volume_stack, 0, shader) = kernel_data.background.volume_shader; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, object) = OBJECT_NONE; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, shader) = SHADER_NONE; + } + +#ifdef __DENOISING_FEATURES__ + if (kernel_data.kernel_features & KERNEL_FEATURE_DENOISING) { + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_DENOISING_FEATURES; + INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_float3(); + } +#endif +} + +ccl_device_inline void path_state_next(KernelGlobals kg, IntegratorState state, int label) +{ + uint32_t flag = INTEGRATOR_STATE(state, path, flag); + + /* ray through transparent keeps same flags from previous ray and is + * not counted as a regular bounce, transparent has separate max */ + if (label & LABEL_TRANSPARENT) { + uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce) + 1; + + flag |= PATH_RAY_TRANSPARENT; + if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { + flag |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE; + } + + if (!kernel_data.integrator.transparent_shadows) + flag |= PATH_RAY_MIS_SKIP; + + INTEGRATOR_STATE_WRITE(state, path, flag) = flag; + INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; + /* Random number generator next bounce. 
*/ + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; + return; + } + + uint32_t bounce = INTEGRATOR_STATE(state, path, bounce) + 1; + if (bounce >= kernel_data.integrator.max_bounce) { + flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + + flag &= ~(PATH_RAY_ALL_VISIBILITY | PATH_RAY_MIS_SKIP); + +#ifdef __VOLUME__ + if (label & LABEL_VOLUME_SCATTER) { + /* volume scatter */ + flag |= PATH_RAY_VOLUME_SCATTER; + flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; + if (bounce == 1) { + flag |= PATH_RAY_VOLUME_PASS; + } + + const int volume_bounce = INTEGRATOR_STATE(state, path, volume_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = volume_bounce; + if (volume_bounce >= kernel_data.integrator.max_volume_bounce) { + flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + else +#endif + { + /* surface reflection/transmission */ + if (label & LABEL_REFLECT) { + flag |= PATH_RAY_REFLECT; + flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; + + if (label & LABEL_DIFFUSE) { + const int diffuse_bounce = INTEGRATOR_STATE(state, path, diffuse_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, diffuse_bounce) = diffuse_bounce; + if (diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) { + flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + else { + const int glossy_bounce = INTEGRATOR_STATE(state, path, glossy_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, glossy_bounce) = glossy_bounce; + if (glossy_bounce >= kernel_data.integrator.max_glossy_bounce) { + flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + } + else { + kernel_assert(label & LABEL_TRANSMIT); + + flag |= PATH_RAY_TRANSMIT; + + if (!(label & LABEL_TRANSMIT_TRANSPARENT)) { + flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; + } + + const int transmission_bounce = INTEGRATOR_STATE(state, path, transmission_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, transmission_bounce) = transmission_bounce; + if (transmission_bounce >= kernel_data.integrator.max_transmission_bounce) { + flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + + /* diffuse/glossy/singular */ + if (label & LABEL_DIFFUSE) { + flag |= PATH_RAY_DIFFUSE | PATH_RAY_DIFFUSE_ANCESTOR; + } + else if (label & LABEL_GLOSSY) { + flag |= PATH_RAY_GLOSSY; + } + else { + kernel_assert(label & LABEL_SINGULAR); + flag |= PATH_RAY_GLOSSY | PATH_RAY_SINGULAR | PATH_RAY_MIS_SKIP; + } + + /* Render pass categories. */ + if (bounce == 1) { + flag |= (label & LABEL_TRANSMIT) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS; + } + } + + INTEGRATOR_STATE_WRITE(state, path, flag) = flag; + INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce; + + /* Random number generator next bounce. */ + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; +} + +#ifdef __VOLUME__ +ccl_device_inline bool path_state_volume_next(IntegratorState state) +{ + /* For volume bounding meshes we pass through without counting transparent + * bounces, only sanity check in case self intersection gets us stuck. */ + uint32_t volume_bounds_bounce = INTEGRATOR_STATE(state, path, volume_bounds_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = volume_bounds_bounce; + if (volume_bounds_bounce > VOLUME_BOUNDS_MAX) { + return false; + } + + /* Random number generator next bounce. 
*/ + if (volume_bounds_bounce > 1) { + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; + } + + return true; +} +#endif + +ccl_device_inline uint path_state_ray_visibility(ConstIntegratorState state) +{ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + uint32_t visibility = path_flag & PATH_RAY_ALL_VISIBILITY; + + /* For visibility, diffuse/glossy are for reflection only. */ + if (visibility & PATH_RAY_TRANSMIT) { + visibility &= ~(PATH_RAY_DIFFUSE | PATH_RAY_GLOSSY); + } + + /* todo: this is not supported as its own ray visibility yet. */ + if (path_flag & PATH_RAY_VOLUME_SCATTER) { + visibility |= PATH_RAY_DIFFUSE; + } + + visibility = SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility); + + return visibility; +} + +ccl_device_inline float path_state_continuation_probability(KernelGlobals kg, + ConstIntegratorState state, + const uint32_t path_flag) +{ + if (path_flag & PATH_RAY_TRANSPARENT) { + const uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); + /* Do at least specified number of bounces without RR. */ + if (transparent_bounce <= kernel_data.integrator.transparent_min_bounce) { + return 1.0f; + } + } + else { + const uint32_t bounce = INTEGRATOR_STATE(state, path, bounce); + /* Do at least specified number of bounces without RR. */ + if (bounce <= kernel_data.integrator.min_bounce) { + return 1.0f; + } + } + + /* Probabilistic termination: use sqrt() to roughly match typical view + * transform and do path termination a bit later on average. */ + return min(sqrtf(max3(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f); +} + +ccl_device_inline bool path_state_ao_bounce(KernelGlobals kg, ConstIntegratorState state) +{ + if (!kernel_data.integrator.ao_bounces) { + return false; + } + + const int bounce = INTEGRATOR_STATE(state, path, bounce) - + INTEGRATOR_STATE(state, path, transmission_bounce) - + (INTEGRATOR_STATE(state, path, glossy_bounce) > 0) + 1; + return (bounce > kernel_data.integrator.ao_bounces); +} + +/* Random Number Sampling Utility Functions + * + * For each random number in each step of the path we must have a unique + * dimension to avoid using the same sequence twice. + * + * For branches in the path we must be careful not to reuse the same number + * in a sequence and offset accordingly. + */ + +/* RNG State loaded onto stack. 
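Concretely, uniqueness comes from indexing the sequence by (hash, sample, dimension) and bumping the dimension offset by a fixed per-bounce count, while branched sampling folds the branch index into the sample index (sample * num_branches + branch). A standalone sketch with an arbitrary hash standing in for the kernel's sampling pattern (splitmix64 here, purely for illustration):

    #include <cstdint>
    #include <cstdio>

    /* Stand-in for path_rng_1D: any deterministic map of (seed, sample, dimension)
     * to [0, 1) will do for illustration. */
    static double rng_1d(const uint64_t seed, const uint64_t sample, const uint64_t dimension)
    {
      uint64_t x = seed ^ (sample * 0x9E3779B97F4A7C15ULL) ^ (dimension << 32);
      x += 0x9E3779B97F4A7C15ULL;
      x = (x ^ (x >> 30)) * 0xBF58476D1CE4E5B9ULL;
      x = (x ^ (x >> 27)) * 0x94D049BB133111EBULL;
      x ^= x >> 31;
      return (x >> 11) * (1.0 / 9007199254740992.0); /* top 53 bits -> [0, 1) */
    }

    int main()
    {
      const uint64_t rng_hash = 0xdeadbeefu;
      const int sample = 7;
      const int dimensions_per_bounce = 3; /* illustrative stand-in for PRNG_BOUNCE_NUM */
      uint64_t rng_offset = 0;
      for (int bounce = 0; bounce < 4; ++bounce) {
        /* Each bounce consumes its own block of dimensions, so no draw repeats. */
        const double u_bsdf = rng_1d(rng_hash, sample, rng_offset + 0);
        const double u_terminate = rng_1d(rng_hash, sample, rng_offset + 1);
        std::printf("bounce %d: %.3f %.3f\n", bounce, u_bsdf, u_terminate);
        rng_offset += dimensions_per_bounce;
      }
      return 0;
    }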
*/ +typedef struct RNGState { + uint rng_hash; + uint rng_offset; + int sample; +} RNGState; + +ccl_device_inline void path_state_rng_load(ConstIntegratorState state, + ccl_private RNGState *rng_state) +{ + rng_state->rng_hash = INTEGRATOR_STATE(state, path, rng_hash); + rng_state->rng_offset = INTEGRATOR_STATE(state, path, rng_offset); + rng_state->sample = INTEGRATOR_STATE(state, path, sample); +} + +ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState state, + ccl_private RNGState *rng_state) +{ + rng_state->rng_hash = INTEGRATOR_STATE(state, shadow_path, rng_hash); + rng_state->rng_offset = INTEGRATOR_STATE(state, shadow_path, rng_offset); + rng_state->sample = INTEGRATOR_STATE(state, shadow_path, sample); +} + +ccl_device_inline float path_state_rng_1D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + int dimension) +{ + return path_rng_1D( + kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension); +} + +ccl_device_inline void path_state_rng_2D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + int dimension, + ccl_private float *fx, + ccl_private float *fy) +{ + path_rng_2D( + kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy); +} + +ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg, + ccl_private const RNGState *rng_state, + uint hash) +{ + /* Use a hash instead of dimension, this is not great but avoids adding + * more dimensions to each bounce which reduces quality of dimensions we + * are already using. */ + return path_rng_1D( + kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset); +} + +ccl_device_inline float path_branched_rng_1D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + int branch, + int num_branches, + int dimension) +{ + return path_rng_1D(kg, + rng_state->rng_hash, + rng_state->sample * num_branches + branch, + rng_state->rng_offset + dimension); +} + +ccl_device_inline void path_branched_rng_2D(KernelGlobals kg, + ccl_private const RNGState *rng_state, + int branch, + int num_branches, + int dimension, + ccl_private float *fx, + ccl_private float *fy) +{ + path_rng_2D(kg, + rng_state->rng_hash, + rng_state->sample * num_branches + branch, + rng_state->rng_offset + dimension, + fx, + fy); +} + +/* Utility functions to get light termination value, + * since it might not be needed in many cases. + */ +ccl_device_inline float path_state_rng_light_termination(KernelGlobals kg, + ccl_private const RNGState *state) +{ + if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) { + return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE); + } + return 0.0f; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h new file mode 100644 index 00000000000..71a590749bd --- /dev/null +++ b/intern/cycles/kernel/integrator/shade_background.h @@ -0,0 +1,219 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/film/accumulate.h" +#include "kernel/integrator/shader_eval.h" +#include "kernel/light/light.h" +#include "kernel/light/sample.h" +#include "kernel/sample/mis.h" + +CCL_NAMESPACE_BEGIN + +ccl_device float3 integrator_eval_background_shader(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) +{ +#ifdef __BACKGROUND__ + const int shader = kernel_data.background.surface_shader; + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + /* Use visibility flag to skip lights. */ + if (shader & SHADER_EXCLUDE_ANY) { + if (((shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || + ((shader & SHADER_EXCLUDE_GLOSSY) && ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) == + (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) || + ((shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || + ((shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) || + ((shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) + return zero_float3(); + } + + /* Use fast constant background color if available. */ + float3 L = zero_float3(); + if (!shader_constant_emission_eval(kg, shader, &L)) { + /* Evaluate background shader. */ + + /* TODO: does aliasing like this break automatic SoA in CUDA? + * Should we instead store closures separate from ShaderData? */ + ShaderDataTinyStorage emission_sd_storage; + ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + + PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); + shader_setup_from_background(kg, + emission_sd, + INTEGRATOR_STATE(state, ray, P), + INTEGRATOR_STATE(state, ray, D), + INTEGRATOR_STATE(state, ray, time)); + + PROFILING_SHADER(emission_sd->object, emission_sd->shader); + PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); + shader_eval_surface( + kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION); + + L = shader_background_eval(emission_sd); + } + + /* Background MIS weights. */ +# ifdef __BACKGROUND_MIS__ + /* Check if background light exists or if we should skip pdf. */ + if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) && + kernel_data.background.use_mis) { + const float3 ray_P = INTEGRATOR_STATE(state, ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); + const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); + + /* multiple importance sampling, get background light pdf for ray + * direction, and compute weight with respect to BSDF pdf */ + const float pdf = background_light_pdf(kg, ray_P - ray_D * mis_ray_t, ray_D); + const float mis_weight = power_heuristic(mis_ray_pdf, pdf); + + L *= mis_weight; + } +# endif + + return L; +#else + return make_float3(0.8f, 0.8f, 0.8f); +#endif +} + +ccl_device_inline void integrate_background(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) +{ + /* Accumulate transparency for transparent background. We can skip background + * shader evaluation unless a background pass is used. 
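+ * For a transparent film the transparency written out is simply the average
+ * of the RGB path throughput.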
*/ + bool eval_background = true; + float transparent = 0.0f; + + const bool is_transparent_background_ray = kernel_data.background.transparent && + (INTEGRATOR_STATE(state, path, flag) & + PATH_RAY_TRANSPARENT_BACKGROUND); + + if (is_transparent_background_ray) { + transparent = average(INTEGRATOR_STATE(state, path, throughput)); + +#ifdef __PASSES__ + eval_background = (kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND)); +#else + eval_background = false; +#endif + } + + /* Evaluate background shader. */ + float3 L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) : + zero_float3(); + + /* When using the ao bounces approximation, adjust background + * shader intensity with ao factor. */ + if (path_state_ao_bounce(kg, state)) { + L *= kernel_data.integrator.ao_bounces_factor; + } + + /* Write to render buffer. */ + kernel_accum_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer); +} + +ccl_device_inline void integrate_distant_lights(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) +{ + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float ray_time = INTEGRATOR_STATE(state, ray, time); + LightSample ls ccl_optional_struct_init; + for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) { + if (light_sample_from_distant_ray(kg, ray_D, lamp, &ls)) { + /* Use visibility flag to skip lights. */ +#ifdef __PASSES__ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if (ls.shader & SHADER_EXCLUDE_ANY) { + if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || + ((ls.shader & SHADER_EXCLUDE_GLOSSY) && + ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) == + (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) || + ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || + ((ls.shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) || + ((ls.shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) + return; + } +#endif + + /* Evaluate light shader. */ + /* TODO: does aliasing like this break automatic SoA in CUDA? */ + ShaderDataTinyStorage emission_sd_storage; + ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); + if (is_zero(light_eval)) { + return; + } + + /* MIS weighting. */ + if (!(path_flag & PATH_RAY_MIS_SKIP)) { + /* multiple importance sampling, get regular light pdf, + * and compute weight with respect to BSDF pdf */ + const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); + const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf); + light_eval *= mis_weight; + } + + /* Write to render buffer. */ + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_accum_emission(kg, state, throughput, light_eval, render_buffer); + } + } +} + +ccl_device void integrator_shade_background(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) +{ + PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP); + + /* TODO: unify these in a single loop to only have a single shader evaluation call. 
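+ * Rays that did not hit any geometry are shaded here: distant lights first,
+ * then the background shader.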
*/ + integrate_distant_lights(kg, state, render_buffer); + integrate_background(kg, state, render_buffer); + +#ifdef __SHADOW_CATCHER__ + if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SHADOW_CATCHER_BACKGROUND) { + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND; + + const int isect_prim = INTEGRATOR_STATE(state, isect, prim); + const int isect_type = INTEGRATOR_STATE(state, isect, type); + const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim, isect_type); + const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; + + if (shader_flags & SD_HAS_RAYTRACE) { + INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, + shader); + } + else { + INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, + shader); + } + return; + } +#endif + + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h new file mode 100644 index 00000000000..7dad3b4e49d --- /dev/null +++ b/intern/cycles/kernel/integrator/shade_light.h @@ -0,0 +1,128 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/film/accumulate.h" +#include "kernel/integrator/shader_eval.h" +#include "kernel/light/light.h" +#include "kernel/light/sample.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void integrate_light(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) +{ + /* Setup light sample. */ + Intersection isect ccl_optional_struct_init; + integrator_state_read_isect(kg, state, &isect); + + float3 ray_P = INTEGRATOR_STATE(state, ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, ray, D); + const float ray_time = INTEGRATOR_STATE(state, ray, time); + + /* Advance ray beyond light. */ + /* TODO: can we make this more numerically robust to avoid reintersecting the + * same light in some cases? */ + const float3 new_ray_P = ray_offset(ray_P + ray_D * isect.t, ray_D); + INTEGRATOR_STATE_WRITE(state, ray, P) = new_ray_P; + INTEGRATOR_STATE_WRITE(state, ray, t) -= isect.t; + + /* Set position to where the BSDF was sampled, for correct MIS PDF. */ + const float mis_ray_t = INTEGRATOR_STATE(state, path, mis_ray_t); + ray_P -= ray_D * mis_ray_t; + isect.t += mis_ray_t; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = mis_ray_t + isect.t; + + LightSample ls ccl_optional_struct_init; + const bool use_light_sample = light_sample_from_intersection(kg, &isect, ray_P, ray_D, &ls); + + if (!use_light_sample) { + return; + } + + /* Use visibility flag to skip lights. 
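+ * The SHADER_EXCLUDE_* tests below mirror the ones used for distant lights
+ * and the background shader.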
*/ +#ifdef __PASSES__ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if (ls.shader & SHADER_EXCLUDE_ANY) { + if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || + ((ls.shader & SHADER_EXCLUDE_GLOSSY) && + ((path_flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) == + (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) || + ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || + ((ls.shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER))) + return; + } +#endif + + /* Evaluate light shader. */ + /* TODO: does aliasing like this break automatic SoA in CUDA? */ + ShaderDataTinyStorage emission_sd_storage; + ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time); + if (is_zero(light_eval)) { + return; + } + + /* MIS weighting. */ + if (!(path_flag & PATH_RAY_MIS_SKIP)) { + /* multiple importance sampling, get regular light pdf, + * and compute weight with respect to BSDF pdf */ + const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); + const float mis_weight = power_heuristic(mis_ray_pdf, ls.pdf); + light_eval *= mis_weight; + } + + /* Write to render buffer. */ + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_accum_emission(kg, state, throughput, light_eval, render_buffer); +} + +ccl_device void integrator_shade_light(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) +{ + PROFILING_INIT(kg, PROFILING_SHADE_LIGHT_SETUP); + + integrate_light(kg, state, render_buffer); + + /* TODO: we could get stuck in an infinite loop if there are precision issues + * and the same light is hit again. + * + * As a workaround count this as a transparent bounce. It makes some sense + * to interpret lights as transparent surfaces (and support making them opaque), + * but this needs to be revisited. */ + uint32_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce) + 1; + INTEGRATOR_STATE_WRITE(state, path, transparent_bounce) = transparent_bounce; + + if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + return; + } + else { + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + return; + } + + /* TODO: in some cases we could continue directly to SHADE_BACKGROUND, but + * probably that optimization is probably not practical if we add lights to + * scene geometry. */ +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h new file mode 100644 index 00000000000..1de890aae29 --- /dev/null +++ b/intern/cycles/kernel/integrator/shade_shadow.h @@ -0,0 +1,189 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "kernel/integrator/shade_volume.h" +#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/volume_stack.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline bool shadow_intersections_has_remaining(const uint num_hits) +{ + return num_hits >= INTEGRATOR_SHADOW_ISECT_SIZE; +} + +#ifdef __TRANSPARENT_SHADOWS__ +ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg, + IntegratorShadowState state, + const int hit) +{ + PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE); + + /* TODO: does aliasing like this break automatic SoA in CUDA? + * Should we instead store closures separate from ShaderData? + * + * TODO: is it better to declare this outside the loop or keep it local + * so the compiler can see there is no dependency between iterations? */ + ShaderDataTinyStorage shadow_sd_storage; + ccl_private ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage); + + /* Setup shader data at surface. */ + Intersection isect ccl_optional_struct_init; + integrator_state_read_shadow_isect(state, &isect, hit); + + Ray ray ccl_optional_struct_init; + integrator_state_read_shadow_ray(kg, state, &ray); + + shader_setup_from_ray(kg, shadow_sd, &ray, &isect); + + /* Evaluate shader. */ + if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { + shader_eval_surface( + kg, state, shadow_sd, NULL, PATH_RAY_SHADOW); + } + +# ifdef __VOLUME__ + /* Exit/enter volume. */ + shadow_volume_stack_enter_exit(kg, state, shadow_sd); +# endif + + /* Compute transparency from closures. */ + return shader_bsdf_transparency(kg, shadow_sd); +} + +# ifdef __VOLUME__ +ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg, + IntegratorShadowState state, + const int hit, + const int num_recorded_hits, + ccl_private float3 *ccl_restrict + throughput) +{ + PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME); + + /* TODO: deduplicate with surface, or does it not matter for memory usage? */ + ShaderDataTinyStorage shadow_sd_storage; + ccl_private ShaderData *shadow_sd = AS_SHADER_DATA(&shadow_sd_storage); + + /* Setup shader data. */ + Ray ray ccl_optional_struct_init; + integrator_state_read_shadow_ray(kg, state, &ray); + + /* Modify ray position and length to match current segment. */ + const float start_t = (hit == 0) ? 0.0f : + INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit - 1, t); + const float end_t = (hit < num_recorded_hits) ? + INTEGRATOR_STATE_ARRAY(state, shadow_isect, hit, t) : + ray.t; + ray.P += start_t * ray.D; + ray.t = end_t - start_t; + + shader_setup_from_volume(kg, shadow_sd, &ray); + + const float step_size = volume_stack_step_size( + kg, [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); }); + + volume_shadow_heterogeneous(kg, state, &ray, shadow_sd, throughput, step_size); +} +# endif + +ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg, + IntegratorShadowState state, + const uint num_hits) +{ + /* Accumulate shadow for transparent surfaces. */ + const uint num_recorded_hits = min(num_hits, INTEGRATOR_SHADOW_ISECT_SIZE); + + for (uint hit = 0; hit < num_recorded_hits + 1; hit++) { + /* Volume shaders. 
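+ * The loop runs one extra iteration so that, when all hits were recorded,
+ * the volume segment behind the last surface hit is attenuated as well.
+ * Surface shading below only runs for recorded hits.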
*/ + if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) { +# ifdef __VOLUME__ + if (!integrator_state_shadow_volume_stack_is_empty(kg, state)) { + float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput); + integrate_transparent_volume_shadow(kg, state, hit, num_recorded_hits, &throughput); + if (is_zero(throughput)) { + return true; + } + + INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput; + } +# endif + } + + /* Surface shaders. */ + if (hit < num_recorded_hits) { + const float3 shadow = integrate_transparent_surface_shadow(kg, state, hit); + const float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow; + if (is_zero(throughput)) { + return true; + } + + INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) = throughput; + INTEGRATOR_STATE_WRITE(state, shadow_path, transparent_bounce) += 1; + INTEGRATOR_STATE_WRITE(state, shadow_path, rng_offset) += PRNG_BOUNCE_NUM; + } + + /* Note we do not need to check max_transparent_bounce here, the number + * of intersections is already limited and made opaque in the + * INTERSECT_SHADOW kernel. */ + } + + if (shadow_intersections_has_remaining(num_hits)) { + /* There are more hits that we could not recorded due to memory usage, + * adjust ray to intersect again from the last hit. */ + const float last_hit_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, num_recorded_hits - 1, t); + const float3 ray_P = INTEGRATOR_STATE(state, shadow_ray, P); + const float3 ray_D = INTEGRATOR_STATE(state, shadow_ray, D); + INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray_offset(ray_P + last_hit_t * ray_D, ray_D); + INTEGRATOR_STATE_WRITE(state, shadow_ray, t) -= last_hit_t; + } + + return false; +} +#endif /* __TRANSPARENT_SHADOWS__ */ + +ccl_device void integrator_shade_shadow(KernelGlobals kg, + IntegratorShadowState state, + ccl_global float *ccl_restrict render_buffer) +{ + PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SETUP); + const uint num_hits = INTEGRATOR_STATE(state, shadow_path, num_hits); + +#ifdef __TRANSPARENT_SHADOWS__ + /* Evaluate transparent shadows. */ + const bool opaque = integrate_transparent_shadow(kg, state, num_hits); + if (opaque) { + INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + return; + } +#endif + + if (shadow_intersections_has_remaining(num_hits)) { + /* More intersections to find, continue shadow ray. */ + INTEGRATOR_SHADOW_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW); + return; + } + else { + kernel_accum_light(kg, state, render_buffer); + INTEGRATOR_SHADOW_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW); + return; + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h new file mode 100644 index 00000000000..cce591eb219 --- /dev/null +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -0,0 +1,557 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/film/accumulate.h" +#include "kernel/film/passes.h" + +#include "kernel/integrator/path_state.h" +#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/subsurface.h" +#include "kernel/integrator/volume_stack.h" + +#include "kernel/light/light.h" +#include "kernel/light/sample.h" + +#include "kernel/sample/mis.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd) +{ + Intersection isect ccl_optional_struct_init; + integrator_state_read_isect(kg, state, &isect); + + Ray ray ccl_optional_struct_init; + integrator_state_read_ray(kg, state, &ray); + + shader_setup_from_ray(kg, sd, &ray, &isect); +} + +#ifdef __HOLDOUT__ +ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd, + ccl_global float *ccl_restrict render_buffer) +{ + /* Write holdout transparency to render buffer and stop if fully holdout. */ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && + (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { + const float3 holdout_weight = shader_holdout_apply(kg, sd); + if (kernel_data.background.transparent) { + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + const float transparent = average(holdout_weight * throughput); + kernel_accum_holdout(kg, state, path_flag, transparent, render_buffer); + } + if (isequal_float3(holdout_weight, one_float3())) { + return false; + } + } + + return true; +} +#endif /* __HOLDOUT__ */ + +#ifdef __EMISSION__ +ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg, + ConstIntegratorState state, + ccl_private const ShaderData *sd, + ccl_global float *ccl_restrict + render_buffer) +{ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + /* Evaluate emissive closure. */ + float3 L = shader_emissive_eval(sd); + +# ifdef __HAIR__ + if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && + (sd->type & PRIMITIVE_ALL_TRIANGLE)) +# else + if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) +# endif + { + const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf); + const float t = sd->ray_length + INTEGRATOR_STATE(state, path, mis_ray_t); + + /* Multiple importance sampling, get triangle light pdf, + * and compute weight with respect to BSDF pdf. */ + float pdf = triangle_light_pdf(kg, sd, t); + float mis_weight = power_heuristic(bsdf_pdf, pdf); + + L *= mis_weight; + } + + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_accum_emission(kg, state, throughput, L, render_buffer); +} +#endif /* __EMISSION__ */ + +#ifdef __EMISSION__ +/* Path tracing: sample point on light and evaluate light shader, then + * queue shadow ray to be traced. */ +ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + ccl_private const RNGState *rng_state) +{ + /* Test if there is a light or BSDF that needs direct light. */ + if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) { + return; + } + + /* Sample position on a light. 
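+ * A single 2D sample (PRNG_LIGHT_U) is drawn from the path RNG and used to
+ * pick a position from the light distribution.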
*/ + LightSample ls ccl_optional_struct_init; + { + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + const uint bounce = INTEGRATOR_STATE(state, path, bounce); + float light_u, light_v; + path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + + if (!light_distribution_sample_from_position( + kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, &ls)) { + return; + } + } + + kernel_assert(ls.pdf != 0.0f); + + /* Evaluate light shader. + * + * TODO: can we reuse sd memory? In theory we can move this after + * integrate_surface_bounce, evaluate the BSDF, and only then evaluate + * the light shader. This could also move to its own kernel, for + * non-constant light sources. */ + ShaderDataTinyStorage emission_sd_storage; + ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time); + if (is_zero(light_eval)) { + return; + } + + /* Evaluate BSDF. */ + const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D); + + BsdfEval bsdf_eval ccl_optional_struct_init; + const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader); + bsdf_eval_mul3(&bsdf_eval, light_eval / ls.pdf); + + if (ls.shader & SHADER_USE_MIS) { + const float mis_weight = power_heuristic(ls.pdf, bsdf_pdf); + bsdf_eval_mul(&bsdf_eval, mis_weight); + } + + /* Path termination. */ + const float terminate = path_state_rng_light_termination(kg, rng_state); + if (light_sample_terminate(kg, &ls, &bsdf_eval, terminate)) { + return; + } + + /* Create shadow ray. */ + Ray ray ccl_optional_struct_init; + light_sample_to_surface_shadow_ray(kg, sd, &ls, &ray); + const bool is_light = light_sample_is_light(&ls); + + /* Branch off shadow kernel. */ + INTEGRATOR_SHADOW_PATH_INIT( + shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); + + /* Copy volume stack and enter/exit volume. */ + integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); + + if (is_transmission) { +# ifdef __VOLUME__ + shadow_volume_stack_enter_exit(kg, shadow_state, sd); +# endif + } + + /* Write shadow ray and associated state to global memory. */ + integrator_state_write_shadow_ray(kg, shadow_state, &ray); + + /* Copy state from main path to shadow path. */ + const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); + const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); + uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); + shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; + shadow_flag |= (is_transmission) ? PATH_RAY_TRANSMISSION_PASS : PATH_RAY_REFLECT_PASS; + const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval); + + if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { + const float3 diffuse_glossy_ratio = (bounce == 0) ? 
+ bsdf_eval_diffuse_glossy_ratio(&bsdf_eval) : + INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; + } + + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( + state, path, render_pixel_index); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE( + state, path, rng_offset); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE( + state, path, rng_hash); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE( + state, path, sample); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, flag) = shadow_flag; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, bounce) = bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transparent_bounce) = transparent_bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_bounce) = INTEGRATOR_STATE( + state, path, diffuse_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE( + state, path, glossy_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE( + state, path, transmission_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, throughput) = throughput; + + if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) { + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unshadowed_throughput) = throughput; + } +} +#endif + +/* Path tracing: bounce off or through surface with new direction. */ +ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce( + KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + ccl_private const RNGState *rng_state) +{ + /* Sample BSDF or BSSRDF. */ + if (!(sd->flag & (SD_BSDF | SD_BSSRDF))) { + return LABEL_NONE; + } + + float bsdf_u, bsdf_v; + path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u); + +#ifdef __SUBSURFACE__ + /* BSSRDF closure, we schedule subsurface intersection kernel. */ + if (CLOSURE_IS_BSSRDF(sc->type)) { + return subsurface_bounce(kg, state, sd, sc); + } +#endif + + /* BSDF closure, sample direction. */ + float bsdf_pdf; + BsdfEval bsdf_eval ccl_optional_struct_init; + float3 bsdf_omega_in ccl_optional_struct_init; + differential3 bsdf_domega_in ccl_optional_struct_init; + int label; + + label = shader_bsdf_sample_closure( + kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); + + if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) { + return LABEL_NONE; + } + + /* Setup ray. Note that clipping works through transparent bounces. */ + INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P, + (label & LABEL_TRANSMIT) ? -sd->Ng : sd->Ng); + INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in); + INTEGRATOR_STATE_WRITE(state, ray, t) = (label & LABEL_TRANSPARENT) ? + INTEGRATOR_STATE(state, ray, t) - sd->ray_length : + FLT_MAX; + +#ifdef __RAY_DIFFERENTIALS__ + INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); + INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in); +#endif + + /* Update throughput. 
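+ * Standard importance sampling estimator: multiply the path throughput by
+ * the sampled BSDF evaluation divided by the sample pdf.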
*/ + float3 throughput = INTEGRATOR_STATE(state, path, throughput); + throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf; + INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; + + if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { + if (INTEGRATOR_STATE(state, path, bounce) == 0) { + INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = bsdf_eval_diffuse_glossy_ratio( + &bsdf_eval); + } + } + + /* Update path state */ + if (label & LABEL_TRANSPARENT) { + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; + } + else { + INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; + INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( + bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); + } + + path_state_next(kg, state, label); + return label; +} + +#ifdef __VOLUME__ +ccl_device_forceinline bool integrate_surface_volume_only_bounce(IntegratorState state, + ccl_private ShaderData *sd) +{ + if (!path_state_volume_next(state)) { + return LABEL_NONE; + } + + /* Setup ray position, direction stays unchanged. */ + INTEGRATOR_STATE_WRITE(state, ray, P) = ray_offset(sd->P, -sd->Ng); + + /* Clipping works through transparent. */ + INTEGRATOR_STATE_WRITE(state, ray, t) -= sd->ray_length; + +# ifdef __RAY_DIFFERENTIALS__ + INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); +# endif + + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) += sd->ray_length; + + return LABEL_TRANSMIT | LABEL_TRANSPARENT; +} +#endif + +#if defined(__AO__) +ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *ccl_restrict sd, + ccl_private const RNGState *ccl_restrict + rng_state, + ccl_global float *ccl_restrict render_buffer) +{ + if (!(kernel_data.kernel_features & KERNEL_FEATURE_AO_ADDITIVE) && + !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_CAMERA)) { + return; + } + + float bsdf_u, bsdf_v; + path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + + float3 ao_N; + const float3 ao_weight = shader_bsdf_ao( + kg, sd, kernel_data.integrator.ao_additive_factor, &ao_N); + + float3 ao_D; + float ao_pdf; + sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); + + if (!(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f)) { + return; + } + + Ray ray ccl_optional_struct_init; + ray.P = ray_offset(sd->P, sd->Ng); + ray.D = ao_D; + ray.t = kernel_data.integrator.ao_bounces_distance; + ray.time = sd->time; + ray.dP = differential_zero_compact(); + ray.dD = differential_zero_compact(); + + /* Branch off shadow kernel. */ + INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, ao); + + /* Copy volume stack and enter/exit volume. */ + integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); + + /* Write shadow ray and associated state to global memory. */ + integrator_state_write_shadow_ray(kg, shadow_state, &ray); + + /* Copy state from main path to shadow path. 
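+ * The AO shadow path gets its own copy of the RNG state, bounce counters
+ * and throughput so it can be traced independently of the main path.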
*/ + const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); + const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); + uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag) | PATH_RAY_SHADOW_FOR_AO; + const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * shader_bsdf_alpha(kg, sd); + + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( + state, path, render_pixel_index); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE( + state, path, rng_offset); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE( + state, path, rng_hash); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE( + state, path, sample); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, flag) = shadow_flag; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, bounce) = bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transparent_bounce) = transparent_bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, throughput) = throughput; + + if (kernel_data.kernel_features & KERNEL_FEATURE_AO_ADDITIVE) { + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unshadowed_throughput) = ao_weight; + } +} +#endif /* defined(__AO__) */ + +template +ccl_device bool integrate_surface(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) + +{ + PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_SURFACE_SETUP); + + /* Setup shader data. */ + ShaderData sd; + integrate_surface_shader_setup(kg, state, &sd); + PROFILING_SHADER(sd.object, sd.shader); + + int continue_path_label = 0; + + /* Skip most work for volume bounding surface. */ +#ifdef __VOLUME__ + if (!(sd.flag & SD_HAS_ONLY_VOLUME)) { +#endif + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + +#ifdef __SUBSURFACE__ + /* Can skip shader evaluation for BSSRDF exit point without bump mapping. */ + if (!(path_flag & PATH_RAY_SUBSURFACE) || ((sd.flag & SD_HAS_BSSRDF_BUMP))) +#endif + { + /* Evaluate shader. */ + PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL); + shader_eval_surface(kg, state, &sd, render_buffer, path_flag); + + /* Initialize additional RNG for BSDFs. */ + if (sd.flag & SD_BSDF_NEEDS_LCG) { + sd.lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_hash), + INTEGRATOR_STATE(state, path, rng_offset), + INTEGRATOR_STATE(state, path, sample), + 0xb4bc3953); + } + } + +#ifdef __SUBSURFACE__ + if (path_flag & PATH_RAY_SUBSURFACE) { + /* When coming from inside subsurface scattering, setup a diffuse + * closure to perform lighting at the exit point. */ + subsurface_shader_data_setup(kg, state, &sd, path_flag); + INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_SUBSURFACE; + } +#endif + + shader_prepare_surface_closures(kg, state, &sd); + +#ifdef __HOLDOUT__ + /* Evaluate holdout. */ + if (!integrate_surface_holdout(kg, state, &sd, render_buffer)) { + return false; + } +#endif + +#ifdef __EMISSION__ + /* Write emission. */ + if (sd.flag & SD_EMISSION) { + integrate_surface_emission(kg, state, &sd, render_buffer); + } +#endif + +#ifdef __PASSES__ + /* Write render passes. */ + PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES); + kernel_write_data_passes(kg, state, &sd, render_buffer); +#endif + + /* Load random number state. */ + RNGState rng_state; + path_state_rng_load(state, &rng_state); + + /* Perform path termination. 
Most paths have already been terminated in + * the intersect_closest kernel, this is just for emission and for dividing + * throughput by the probability at the right moment. + * + * Also ensure we don't do it twice for SSS at both the entry and exit point. */ + if (!(path_flag & PATH_RAY_SUBSURFACE)) { + const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ? + 0.0f : + path_state_continuation_probability(kg, state, path_flag); + if (probability == 0.0f) { + return false; + } + else if (probability != 1.0f) { + INTEGRATOR_STATE_WRITE(state, path, throughput) /= probability; + } + } + +#ifdef __DENOISING_FEATURES__ + kernel_write_denoising_features_surface(kg, state, &sd, render_buffer); +#endif + +#ifdef __SHADOW_CATCHER__ + kernel_write_shadow_catcher_bounce_data(kg, state, &sd, render_buffer); +#endif + + /* Direct light. */ + PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT); + integrate_surface_direct_light(kg, state, &sd, &rng_state); + +#if defined(__AO__) + /* Ambient occlusion pass. */ + if (kernel_data.kernel_features & KERNEL_FEATURE_AO) { + PROFILING_EVENT(PROFILING_SHADE_SURFACE_AO); + integrate_surface_ao(kg, state, &sd, &rng_state, render_buffer); + } +#endif + + PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT); + continue_path_label = integrate_surface_bsdf_bssrdf_bounce(kg, state, &sd, &rng_state); +#ifdef __VOLUME__ + } + else { + PROFILING_EVENT(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT); + continue_path_label = integrate_surface_volume_only_bounce(state, &sd); + } + + if (continue_path_label & LABEL_TRANSMIT) { + /* Enter/Exit volume. */ + volume_stack_enter_exit(kg, state, &sd); + } +#endif + + return continue_path_label != 0; +} + +template +ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) +{ + if (integrate_surface(kg, state, render_buffer)) { + if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE) { + INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE); + } + else { + kernel_assert(INTEGRATOR_STATE(state, ray, t) != 0.0f); + INTEGRATOR_PATH_NEXT(current_kernel, DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + } + } + else { + INTEGRATOR_PATH_TERMINATE(current_kernel); + } +} + +ccl_device_forceinline void integrator_shade_surface_raytrace( + KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + integrator_shade_surface( + kg, state, render_buffer); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h new file mode 100644 index 00000000000..f455152dcf9 --- /dev/null +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -0,0 +1,1049 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "kernel/film/accumulate.h" +#include "kernel/film/passes.h" + +#include "kernel/integrator/intersect_closest.h" +#include "kernel/integrator/path_state.h" +#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/volume_stack.h" + +#include "kernel/light/light.h" +#include "kernel/light/sample.h" + +#include "kernel/sample/mis.h" + +CCL_NAMESPACE_BEGIN + +#ifdef __VOLUME__ + +/* Events for probabilistic scattering. */ + +typedef enum VolumeIntegrateEvent { + VOLUME_PATH_SCATTERED = 0, + VOLUME_PATH_ATTENUATED = 1, + VOLUME_PATH_MISSED = 2 +} VolumeIntegrateEvent; + +typedef struct VolumeIntegrateResult { + /* Throughput and offset for direct light scattering. */ + bool direct_scatter; + float3 direct_throughput; + float direct_t; + ShaderVolumePhases direct_phases; + + /* Throughput and offset for indirect light scattering. */ + bool indirect_scatter; + float3 indirect_throughput; + float indirect_t; + ShaderVolumePhases indirect_phases; +} VolumeIntegrateResult; + +/* Ignore paths that have volume throughput below this value, to avoid unnecessary work + * and precision issues. + * todo: this value could be tweaked or turned into a probability to avoid unnecessary + * work in volumes and subsurface scattering. */ +# define VOLUME_THROUGHPUT_EPSILON 1e-6f + +/* Volume shader properties + * + * extinction coefficient = absorption coefficient + scattering coefficient + * sigma_t = sigma_a + sigma_s */ + +typedef struct VolumeShaderCoefficients { + float3 sigma_t; + float3 sigma_s; + float3 emission; +} VolumeShaderCoefficients; + +/* Evaluate shader to get extinction coefficient at P. */ +ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg, + IntegratorShadowState state, + ccl_private ShaderData *ccl_restrict sd, + ccl_private float3 *ccl_restrict extinction) +{ + shader_eval_volume(kg, state, sd, PATH_RAY_SHADOW, [=](const int i) { + return integrator_state_read_shadow_volume_stack(state, i); + }); + + if (!(sd->flag & SD_EXTINCTION)) { + return false; + } + + const float density = object_volume_density(kg, sd->object); + *extinction = sd->closure_transparent_extinction * density; + return true; +} + +/* Evaluate shader to get absorption, scattering and emission at P. */ +ccl_device_inline bool volume_shader_sample(KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *ccl_restrict sd, + ccl_private VolumeShaderCoefficients *coeff) +{ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + shader_eval_volume(kg, state, sd, path_flag, [=](const int i) { + return integrator_state_read_volume_stack(state, i); + }); + + if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) { + return false; + } + + coeff->sigma_s = zero_float3(); + coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : zero_float3(); + coeff->emission = (sd->flag & SD_EMISSION) ? 
sd->closure_emission_background : zero_float3(); + + if (sd->flag & SD_SCATTER) { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_VOLUME(sc->type)) { + coeff->sigma_s += sc->weight; + } + } + } + + const float density = object_volume_density(kg, sd->object); + coeff->sigma_s *= density; + coeff->sigma_t *= density; + coeff->emission *= density; + + return true; +} + +ccl_device_forceinline void volume_step_init(KernelGlobals kg, + ccl_private const RNGState *rng_state, + const float object_step_size, + float t, + ccl_private float *step_size, + ccl_private float *step_shade_offset, + ccl_private float *steps_offset, + ccl_private int *max_steps) +{ + if (object_step_size == FLT_MAX) { + /* Homogeneous volume. */ + *step_size = t; + *step_shade_offset = 0.0f; + *steps_offset = 1.0f; + *max_steps = 1; + } + else { + /* Heterogeneous volume. */ + *max_steps = kernel_data.integrator.volume_max_steps; + float step = min(object_step_size, t); + + /* compute exact steps in advance for malloc */ + if (t > *max_steps * step) { + step = t / (float)*max_steps; + } + + *step_size = step; + + /* Perform shading at this offset within a step, to integrate over + * over the entire step segment. */ + *step_shade_offset = path_state_rng_1D_hash(kg, rng_state, 0x1e31d8a4); + + /* Shift starting point of all segment by this random amount to avoid + * banding artifacts from the volume bounding shape. */ + *steps_offset = path_state_rng_1D_hash(kg, rng_state, 0x3d22c7b3); + } +} + +/* Volume Shadows + * + * These functions are used to attenuate shadow rays to lights. Both absorption + * and scattering will block light, represented by the extinction coefficient. */ + +# if 0 +/* homogeneous volume: assume shader evaluation at the starts gives + * the extinction coefficient for the entire line segment */ +ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState state, + ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + ccl_global float3 *ccl_restrict throughput) +{ + float3 sigma_t = zero_float3(); + + if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) { + *throughput *= volume_color_transmittance(sigma_t, ray->t); + } +} +# endif + +/* heterogeneous volume: integrate stepping through the volume until we + * reach the end, get absorbed entirely, or run out of iterations */ +ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, + IntegratorShadowState state, + ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + ccl_private float3 *ccl_restrict throughput, + const float object_step_size) +{ + /* Load random number state. */ + RNGState rng_state; + shadow_path_state_rng_load(state, &rng_state); + + float3 tp = *throughput; + + /* Prepare for stepping. + * For shadows we do not offset all segments, since the starting point is + * already a random distance inside the volume. It also appears to create + * banding artifacts for unknown reasons. 
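+ * Below, the optical depth sum(sigma_t * dt) is accumulated per step and the
+ * transmittance exp(-sum) is only evaluated every 8th step and at the end of
+ * the volume, to amortize the cost of expf().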
*/ + int max_steps; + float step_size, step_shade_offset, unused; + volume_step_init(kg, + &rng_state, + object_step_size, + ray->t, + &step_size, + &step_shade_offset, + &unused, + &max_steps); + const float steps_offset = 1.0f; + + /* compute extinction at the start */ + float t = 0.0f; + + float3 sum = zero_float3(); + + for (int i = 0; i < max_steps; i++) { + /* advance to new position */ + float new_t = min(ray->t, (i + steps_offset) * step_size); + float dt = new_t - t; + + float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset); + float3 sigma_t = zero_float3(); + + /* compute attenuation over segment */ + sd->P = new_P; + if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) { + /* Compute `expf()` only for every Nth step, to save some calculations + * because `exp(a)*exp(b) = exp(a+b)`, also do a quick #VOLUME_THROUGHPUT_EPSILON + * check then. */ + sum += (-sigma_t * dt); + if ((i & 0x07) == 0) { /* TODO: Other interval? */ + tp = *throughput * exp3(sum); + + /* stop if nearly all light is blocked */ + if (tp.x < VOLUME_THROUGHPUT_EPSILON && tp.y < VOLUME_THROUGHPUT_EPSILON && + tp.z < VOLUME_THROUGHPUT_EPSILON) + break; + } + } + + /* stop if at the end of the volume */ + t = new_t; + if (t == ray->t) { + /* Update throughput in case we haven't done it above */ + tp = *throughput * exp3(sum); + break; + } + } + + *throughput = tp; +} + +/* Equi-angular sampling as in: + * "Importance Sampling Techniques for Path Tracing in Participating Media" */ + +ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict ray, + const float3 light_P, + const float xi, + ccl_private float *pdf) +{ + const float t = ray->t; + const float delta = dot((light_P - ray->P), ray->D); + const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + if (UNLIKELY(D == 0.0f)) { + *pdf = 0.0f; + return 0.0f; + } + const float theta_a = -atan2f(delta, D); + const float theta_b = atan2f(t - delta, D); + const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a); + if (UNLIKELY(theta_b == theta_a)) { + *pdf = 0.0f; + return 0.0f; + } + *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); + + return min(t, delta + t_); /* min is only for float precision errors */ +} + +ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray, + const float3 light_P, + const float sample_t) +{ + const float delta = dot((light_P - ray->P), ray->D); + const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + if (UNLIKELY(D == 0.0f)) { + return 0.0f; + } + + const float t = ray->t; + const float t_ = sample_t - delta; + + const float theta_a = -atan2f(delta, D); + const float theta_b = atan2f(t - delta, D); + if (UNLIKELY(theta_b == theta_a)) { + return 0.0f; + } + + const float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); + + return pdf; +} + +ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray, + const float3 light_P, + const float sample_t) +{ + float delta = dot((light_P - ray->P), ray->D); + float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + if (UNLIKELY(D == 0.0f)) { + return 0.0f; + } + + const float t = ray->t; + const float t_ = sample_t - delta; + + const float theta_a = -atan2f(delta, D); + const float theta_b = atan2f(t - delta, D); + if (UNLIKELY(theta_b == theta_a)) { + return 0.0f; + } + + const float theta_sample = atan2f(t_, D); + const float cdf = (theta_sample - theta_a) / (theta_b - theta_a); + + return cdf; +} + +/* Distance sampling */ + +ccl_device float 
volume_distance_sample(float max_t, + float3 sigma_t, + int channel, + float xi, + ccl_private float3 *transmittance, + ccl_private float3 *pdf) +{ + /* xi is [0, 1[ so log(0) should never happen, division by zero is + * avoided because sample_sigma_t > 0 when SD_SCATTER is set */ + float sample_sigma_t = volume_channel_get(sigma_t, channel); + float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); + float sample_transmittance = volume_channel_get(full_transmittance, channel); + + float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t); + + *transmittance = volume_color_transmittance(sigma_t, sample_t); + *pdf = safe_divide_color(sigma_t * *transmittance, one_float3() - full_transmittance); + + /* todo: optimization: when taken together with hit/miss decision, + * the full_transmittance cancels out drops out and xi does not + * need to be remapped */ + + return sample_t; +} + +ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_t) +{ + float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); + float3 transmittance = volume_color_transmittance(sigma_t, sample_t); + + return safe_divide_color(sigma_t * transmittance, one_float3() - full_transmittance); +} + +/* Emission */ + +ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff, + int closure_flag, + float3 transmittance, + float t) +{ + /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t + * this goes to E * t as sigma_t goes to zero + * + * todo: we should use an epsilon to avoid precision issues near zero sigma_t */ + float3 emission = coeff->emission; + + if (closure_flag & SD_EXTINCTION) { + float3 sigma_t = coeff->sigma_t; + + emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t; + emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t; + emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t; + } + else + emission *= t; + + return emission; +} + +/* Volume Integration */ + +typedef struct VolumeIntegrateState { + /* Volume segment extents. */ + float start_t; + float end_t; + + /* If volume is absorption-only up to this point, and no probabilistic + * scattering or termination has been used yet. */ + bool absorption_only; + + /* Random numbers for scattering. */ + float rscatter; + float rphase; + + /* Multiple importance sampling. */ + VolumeSampleMethod direct_sample_method; + bool use_mis; + float distance_pdf; + float equiangular_pdf; +} VolumeIntegrateState; + +ccl_device_forceinline void volume_integrate_step_scattering( + ccl_private const ShaderData *sd, + ccl_private const Ray *ray, + const float3 equiangular_light_P, + ccl_private const VolumeShaderCoefficients &ccl_restrict coeff, + const float3 transmittance, + ccl_private VolumeIntegrateState &ccl_restrict vstate, + ccl_private VolumeIntegrateResult &ccl_restrict result) +{ + /* Pick random color channel, we use the Veach one-sample + * model with balance heuristic for the channels. */ + const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + float3 channel_pdf; + const int channel = volume_sample_channel( + albedo, result.indirect_throughput, vstate.rphase, &channel_pdf); + + /* Equiangular sampling for direct lighting. 
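+ * The direct-light scatter distance was already drawn with
+ * volume_equiangular_sample(); if it falls inside this step, the direct
+ * throughput is scaled by sigma_s * transmittance / equiangular_pdf, with an
+ * MIS weight against distance sampling when one-sample MIS is enabled.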
*/ + if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR && !result.direct_scatter) { + if (result.direct_t >= vstate.start_t && result.direct_t <= vstate.end_t) { + const float new_dt = result.direct_t - vstate.start_t; + const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + + result.direct_scatter = true; + result.direct_throughput *= coeff.sigma_s * new_transmittance / vstate.equiangular_pdf; + shader_copy_volume_phases(&result.direct_phases, sd); + + /* Multiple importance sampling. */ + if (vstate.use_mis) { + const float distance_pdf = vstate.distance_pdf * + dot(channel_pdf, coeff.sigma_t * new_transmittance); + const float mis_weight = 2.0f * power_heuristic(vstate.equiangular_pdf, distance_pdf); + result.direct_throughput *= mis_weight; + } + } + else { + result.direct_throughput *= transmittance; + vstate.distance_pdf *= dot(channel_pdf, transmittance); + } + } + + /* Distance sampling for indirect and optional direct lighting. */ + if (!result.indirect_scatter) { + /* decide if we will scatter or continue */ + const float sample_transmittance = volume_channel_get(transmittance, channel); + + if (1.0f - vstate.rscatter >= sample_transmittance) { + /* compute sampling distance */ + const float sample_sigma_t = volume_channel_get(coeff.sigma_t, channel); + const float new_dt = -logf(1.0f - vstate.rscatter) / sample_sigma_t; + const float new_t = vstate.start_t + new_dt; + + /* transmittance and pdf */ + const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + const float distance_pdf = dot(channel_pdf, coeff.sigma_t * new_transmittance); + + /* throughput */ + result.indirect_scatter = true; + result.indirect_t = new_t; + result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf; + shader_copy_volume_phases(&result.indirect_phases, sd); + + if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) { + /* If using distance sampling for direct light, just copy parameters + * of indirect light since we scatter at the same point then. */ + result.direct_scatter = true; + result.direct_t = result.indirect_t; + result.direct_throughput = result.indirect_throughput; + shader_copy_volume_phases(&result.direct_phases, sd); + + /* Multiple importance sampling. */ + if (vstate.use_mis) { + const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_light_P, new_t); + const float mis_weight = power_heuristic(vstate.distance_pdf * distance_pdf, + equiangular_pdf); + result.direct_throughput *= 2.0f * mis_weight; + } + } + } + else { + /* throughput */ + const float pdf = dot(channel_pdf, transmittance); + result.indirect_throughput *= transmittance / pdf; + if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) { + vstate.distance_pdf *= pdf; + } + + /* remap rscatter so we can reuse it and keep thing stratified */ + vstate.rscatter = 1.0f - (1.0f - vstate.rscatter) / sample_transmittance; + } + } +} + +/* heterogeneous volume distance sampling: integrate stepping through the + * volume until we reach the end, get absorbed entirely, or run out of + * iterations. this does probabilistically scatter or get transmitted through + * for path tracing where we don't want to branch. 
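+ * For direct lighting this supports distance sampling, equiangular sampling
+ * or one-sample MIS between the two: the scatter random number is split at
+ * 0.5 to select a strategy and the chosen estimator is weighted by twice the
+ * power heuristic.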
*/ +ccl_device_forceinline void volume_integrate_heterogeneous( + KernelGlobals kg, + IntegratorState state, + ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + ccl_private const RNGState *rng_state, + ccl_global float *ccl_restrict render_buffer, + const float object_step_size, + const VolumeSampleMethod direct_sample_method, + const float3 equiangular_light_P, + ccl_private VolumeIntegrateResult &result) +{ + PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE); + + /* Prepare for stepping. + * Using a different step offset for the first step avoids banding artifacts. */ + int max_steps; + float step_size, step_shade_offset, steps_offset; + volume_step_init(kg, + rng_state, + object_step_size, + ray->t, + &step_size, + &step_shade_offset, + &steps_offset, + &max_steps); + + /* Initialize volume integration state. */ + VolumeIntegrateState vstate ccl_optional_struct_init; + vstate.start_t = 0.0f; + vstate.end_t = 0.0f; + vstate.absorption_only = true; + vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE); + vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL); + + /* Multiple importance sampling: pick between equiangular and distance sampling strategy. */ + vstate.direct_sample_method = direct_sample_method; + vstate.use_mis = (direct_sample_method == VOLUME_SAMPLE_MIS); + if (vstate.use_mis) { + if (vstate.rscatter < 0.5f) { + vstate.rscatter *= 2.0f; + vstate.direct_sample_method = VOLUME_SAMPLE_DISTANCE; + } + else { + vstate.rscatter = (vstate.rscatter - 0.5f) * 2.0f; + vstate.direct_sample_method = VOLUME_SAMPLE_EQUIANGULAR; + } + } + vstate.equiangular_pdf = 0.0f; + vstate.distance_pdf = 1.0f; + + /* Initialize volume integration result. */ + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + result.direct_throughput = throughput; + result.indirect_throughput = throughput; + + /* Equiangular sampling: compute distance and PDF in advance. */ + if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) { + result.direct_t = volume_equiangular_sample( + ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf); + } + +# ifdef __DENOISING_FEATURES__ + const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) & + PATH_RAY_DENOISING_FEATURES); + float3 accum_albedo = zero_float3(); +# endif + float3 accum_emission = zero_float3(); + + for (int i = 0; i < max_steps; i++) { + /* Advance to new position */ + vstate.end_t = min(ray->t, (i + steps_offset) * step_size); + const float shade_t = vstate.start_t + (vstate.end_t - vstate.start_t) * step_shade_offset; + sd->P = ray->P + ray->D * shade_t; + + /* compute segment */ + VolumeShaderCoefficients coeff ccl_optional_struct_init; + if (volume_shader_sample(kg, state, sd, &coeff)) { + const int closure_flag = sd->flag; + + /* Evaluate transmittance over segment. */ + const float dt = (vstate.end_t - vstate.start_t); + const float3 transmittance = (closure_flag & SD_EXTINCTION) ? + volume_color_transmittance(coeff.sigma_t, dt) : + one_float3(); + + /* Emission. */ + if (closure_flag & SD_EMISSION) { + /* Only write emission before indirect light scatter position, since we terminate + * stepping at that point if we have already found a direct light scatter position. 
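The per-step emission term accumulated here comes from volume_emission_integrate() above. The following standalone single-channel sketch shows the same analytic integral and its sigma_t -> 0 limit, checked against brute-force quadrature; all names are illustrative.

// Sketch of the per-step emission integral:
// integral of E * exp(-sigma_t * x) dx over [0, t]
//   = E * (1 - exp(-sigma_t * t)) / sigma_t,
// which tends to E * t as sigma_t goes to zero. Single channel, plain C++.
#include <cmath>
#include <cstdio>

static float emission_integrate(float emission, float sigma_t, float t)
{
  // Analytic integral, with the sigma_t == 0 limit handled explicitly.
  return (sigma_t > 0.0f) ? emission * (1.0f - std::exp(-sigma_t * t)) / sigma_t :
                            emission * t;
}

int main()
{
  const float E = 2.0f, sigma_t = 0.8f, t = 0.5f;

  // Brute-force quadrature of the same integral as a sanity check.
  const int N = 10000;
  double sum = 0.0;
  for (int i = 0; i < N; i++) {
    const double x = (i + 0.5) * t / N;
    sum += E * std::exp(-sigma_t * x) * (t / N);
  }

  std::printf("analytic: %f  quadrature: %f  small-sigma limit E*t: %f\n",
              emission_integrate(E, sigma_t, t), sum, E * t);
  return 0;
}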
*/ + if (!result.indirect_scatter) { + const float3 emission = volume_emission_integrate( + &coeff, closure_flag, transmittance, dt); + accum_emission += emission; + } + } + + if (closure_flag & SD_EXTINCTION) { + if ((closure_flag & SD_SCATTER) || !vstate.absorption_only) { +# ifdef __DENOISING_FEATURES__ + /* Accumulate albedo for denoising features. */ + if (write_denoising_features && (closure_flag & SD_SCATTER)) { + const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + accum_albedo += result.indirect_throughput * albedo * (one_float3() - transmittance); + } +# endif + + /* Scattering and absorption. */ + volume_integrate_step_scattering( + sd, ray, equiangular_light_P, coeff, transmittance, vstate, result); + } + else { + /* Absorption only. */ + result.indirect_throughput *= transmittance; + result.direct_throughput *= transmittance; + } + + /* Stop if nearly all light blocked. */ + if (!result.indirect_scatter) { + if (max3(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) { + result.indirect_throughput = zero_float3(); + break; + } + } + else if (!result.direct_scatter) { + if (max3(result.direct_throughput) < VOLUME_THROUGHPUT_EPSILON) { + break; + } + } + } + + /* If we have scattering data for both direct and indirect, we're done. */ + if (result.direct_scatter && result.indirect_scatter) { + break; + } + } + + /* Stop if at the end of the volume. */ + vstate.start_t = vstate.end_t; + if (vstate.start_t == ray->t) { + break; + } + } + + /* Write accumulated emission. */ + if (!is_zero(accum_emission)) { + kernel_accum_emission(kg, state, result.indirect_throughput, accum_emission, render_buffer); + } + +# ifdef __DENOISING_FEATURES__ + /* Write denoising features. */ + if (write_denoising_features) { + kernel_write_denoising_features_volume( + kg, state, accum_albedo, result.indirect_scatter, render_buffer); + } +# endif /* __DENOISING_FEATURES__ */ +} + +# ifdef __EMISSION__ +/* Path tracing: sample point on light and evaluate light shader, then + * queue shadow ray to be traced. */ +ccl_device_forceinline bool integrate_volume_sample_light( + KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *ccl_restrict sd, + ccl_private const RNGState *ccl_restrict rng_state, + ccl_private LightSample *ccl_restrict ls) +{ + /* Test if there is a light or BSDF that needs direct light. */ + if (!kernel_data.integrator.use_direct_light) { + return false; + } + + /* Sample position on a light. */ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + const uint bounce = INTEGRATOR_STATE(state, path, bounce); + float light_u, light_v; + path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + + light_distribution_sample_from_volume_segment( + kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls); + + if (ls->shader & SHADER_EXCLUDE_SCATTER) { + return false; + } + + return true; +} + +/* Path tracing: sample point on light and evaluate light shader, then + * queue shadow ray to be traced. 
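power_heuristic() appears both in the equiangular/distance combination above and in the direct-light weighting below. A small standalone sketch of the exponent-2 power heuristic, and of why the factor 2 appears when each strategy is picked with probability 1/2, may help; the pdf values in main() are made up for illustration only.

// Sketch of one-sample multiple importance sampling between two strategies
// (here called "distance" and "equiangular"), each chosen with probability
// one half, with the contribution of the chosen strategy scaled by
// 2 * mis_weight.
#include <cstdio>

// Standard power heuristic with exponent 2.
static float power_heuristic(float a, float b)
{
  const float t = a * a;
  return t / (t + b * b);
}

int main()
{
  // Hypothetical pdf values for one scatter point.
  const float distance_pdf = 0.9f;
  const float equiangular_pdf = 2.4f;

  // If the distance strategy produced the sample:
  const float w_distance = 2.0f * power_heuristic(distance_pdf, equiangular_pdf);
  // If the equiangular strategy produced the sample:
  const float w_equiangular = 2.0f * power_heuristic(equiangular_pdf, distance_pdf);

  // The two weights of the one-sample estimator satisfy
  // 0.5 * w_distance + 0.5 * w_equiangular == 1, so the combination is
  // consistently normalized.
  std::printf("w_distance=%f w_equiangular=%f sum/2=%f\n",
              w_distance, w_equiangular, 0.5f * (w_distance + w_equiangular));
  return 0;
}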
*/ +ccl_device_forceinline void integrate_volume_direct_light( + KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *ccl_restrict sd, + ccl_private const RNGState *ccl_restrict rng_state, + const float3 P, + ccl_private const ShaderVolumePhases *ccl_restrict phases, + ccl_private const float3 throughput, + ccl_private LightSample *ccl_restrict ls) +{ + PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT); + + if (!kernel_data.integrator.use_direct_light) { + return; + } + + /* Sample position on the same light again, now from the shading + * point where we scattered. + * + * TODO: decorrelate random numbers and use light_sample_new_position to + * avoid resampling the CDF. */ + { + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + const uint bounce = INTEGRATOR_STATE(state, path, bounce); + float light_u, light_v; + path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v); + + if (!light_distribution_sample_from_position( + kg, light_u, light_v, sd->time, P, bounce, path_flag, ls)) { + return; + } + } + + if (ls->shader & SHADER_EXCLUDE_SCATTER) { + return; + } + + /* Evaluate light shader. + * + * TODO: can we reuse sd memory? In theory we can move this after + * integrate_surface_bounce, evaluate the BSDF, and only then evaluate + * the light shader. This could also move to its own kernel, for + * non-constant light sources. */ + ShaderDataTinyStorage emission_sd_storage; + ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time); + if (is_zero(light_eval)) { + return; + } + + /* Evaluate BSDF. */ + BsdfEval phase_eval ccl_optional_struct_init; + const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls->D, &phase_eval); + + if (ls->shader & SHADER_USE_MIS) { + float mis_weight = power_heuristic(ls->pdf, phase_pdf); + bsdf_eval_mul(&phase_eval, mis_weight); + } + + bsdf_eval_mul3(&phase_eval, light_eval / ls->pdf); + + /* Path termination. */ + const float terminate = path_state_rng_light_termination(kg, rng_state); + if (light_sample_terminate(kg, ls, &phase_eval, terminate)) { + return; + } + + /* Create shadow ray. */ + Ray ray ccl_optional_struct_init; + light_sample_to_volume_shadow_ray(kg, sd, ls, P, &ray); + const bool is_light = light_sample_is_light(ls); + + /* Branch off shadow kernel. */ + INTEGRATOR_SHADOW_PATH_INIT( + shadow_state, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, shadow); + + /* Write shadow ray and associated state to global memory. */ + integrator_state_write_shadow_ray(kg, shadow_state, &ray); + + /* Copy state from main path to shadow path. */ + const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce); + const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce); + uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag); + shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0; + shadow_flag |= PATH_RAY_VOLUME_PASS; + const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval); + + if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { + const float3 diffuse_glossy_ratio = (bounce == 0) ? 
+ one_float3() : + INTEGRATOR_STATE(state, path, diffuse_glossy_ratio); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_glossy_ratio) = diffuse_glossy_ratio; + } + + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE( + state, path, render_pixel_index); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_offset) = INTEGRATOR_STATE( + state, path, rng_offset); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, rng_hash) = INTEGRATOR_STATE( + state, path, rng_hash); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, sample) = INTEGRATOR_STATE( + state, path, sample); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, flag) = shadow_flag; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, bounce) = bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transparent_bounce) = transparent_bounce; + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, diffuse_bounce) = INTEGRATOR_STATE( + state, path, diffuse_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE( + state, path, glossy_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE( + state, path, transmission_bounce); + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, throughput) = throughput_phase; + + if (kernel_data.kernel_features & KERNEL_FEATURE_SHADOW_PASS) { + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unshadowed_throughput) = throughput; + } + + integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state); +} +# endif + +/* Path tracing: scatter in new direction using phase function */ +ccl_device_forceinline bool integrate_volume_phase_scatter( + KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + ccl_private const RNGState *rng_state, + ccl_private const ShaderVolumePhases *phases) +{ + PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT); + + float phase_u, phase_v; + path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &phase_u, &phase_v); + + /* Phase closure, sample direction. */ + float phase_pdf; + BsdfEval phase_eval ccl_optional_struct_init; + float3 phase_omega_in ccl_optional_struct_init; + differential3 phase_domega_in ccl_optional_struct_init; + + const int label = shader_volume_phase_sample(kg, + sd, + phases, + phase_u, + phase_v, + &phase_eval, + &phase_omega_in, + &phase_domega_in, + &phase_pdf); + + if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) { + return false; + } + + /* Setup ray. */ + INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; + INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(phase_omega_in); + INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; + +# ifdef __RAY_DIFFERENTIALS__ + INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); + INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in); +# endif + + /* Update throughput. 
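The throughput update that follows multiplies by phase_eval / phase_pdf. As a standalone illustration, here is a single-lobe Henyey-Greenstein sketch, assuming the standard HG formulas rather than the kernel's closure machinery; because the direction is importance-sampled from the phase function itself, eval and pdf cancel and only the closure weight changes the throughput.

// Sketch of the throughput update after phase sampling, with a single
// Henyey-Greenstein lobe and no ray differentials. Standalone C++.
#include <cmath>
#include <cstdio>

static const float kPi = 3.14159265f;

// Henyey-Greenstein phase function over solid angle, as a function of the
// cosine between the old and new directions.
static float hg_phase(float g, float cos_theta)
{
  const float denom = 1.0f + g * g - 2.0f * g * cos_theta;
  return (1.0f - g * g) / (4.0f * kPi * denom * std::sqrt(denom));
}

// Invert the HG CDF to sample cos_theta from a uniform random number.
static float hg_sample_cos_theta(float g, float xi)
{
  if (std::fabs(g) < 1e-3f)
    return 2.0f * xi - 1.0f; // isotropic limit
  const float s = (1.0f - g * g) / (1.0f - g + 2.0f * g * xi);
  return (1.0f + g * g - s * s) / (2.0f * g);
}

int main()
{
  const float g = 0.6f, xi = 0.37f;
  const float cos_theta = hg_sample_cos_theta(g, xi);
  const float pdf = hg_phase(g, cos_theta);  // sampling pdf equals phase value
  const float eval = hg_phase(g, cos_theta); // phase function evaluation

  const float albedo = 0.8f; // closure weight stand-in
  float throughput = 1.0f;
  throughput *= albedo * eval / pdf; // the update pattern used above

  std::printf("cos_theta=%f eval/pdf=%f throughput=%f\n", cos_theta, eval / pdf, throughput);
  return 0;
}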
*/ + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf; + INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase; + + if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { + INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3(); + } + + /* Update path state */ + INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf; + INTEGRATOR_STATE_WRITE(state, path, mis_ray_t) = 0.0f; + INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf( + phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf)); + + path_state_next(kg, state, label); + return true; +} + +/* get the volume attenuation and emission over line segment defined by + * ray, with the assumption that there are no surfaces blocking light + * between the endpoints. distance sampling is used to decide if we will + * scatter or not. */ +ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, + IntegratorState state, + ccl_private Ray *ccl_restrict ray, + ccl_global float *ccl_restrict render_buffer) +{ + ShaderData sd; + shader_setup_from_volume(kg, &sd, ray); + + /* Load random number state. */ + RNGState rng_state; + path_state_rng_load(state, &rng_state); + + /* Sample light ahead of volume stepping, for equiangular sampling. */ + /* TODO: distant lights are ignored now, but could instead use even distribution. */ + LightSample ls ccl_optional_struct_init; + const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE); + const bool have_equiangular_sample = need_light_sample && + integrate_volume_sample_light( + kg, state, &sd, &rng_state, &ls) && + (ls.t != FLT_MAX); + + VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ? + volume_stack_sample_method(kg, state) : + VOLUME_SAMPLE_DISTANCE; + + /* Step through volume. */ + const float step_size = volume_stack_step_size( + kg, [=](const int i) { return integrator_state_read_volume_stack(state, i); }); + + /* TODO: expensive to zero closures? */ + VolumeIntegrateResult result = {}; + volume_integrate_heterogeneous(kg, + state, + ray, + &sd, + &rng_state, + render_buffer, + step_size, + direct_sample_method, + ls.P, + result); + + /* Perform path termination. The intersect_closest will have already marked this path + * to be terminated. That will shading evaluating to leave out any scattering closures, + * but emission and absorption are still handled for multiple importance sampling. */ + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + const float probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ? + 0.0f : + path_state_continuation_probability(kg, state, path_flag); + if (probability == 0.0f) { + return VOLUME_PATH_MISSED; + } + + /* Direct light. */ + if (result.direct_scatter) { + const float3 direct_P = ray->P + result.direct_t * ray->D; + result.direct_throughput /= probability; + integrate_volume_direct_light(kg, + state, + &sd, + &rng_state, + direct_P, + &result.direct_phases, + result.direct_throughput, + &ls); + } + + /* Indirect light. + * + * Only divide throughput by probability if we scatter. For the attenuation + * case the next surface will already do this division. 
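The probability used here is a Russian-roulette continuation probability. A tiny standalone sketch of the convention, dividing the surviving path's contribution by the survival probability so the estimate stays unbiased, assuming a toy constant contribution:

// Toy Russian roulette: continue with probability p, divide the surviving
// contribution by p, and the expected value is unchanged.
#include <cstdio>
#include <random>

int main()
{
  const float value = 0.25f;       // quantity carried by the path
  const float probability = 0.6f;  // continuation probability
  std::mt19937 rng(7);
  std::uniform_real_distribution<float> u(0.0f, 1.0f);

  const int N = 200000;
  double sum = 0.0;
  for (int i = 0; i < N; i++) {
    if (u(rng) < probability) {
      // Path survives: compensate by dividing by the survival probability.
      sum += value / probability;
    }
    // Terminated paths contribute nothing.
  }
  std::printf("estimate: %f  expected: %f\n", sum / N, value);
  return 0;
}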
*/ + if (result.indirect_scatter) { + result.indirect_throughput /= probability; + } + INTEGRATOR_STATE_WRITE(state, path, throughput) = result.indirect_throughput; + + if (result.indirect_scatter) { + sd.P = ray->P + result.indirect_t * ray->D; + + if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) { + return VOLUME_PATH_SCATTERED; + } + else { + return VOLUME_PATH_MISSED; + } + } + else { + return VOLUME_PATH_ATTENUATED; + } +} + +#endif + +ccl_device void integrator_shade_volume(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) +{ + PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_SETUP); + +#ifdef __VOLUME__ + /* Setup shader data. */ + Ray ray ccl_optional_struct_init; + integrator_state_read_ray(kg, state, &ray); + + Intersection isect ccl_optional_struct_init; + integrator_state_read_isect(kg, state, &isect); + + /* Set ray length to current segment. */ + ray.t = (isect.prim != PRIM_NONE) ? isect.t : FLT_MAX; + + /* Clean volume stack for background rays. */ + if (isect.prim == PRIM_NONE) { + volume_stack_clean(kg, state); + } + + VolumeIntegrateEvent event = volume_integrate(kg, state, &ray, render_buffer); + + if (event == VOLUME_PATH_SCATTERED) { + /* Queue intersect_closest kernel. */ + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST); + return; + } + else if (event == VOLUME_PATH_MISSED) { + /* End path. */ + INTEGRATOR_PATH_TERMINATE(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); + return; + } + else { + /* Continue to background, light or surface. */ + if (isect.prim == PRIM_NONE) { + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, + DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND); + return; + } + else if (isect.type & PRIMITIVE_LAMP) { + INTEGRATOR_PATH_NEXT(DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, + DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT); + return; + } + else { + /* Hit a surface, continue with surface kernel unless terminated. */ + const int shader = intersection_get_shader(kg, &isect); + const int flags = kernel_tex_fetch(__shaders, shader).flags; + + integrator_intersect_shader_next_kernel( + kg, state, &isect, shader, flags); + return; + } + } +#endif /* __VOLUME__ */ +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shader_eval.h b/intern/cycles/kernel/integrator/shader_eval.h new file mode 100644 index 00000000000..68f1ef8c118 --- /dev/null +++ b/intern/cycles/kernel/integrator/shader_eval.h @@ -0,0 +1,869 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Functions to evaluate shaders and use the resulting shader closures. 
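shader_merge_volume_closures() just below collapses identical Henyey-Greenstein closures from stacked volumes. Here is a simplified standalone sketch of the same idea, with the closure reduced to a (weight, g) pair and a scalar weight, both of which are simplifications of the kernel's data layout.

// Merge phase closures with identical anisotropy g by summing their weights,
// which saves closure slots when several overlapping volumes contribute the
// same phase function.
#include <cstdio>
#include <vector>

struct PhaseClosure {
  float weight; // scattering weight (scalar here, RGB in the kernel)
  float g;      // Henyey-Greenstein anisotropy
};

static void merge_identical(std::vector<PhaseClosure> &closures)
{
  for (size_t i = 0; i < closures.size(); i++) {
    for (size_t j = i + 1; j < closures.size();) {
      if (closures[i].g == closures[j].g) {
        closures[i].weight += closures[j].weight; // accumulate into the first
        closures.erase(closures.begin() + j);     // drop the duplicate
      }
      else {
        j++;
      }
    }
  }
}

int main()
{
  std::vector<PhaseClosure> closures = {{0.4f, 0.0f}, {0.3f, 0.6f}, {0.2f, 0.0f}};
  merge_identical(closures);
  for (const PhaseClosure &c : closures)
    std::printf("g=%.2f weight=%.2f\n", c.g, c.weight);
  return 0;
}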
*/ + +#pragma once + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/emissive.h" + +#include "kernel/film/accumulate.h" + +#include "kernel/svm/svm.h" + +#ifdef __OSL__ +# include "kernel/osl/shader.h" +#endif + +CCL_NAMESPACE_BEGIN + +/* Merging */ + +#if defined(__VOLUME__) +ccl_device_inline void shader_merge_volume_closures(ccl_private ShaderData *sd) +{ + /* Merge identical closures to save closure space with stacked volumes. */ + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sci = &sd->closure[i]; + + if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + continue; + } + + for (int j = i + 1; j < sd->num_closure; j++) { + ccl_private ShaderClosure *scj = &sd->closure[j]; + if (sci->type != scj->type) { + continue; + } + + ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *) + sci; + ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *) + scj; + if (!(hgi->g == hgj->g)) { + continue; + } + + sci->weight += scj->weight; + sci->sample_weight += scj->sample_weight; + + int size = sd->num_closure - (j + 1); + if (size > 0) { + for (int k = 0; k < size; k++) { + scj[k] = scj[k + 1]; + } + } + + sd->num_closure--; + kernel_assert(sd->num_closure >= 0); + j--; + } + } +} + +ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases *ccl_restrict + phases, + ccl_private const ShaderData *ccl_restrict sd) +{ + phases->num_closure = 0; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *from_sc = &sd->closure[i]; + ccl_private const HenyeyGreensteinVolume *from_hg = + (ccl_private const HenyeyGreensteinVolume *)from_sc; + + if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure]; + + to_sc->weight = from_sc->weight; + to_sc->sample_weight = from_sc->sample_weight; + to_sc->g = from_hg->g; + phases->num_closure++; + if (phases->num_closure >= MAX_VOLUME_CLOSURE) { + break; + } + } + } +} +#endif /* __VOLUME__ */ + +ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd) +{ + /* Defensive sampling. + * + * We can likely also do defensive sampling at deeper bounces, particularly + * for cases like a perfect mirror but possibly also others. This will need + * a good heuristic. */ + if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) == + 0 && + sd->num_closure > 1) { + float sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sc->sample_weight = max(sc->sample_weight, 0.125f * sum); + } + } + } + + /* Filter glossy. 
+ * + * Blurring of bsdf after bounces, for rays that have a small likelihood + * of following this particular path (diffuse, rough glossy) */ + if (kernel_data.integrator.filter_glossy != FLT_MAX) { + float blur_pdf = kernel_data.integrator.filter_glossy * + INTEGRATOR_STATE(state, path, min_ray_pdf); + + if (blur_pdf < 1.0f) { + float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF(sc->type)) { + bsdf_blur(kg, sc, blur_roughness); + } + } + } + } +} + +/* BSDF */ + +ccl_device_inline bool shader_bsdf_is_transmission(ccl_private const ShaderData *sd, + const float3 omega_in) +{ + return dot(sd->N, omega_in) < 0.0f; +} + +ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_shader_flags) +{ + if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) { + return false; + } + if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) { + if (CLOSURE_IS_BSDF_DIFFUSE(type)) { + return true; + } + } + if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) { + if (CLOSURE_IS_BSDF_GLOSSY(type)) { + return true; + } + } + if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) { + if (CLOSURE_IS_BSDF_TRANSMISSION(type)) { + return true; + } + } + return false; +} + +ccl_device_inline float _shader_bsdf_multi_eval(KernelGlobals kg, + ccl_private ShaderData *sd, + const float3 omega_in, + const bool is_transmission, + ccl_private const ShaderClosure *skip_sc, + ccl_private BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight, + const uint light_shader_flags) +{ + /* This is the veach one-sample model with balance heuristic, + * some PDF factors drop out when using balance heuristic weighting. */ + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (sc == skip_sc) { + continue; + } + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + if (CLOSURE_IS_BSDF(sc->type) && !_shader_bsdf_exclude(sc->type, light_shader_flags)) { + float bsdf_pdf = 0.0f; + float3 eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf); + + if (bsdf_pdf != 0.0f) { + const bool is_diffuse = CLOSURE_IS_BSDF_DIFFUSE(sc->type); + bsdf_eval_accum(result_eval, is_diffuse, eval * sc->weight, 1.0f); + sum_pdf += bsdf_pdf * sc->sample_weight; + } + } + + sum_sample_weight += sc->sample_weight; + } + } + + return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; +} + +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_inline +#endif + float + shader_bsdf_eval(KernelGlobals kg, + ccl_private ShaderData *sd, + const float3 omega_in, + const bool is_transmission, + ccl_private BsdfEval *bsdf_eval, + const uint light_shader_flags) +{ + bsdf_eval_init(bsdf_eval, false, zero_float3()); + + return _shader_bsdf_multi_eval( + kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags); +} + +/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */ +ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick( + ccl_private const ShaderData *ccl_restrict sd, ccl_private float *randu) +{ + int sampled = 0; + + if (sd->num_closure > 1) { + /* Pick a BSDF or based on sample weights. 
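_shader_bsdf_multi_eval() above implements the Veach one-sample model with the balance heuristic, and the picking step that follows chooses which lobe to sample. A standalone sketch of the combination rule, with two made-up lobes, shows why the effective pdf is the selection-weighted average of the per-lobe pdfs while the eval is a plain sum.

// One-sample / balance-heuristic combination over a small set of lobes,
// evaluated for one fixed direction. Standalone C++; the numbers are
// illustrative only.
#include <cstdio>

struct Lobe {
  float sample_weight; // probability weight for picking this lobe
  float pdf;           // pdf of the sampled direction under this lobe
  float eval;          // value of this lobe for the sampled direction
};

int main()
{
  const Lobe lobes[2] = {{0.7f, 0.30f, 0.25f}, {0.3f, 1.20f, 1.10f}};

  float sum_eval = 0.0f, sum_pdf = 0.0f, sum_weight = 0.0f;
  for (const Lobe &l : lobes) {
    sum_eval += l.eval;                 // lobes always sum for the eval
    sum_pdf += l.pdf * l.sample_weight; // pdf weighted by selection weight
    sum_weight += l.sample_weight;
  }
  const float combined_pdf = (sum_weight > 0.0f) ? sum_pdf / sum_weight : 0.0f;

  std::printf("combined eval=%f combined pdf=%f eval/pdf=%f\n",
              sum_eval, combined_pdf, sum_eval / combined_pdf);
  return 0;
}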
*/ + float sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + + float r = (*randu) * sum; + float partial_sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + float next_sum = partial_sum + sc->sample_weight; + + if (r < next_sum) { + sampled = i; + + /* Rescale to reuse for direction sample, to better preserve stratification. */ + *randu = (r - partial_sum) / sc->sample_weight; + break; + } + + partial_sum = next_sum; + } + } + } + + return &sd->closure[sampled]; +} + +/* Return weight for picked BSSRDF. */ +ccl_device_inline float3 +shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd, + ccl_private const ShaderClosure *ccl_restrict bssrdf_sc) +{ + float3 weight = bssrdf_sc->weight; + + if (sd->num_closure > 1) { + float sum = 0.0f; + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + weight *= sum / bssrdf_sc->sample_weight; + } + + return weight; +} + +/* Sample direction for picked BSDF, and return evaluation and pdf for all + * BSDFs combined using MIS. */ +ccl_device int shader_bsdf_sample_closure(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private const ShaderClosure *sc, + float randu, + float randv, + ccl_private BsdfEval *bsdf_eval, + ccl_private float3 *omega_in, + ccl_private differential3 *domega_in, + ccl_private float *pdf) +{ + /* BSSRDF should already have been handled elsewhere. */ + kernel_assert(CLOSURE_IS_BSDF(sc->type)); + + int label; + float3 eval = zero_float3(); + + *pdf = 0.0f; + label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); + + if (*pdf != 0.0f) { + const bool is_diffuse = CLOSURE_IS_BSDF_DIFFUSE(sc->type); + bsdf_eval_init(bsdf_eval, is_diffuse, eval * sc->weight); + + if (sd->num_closure > 1) { + const bool is_transmission = shader_bsdf_is_transmission(sd, *omega_in); + float sweight = sc->sample_weight; + *pdf = _shader_bsdf_multi_eval( + kg, sd, *omega_in, is_transmission, sc, bsdf_eval, *pdf * sweight, sweight, 0); + } + } + + return label; +} + +ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd) +{ + float roughness = 0.0f; + float sum_weight = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF(sc->type)) { + /* sqrt once to undo the squaring from multiplying roughness on the + * two axes, and once for the squared roughness convention. */ + float weight = fabsf(average(sc->weight)); + roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc))); + sum_weight += weight; + } + } + + return (sum_weight > 0.0f) ? 
roughness / sum_weight : 0.0f; +} + +ccl_device float3 shader_bsdf_transparency(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_HAS_ONLY_VOLUME) { + return one_float3(); + } + else if (sd->flag & SD_TRANSPARENT) { + return sd->closure_transparent_extinction; + } + else { + return zero_float3(); + } +} + +ccl_device void shader_bsdf_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd) +{ + if (sd->flag & SD_TRANSPARENT) { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + + if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { + sc->sample_weight = 0.0f; + sc->weight = zero_float3(); + } + } + + sd->flag &= ~SD_TRANSPARENT; + } +} + +ccl_device float3 shader_bsdf_alpha(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd); + + alpha = max(alpha, zero_float3()); + alpha = min(alpha, one_float3()); + + return alpha; +} + +ccl_device float3 shader_bsdf_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 eval = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device float3 shader_bsdf_glossy(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 eval = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device float3 shader_bsdf_transmission(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 eval = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) + eval += sc->weight; + } + + return eval; +} + +ccl_device float3 shader_bsdf_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd) +{ + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) + N += sc->N * fabsf(average(sc->weight)); + } + + return (is_zero(N)) ? sd->N : normalize(N); +} + +ccl_device float3 shader_bsdf_ao(KernelGlobals kg, + ccl_private const ShaderData *sd, + const float ao_factor, + ccl_private float3 *N_) +{ + float3 eval = zero_float3(); + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { + ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc; + eval += sc->weight * ao_factor; + N += bsdf->N * fabsf(average(sc->weight)); + } + } + + *N_ = (is_zero(N)) ? sd->N : normalize(N); + return eval; +} + +#ifdef __SUBSURFACE__ +ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd) +{ + float3 N = zero_float3(); + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSSRDF(sc->type)) { + ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; + float avg_weight = fabsf(average(sc->weight)); + + N += bssrdf->N * avg_weight; + } + } + + return (is_zero(N)) ? 
sd->N : normalize(N); +} +#endif /* __SUBSURFACE__ */ + +/* Constant emission optimization */ + +ccl_device bool shader_constant_emission_eval(KernelGlobals kg, + int shader, + ccl_private float3 *eval) +{ + int shader_index = shader & SHADER_MASK; + int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags; + + if (shader_flag & SD_HAS_CONSTANT_EMISSION) { + *eval = make_float3(kernel_tex_fetch(__shaders, shader_index).constant_emission[0], + kernel_tex_fetch(__shaders, shader_index).constant_emission[1], + kernel_tex_fetch(__shaders, shader_index).constant_emission[2]); + + return true; + } + + return false; +} + +/* Background */ + +ccl_device float3 shader_background_eval(ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_EMISSION) { + return sd->closure_emission_background; + } + else { + return zero_float3(); + } +} + +/* Emission */ + +ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd) +{ + if (sd->flag & SD_EMISSION) { + return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; + } + else { + return zero_float3(); + } +} + +/* Holdout */ + +ccl_device float3 shader_holdout_apply(KernelGlobals kg, ccl_private ShaderData *sd) +{ + float3 weight = zero_float3(); + + /* For objects marked as holdout, preserve transparency and remove all other + * closures, replacing them with a holdout weight. */ + if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { + if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) { + weight = one_float3() - sd->closure_transparent_extinction; + + for (int i = 0; i < sd->num_closure; i++) { + ccl_private ShaderClosure *sc = &sd->closure[i]; + if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { + sc->type = NBUILTIN_CLOSURES; + } + } + + sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF)); + } + else { + weight = one_float3(); + } + } + else { + for (int i = 0; i < sd->num_closure; i++) { + ccl_private const ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_HOLDOUT(sc->type)) { + weight += sc->weight; + } + } + } + + return weight; +} + +/* Surface Evaluation */ + +template +ccl_device void shader_eval_surface(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *ccl_restrict sd, + ccl_global float *ccl_restrict buffer, + uint32_t path_flag) +{ + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. 
*/ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = kernel_data.max_closures; + } + + sd->num_closure = 0; + sd->num_closure_left = max_closures; + +#ifdef __OSL__ + if (kg->osl) { + if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { + OSLShader::eval_background(kg, state, sd, path_flag); + } + else { + OSLShader::eval_surface(kg, state, sd, path_flag); + } + } + else +#endif + { +#ifdef __SVM__ + svm_eval_nodes(kg, state, sd, buffer, path_flag); +#else + if (sd->object == OBJECT_NONE) { + sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f); + sd->flag |= SD_EMISSION; + } + else { + ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( + sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f)); + if (bsdf != NULL) { + bsdf->N = sd->N; + sd->flag |= bsdf_diffuse_setup(bsdf); + } + } +#endif + } +} + +/* Volume */ + +#ifdef __VOLUME__ + +ccl_device_inline float _shader_volume_phase_multi_eval( + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + const float3 omega_in, + int skip_phase, + ccl_private BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight) +{ + for (int i = 0; i < phases->num_closure; i++) { + if (i == skip_phase) + continue; + + ccl_private const ShaderVolumeClosure *svc = &phases->closure[i]; + float phase_pdf = 0.0f; + float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf); + + if (phase_pdf != 0.0f) { + bsdf_eval_accum(result_eval, false, eval, 1.0f); + sum_pdf += phase_pdf * svc->sample_weight; + } + + sum_sample_weight += svc->sample_weight; + } + + return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; +} + +ccl_device float shader_volume_phase_eval(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + const float3 omega_in, + ccl_private BsdfEval *phase_eval) +{ + bsdf_eval_init(phase_eval, false, zero_float3()); + + return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f); +} + +ccl_device int shader_volume_phase_sample(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumePhases *phases, + float randu, + float randv, + ccl_private BsdfEval *phase_eval, + ccl_private float3 *omega_in, + ccl_private differential3 *domega_in, + ccl_private float *pdf) +{ + int sampled = 0; + + if (phases->num_closure > 1) { + /* pick a phase closure based on sample weights */ + float sum = 0.0f; + + for (sampled = 0; sampled < phases->num_closure; sampled++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + sum += svc->sample_weight; + } + + float r = randu * sum; + float partial_sum = 0.0f; + + for (sampled = 0; sampled < phases->num_closure; sampled++) { + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + float next_sum = partial_sum + svc->sample_weight; + + if (r <= next_sum) { + /* Rescale to reuse for BSDF direction sample. 
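The pick-and-rescale pattern used here (and earlier for BSDF/BSSRDF closures) can be isolated as follows; the sketch is standalone C++ with illustrative weights, not kernel code.

// Choose an entry with probability proportional to its sample weight, then
// rescale the random number to the sub-interval of the chosen entry so it
// can be reused for the direction sample without losing stratification.
#include <cstdio>

static int pick_weighted(const float *weights, int count, float *randu)
{
  float sum = 0.0f;
  for (int i = 0; i < count; i++)
    sum += weights[i];

  const float r = (*randu) * sum;
  float partial_sum = 0.0f;
  for (int i = 0; i < count; i++) {
    const float next_sum = partial_sum + weights[i];
    if (r < next_sum) {
      // Map r back to [0, 1) within the picked entry's interval.
      *randu = (r - partial_sum) / weights[i];
      return i;
    }
    partial_sum = next_sum;
  }
  return count - 1; // numerical safety fallback
}

int main()
{
  const float weights[3] = {0.5f, 1.5f, 1.0f};
  float randu = 0.40f; // 0.40 * 3.0 = 1.2 falls inside the second entry
  const int picked = pick_weighted(weights, 3, &randu);
  std::printf("picked closure %d, rescaled randu=%f\n", picked, randu);
  return 0;
}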
*/ + randu = (r - partial_sum) / svc->sample_weight; + break; + } + + partial_sum = next_sum; + } + + if (sampled == phases->num_closure) { + *pdf = 0.0f; + return LABEL_NONE; + } + } + + /* todo: this isn't quite correct, we don't weight anisotropy properly + * depending on color channels, even if this is perhaps not a common case */ + ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled]; + int label; + float3 eval = zero_float3(); + + *pdf = 0.0f; + label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, domega_in, pdf); + + if (*pdf != 0.0f) { + bsdf_eval_init(phase_eval, false, eval); + } + + return label; +} + +ccl_device int shader_phase_sample_closure(KernelGlobals kg, + ccl_private const ShaderData *sd, + ccl_private const ShaderVolumeClosure *sc, + float randu, + float randv, + ccl_private BsdfEval *phase_eval, + ccl_private float3 *omega_in, + ccl_private differential3 *domega_in, + ccl_private float *pdf) +{ + int label; + float3 eval = zero_float3(); + + *pdf = 0.0f; + label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); + + if (*pdf != 0.0f) + bsdf_eval_init(phase_eval, false, eval); + + return label; +} + +/* Volume Evaluation */ + +template +ccl_device_inline void shader_eval_volume(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *ccl_restrict sd, + const uint32_t path_flag, + StackReadOp stack_read) +{ + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. */ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = kernel_data.max_closures; + } + + /* reset closures once at the start, we will be accumulating the closures + * for all volumes in the stack into a single array of closures */ + sd->num_closure = 0; + sd->num_closure_left = max_closures; + sd->flag = 0; + sd->object_flag = 0; + + for (int i = 0;; i++) { + const VolumeStack entry = stack_read(i); + if (entry.shader == SHADER_NONE) { + break; + } + + /* Setup shader-data from stack. it's mostly setup already in + * shader_setup_from_volume, this switching should be quick. */ + sd->object = entry.object; + sd->lamp = LAMP_NONE; + sd->shader = entry.shader; + + sd->flag &= ~SD_SHADER_FLAGS; + sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->object_flag &= ~SD_OBJECT_FLAGS; + + if (sd->object != OBJECT_NONE) { + sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object); + +# ifdef __OBJECT_MOTION__ + /* todo: this is inefficient for motion blur, we should be + * caching matrices instead of recomputing them each step */ + shader_setup_object_transforms(kg, sd, sd->time); +# endif + } + + /* evaluate shader */ +# ifdef __SVM__ +# ifdef __OSL__ + if (kg->osl) { + OSLShader::eval_volume(kg, state, sd, path_flag); + } + else +# endif + { + svm_eval_nodes( + kg, state, sd, NULL, path_flag); + } +# endif + + /* Merge closures to avoid exceeding number of closures limit. 
*/ + if (!shadow) { + if (i > 0) { + shader_merge_volume_closures(sd); + } + } + } +} + +#endif /* __VOLUME__ */ + +/* Displacement Evaluation */ + +template +ccl_device void shader_eval_displacement(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *sd) +{ + sd->num_closure = 0; + sd->num_closure_left = 0; + + /* this will modify sd->P */ +#ifdef __SVM__ +# ifdef __OSL__ + if (kg->osl) + OSLShader::eval_displacement(kg, state, sd); + else +# endif + { + svm_eval_nodes( + kg, state, sd, NULL, 0); + } +#endif +} + +/* Cryptomatte */ + +ccl_device float shader_cryptomatte_id(KernelGlobals kg, int shader) +{ + return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h new file mode 100644 index 00000000000..7beae235dbc --- /dev/null +++ b/intern/cycles/kernel/integrator/shadow_catcher.h @@ -0,0 +1,120 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/integrator/path_state.h" +#include "kernel/integrator/state_util.h" + +CCL_NAMESPACE_BEGIN + +/* Check whether current surface bounce is where path is to be split for the shadow catcher. */ +ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals kg, + IntegratorState state, + const int object_flag) +{ +#ifdef __SHADOW_CATCHER__ + if (!kernel_data.integrator.has_shadow_catcher) { + return false; + } + + /* Check the flag first, avoiding fetches form global memory. */ + if ((object_flag & SD_OBJECT_SHADOW_CATCHER) == 0) { + return false; + } + if (object_flag & SD_OBJECT_HOLDOUT_MASK) { + return false; + } + + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if ((path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) == 0) { + /* Split only on primary rays, secondary bounces are to treat shadow catcher as a regular + * object. */ + return false; + } + + if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) { + return false; + } + + return true; +#else + (void)object_flag; + return false; +#endif +} + +/* Check whether the current path can still split. */ +ccl_device_inline bool kernel_shadow_catcher_path_can_split(KernelGlobals kg, + ConstIntegratorState state) +{ + if (INTEGRATOR_PATH_IS_TERMINATED) { + return false; + } + + const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag); + + if (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) { + /* Shadow catcher was already hit and the state was split. No further split is allowed. */ + return false; + } + + return (path_flag & PATH_RAY_TRANSPARENT_BACKGROUND) != 0; +} + +/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths + * after this function. 
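The split decision above boils down to a handful of flag tests. The following standalone sketch condenses kernel_shadow_catcher_is_path_split_bounce() and kernel_shadow_catcher_path_can_split() into one predicate, with made-up flag constants standing in for the real PATH_RAY_* and SD_OBJECT_* bits.

// Split only when the hit object is a shadow catcher (and not a holdout),
// the path still sees the transparent background (primary visibility), and
// no split has happened yet.
#include <cstdint>
#include <cstdio>

enum : uint32_t {
  OBJ_SHADOW_CATCHER = 1u << 0,
  OBJ_HOLDOUT_MASK = 1u << 1,
  PATH_TRANSPARENT_BACKGROUND = 1u << 0,
  PATH_SHADOW_CATCHER_HIT = 1u << 1,
  PATH_SHADOW_CATCHER_PASS = 1u << 2,
};

static bool is_split_bounce(uint32_t object_flag, uint32_t path_flag)
{
  if (!(object_flag & OBJ_SHADOW_CATCHER) || (object_flag & OBJ_HOLDOUT_MASK))
    return false;
  if (!(path_flag & PATH_TRANSPARENT_BACKGROUND))
    return false; // only split on primary visibility
  if (path_flag & (PATH_SHADOW_CATCHER_HIT | PATH_SHADOW_CATCHER_PASS))
    return false; // already split, or already on the catcher-only path
  return true;
}

int main()
{
  std::printf("primary hit on catcher: %d\n",
              is_split_bounce(OBJ_SHADOW_CATCHER, PATH_TRANSPARENT_BACKGROUND));
  std::printf("secondary hit on catcher: %d\n", is_split_bounce(OBJ_SHADOW_CATCHER, 0));
  return 0;
}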
*/ +ccl_device_inline bool kernel_shadow_catcher_split(KernelGlobals kg, + IntegratorState state, + const int object_flags) +{ +#ifdef __SHADOW_CATCHER__ + + if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, object_flags)) { + return false; + } + + /* The split is to be done. Mark the current state as such, so that it stops contributing to the + * shadow catcher matte pass, but keeps contributing to the combined pass. */ + INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT; + + /* Split new state from the current one. This new state will only track contribution of shadow + * catcher objects ignoring non-catcher objects. */ + integrator_state_shadow_catcher_split(kg, state); + + return true; +#else + (void)object_flags; + return false; +#endif +} + +#ifdef __SHADOW_CATCHER__ + +ccl_device_forceinline bool kernel_shadow_catcher_is_matte_path(const uint32_t path_flag) +{ + return (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) == 0; +} + +ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t path_flag) +{ + return path_flag & PATH_RAY_SHADOW_CATCHER_PASS; +} + +#endif /* __SHADOW_CATCHER__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/shadow_state_template.h b/intern/cycles/kernel/integrator/shadow_state_template.h new file mode 100644 index 00000000000..1fbadde2642 --- /dev/null +++ b/intern/cycles/kernel/integrator/shadow_state_template.h @@ -0,0 +1,86 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/********************************* Shadow Path State **************************/ + +KERNEL_STRUCT_BEGIN(shadow_path) +/* Index of a pixel within the device render buffer. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRACING) +/* Current sample number. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, sample, KERNEL_FEATURE_PATH_TRACING) +/* Random number generator seed. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING) +/* Random number dimension offset. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING) +/* Current ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current transparent ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current diffuse ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, diffuse_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current glossy ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, glossy_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current transmission ray bounce depth. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transmission_bounce, KERNEL_FEATURE_PATH_TRACING) +/* DeviceKernel bit indicating queued kernels. 
*/ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) +/* enum PathRayFlag */ +KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) +/* Throughput. */ +KERNEL_STRUCT_MEMBER(shadow_path, float3, throughput, KERNEL_FEATURE_PATH_TRACING) +/* Throughput for shadow pass. */ +KERNEL_STRUCT_MEMBER(shadow_path, + float3, + unshadowed_throughput, + KERNEL_FEATURE_SHADOW_PASS | KERNEL_FEATURE_AO_ADDITIVE) +/* Ratio of throughput to distinguish diffuse and glossy render passes. */ +KERNEL_STRUCT_MEMBER(shadow_path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES) +/* Number of intersections found by ray-tracing. */ +KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(shadow_path) + +/********************************** Shadow Ray *******************************/ + +KERNEL_STRUCT_BEGIN(shadow_ray) +KERNEL_STRUCT_MEMBER(shadow_ray, float3, P, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float3, D, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, time, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(shadow_ray, float, dP, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(shadow_ray) + +/*********************** Shadow Intersection result **************************/ + +/* Result from scene intersection. */ +KERNEL_STRUCT_BEGIN(shadow_isect) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, u, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, v, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, prim, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END_ARRAY(shadow_isect, + INTEGRATOR_SHADOW_ISECT_SIZE_CPU, + INTEGRATOR_SHADOW_ISECT_SIZE_GPU) + +/**************************** Shadow Volume Stack *****************************/ + +KERNEL_STRUCT_BEGIN(shadow_volume_stack) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, object, KERNEL_FEATURE_VOLUME) +KERNEL_STRUCT_ARRAY_MEMBER(shadow_volume_stack, int, shader, KERNEL_FEATURE_VOLUME) +KERNEL_STRUCT_END_ARRAY(shadow_volume_stack, + KERNEL_STRUCT_VOLUME_STACK_SIZE, + KERNEL_STRUCT_VOLUME_STACK_SIZE) diff --git a/intern/cycles/kernel/integrator/state.h b/intern/cycles/kernel/integrator/state.h new file mode 100644 index 00000000000..86dac0a65cf --- /dev/null +++ b/intern/cycles/kernel/integrator/state.h @@ -0,0 +1,195 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Integrator State + * + * This file defines the data structures that define the state of a path. Any state that is + * preserved and passed between kernel executions is part of this. 
+ * + * The size of this state must be kept as small as possible, to reduce cache misses and keep memory + * usage under control on GPUs that may execute millions of kernels. + * + * Memory may be allocated and passed along in different ways depending on the device. There may + * be a scalar layout, or AoS or SoA layout for batches. The state may be passed along as a pointer + * to every kernel, or the pointer may exist at program scope or in constant memory. To abstract + * these differences between devices and experiment with different layouts, macros are used. + * + * Use IntegratorState to pass a reference to the integrator state for the current path. These are + * defined differently on the CPU and GPU. Use ConstIntegratorState instead of const + * IntegratorState for passing state as read-only, to avoid oddities in typedef behavior. + * + * INTEGRATOR_STATE(state, x, y): read nested struct member x.y of IntegratorState + * INTEGRATOR_STATE_WRITE(state, x, y): write to nested struct member x.y of IntegratorState + * + * INTEGRATOR_STATE_ARRAY(state, x, index, y): read x[index].y + * INTEGRATOR_STATE_ARRAY_WRITE(state, x, index, y): write x[index].y + * + * INTEGRATOR_STATE_NULL: use to pass empty state to other functions. + */ + +#include "kernel/types.h" + +#include "util/types.h" + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Data structures */ + +/* Integrator State + * + * CPU rendering path state with AoS layout. */ +typedef struct IntegratorShadowStateCPU { +#define KERNEL_STRUCT_BEGIN(name) struct { +#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name; +#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER +#define KERNEL_STRUCT_END(name) \ + } \ + name; +#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \ + } \ + name[cpu_size]; +#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE +#include "kernel/integrator/shadow_state_template.h" +#undef KERNEL_STRUCT_BEGIN +#undef KERNEL_STRUCT_MEMBER +#undef KERNEL_STRUCT_ARRAY_MEMBER +#undef KERNEL_STRUCT_END +#undef KERNEL_STRUCT_END_ARRAY +} IntegratorShadowStateCPU; + +typedef struct IntegratorStateCPU { +#define KERNEL_STRUCT_BEGIN(name) struct { +#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name; +#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER +#define KERNEL_STRUCT_END(name) \ + } \ + name; +#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \ + } \ + name[cpu_size]; +#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE +#include "kernel/integrator/state_template.h" +#undef KERNEL_STRUCT_BEGIN +#undef KERNEL_STRUCT_MEMBER +#undef KERNEL_STRUCT_ARRAY_MEMBER +#undef KERNEL_STRUCT_END +#undef KERNEL_STRUCT_END_ARRAY +#undef KERNEL_STRUCT_VOLUME_STACK_SIZE + + IntegratorShadowStateCPU shadow; + IntegratorShadowStateCPU ao; +} IntegratorStateCPU; + +/* Path Queue + * + * Keep track of which kernels are queued to be executed next in the path + * for GPU rendering. */ +typedef struct IntegratorQueueCounter { + int num_queued[DEVICE_KERNEL_INTEGRATOR_NUM]; +} IntegratorQueueCounter; + +/* Integrator State GPU + * + * GPU rendering path state with SoA layout. 
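The KERNEL_STRUCT_* machinery above expands one member list into different layouts. A minimal standalone sketch of the same technique, using a single X-macro in place of the included template header (names and members are illustrative), is:

// The same member list expanded once into a plain value struct (CPU,
// array-of-structs) and once into a struct of pointers (GPU,
// structure-of-arrays).
#include <cstdio>

// The shared member list: STATE_MEMBER(type, name)
#define STATE_MEMBERS(STATE_MEMBER) \
  STATE_MEMBER(unsigned int, rng_hash) \
  STATE_MEMBER(unsigned short, bounce) \
  STATE_MEMBER(float, min_ray_pdf)

// Expansion 1: value members, one struct per path (AoS).
#define AS_VALUE(type, name) type name;
struct PathStateCPU {
  STATE_MEMBERS(AS_VALUE)
};
#undef AS_VALUE

// Expansion 2: pointer members, one array per field (SoA), indexed by path.
#define AS_ARRAY(type, name) type *name;
struct PathStateGPU {
  STATE_MEMBERS(AS_ARRAY)
};
#undef AS_ARRAY

int main()
{
  PathStateCPU cpu = {0x1234u, 3, 0.5f};

  float min_ray_pdf_array[2] = {0.5f, 1.0f};
  PathStateGPU gpu = {nullptr, nullptr, min_ray_pdf_array};

  // Accessor macros hide the layout difference: state->x versus array[state].
  std::printf("cpu bounce=%u gpu min_ray_pdf[1]=%f\n",
              (unsigned)cpu.bounce, gpu.min_ray_pdf[1]);
  return 0;
}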
*/ +typedef struct IntegratorStateGPU { +#define KERNEL_STRUCT_BEGIN(name) struct { +#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) ccl_global type *name; +#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER +#define KERNEL_STRUCT_END(name) \ + } \ + name; +#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \ + } \ + name[gpu_size]; +#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE + +#include "kernel/integrator/state_template.h" + +#include "kernel/integrator/shadow_state_template.h" + +#undef KERNEL_STRUCT_BEGIN +#undef KERNEL_STRUCT_MEMBER +#undef KERNEL_STRUCT_ARRAY_MEMBER +#undef KERNEL_STRUCT_END +#undef KERNEL_STRUCT_END_ARRAY +#undef KERNEL_STRUCT_VOLUME_STACK_SIZE + + /* Count number of queued kernels. */ + ccl_global IntegratorQueueCounter *queue_counter; + + /* Count number of kernels queued for specific shaders. */ + ccl_global int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM]; + + /* Index of shadow path which will be used by a next shadow path. */ + ccl_global int *next_shadow_path_index; + + /* Index of main path which will be used by a next shadow catcher split. */ + ccl_global int *next_main_path_index; +} IntegratorStateGPU; + +/* Abstraction + * + * Macros to access data structures on different devices. + * + * Note that there is a special access function for the shadow catcher state. This access is to + * happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors + * from a kernel which operates on a shadow catcher state will cause bad memory access. */ + +#ifdef __KERNEL_CPU__ + +/* Scalar access on CPU. */ + +typedef IntegratorStateCPU *ccl_restrict IntegratorState; +typedef const IntegratorStateCPU *ccl_restrict ConstIntegratorState; +typedef IntegratorShadowStateCPU *ccl_restrict IntegratorShadowState; +typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState; + +# define INTEGRATOR_STATE_NULL nullptr + +# define INTEGRATOR_STATE(state, nested_struct, member) ((state)->nested_struct.member) +# define INTEGRATOR_STATE_WRITE(state, nested_struct, member) ((state)->nested_struct.member) + +# define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \ + ((state)->nested_struct[array_index].member) +# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ + ((state)->nested_struct[array_index].member) + +#else /* __KERNEL_CPU__ */ + +/* Array access on GPU with Structure-of-Arrays. 
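The GPU macros below keep per-kernel queue counters in sync with each path's queued_kernel field. Here is a standalone sketch of that bookkeeping, with std::atomic standing in for the device atomics and a made-up kernel enum.

// Each path records which kernel it is queued for, and a global counter per
// kernel is atomically adjusted so the host can decide which kernel to
// launch next and with how many work items.
#include <atomic>
#include <cstdio>

enum Kernel { KERNEL_NONE = 0, KERNEL_INTERSECT_CLOSEST, KERNEL_SHADE_SURFACE, KERNEL_COUNT };

static std::atomic<int> num_queued[KERNEL_COUNT];

struct PathState {
  int queued_kernel = KERNEL_NONE;
};

static void path_init(PathState &state, int next_kernel)
{
  num_queued[next_kernel].fetch_add(1);
  state.queued_kernel = next_kernel;
}

static void path_next(PathState &state, int next_kernel)
{
  num_queued[state.queued_kernel].fetch_sub(1); // leave the current queue
  num_queued[next_kernel].fetch_add(1);         // join the next one
  state.queued_kernel = next_kernel;
}

static void path_terminate(PathState &state)
{
  num_queued[state.queued_kernel].fetch_sub(1);
  state.queued_kernel = KERNEL_NONE; // a zero kernel means "terminated"
}

int main()
{
  PathState path;
  path_init(path, KERNEL_INTERSECT_CLOSEST);
  path_next(path, KERNEL_SHADE_SURFACE);
  std::printf("queued for shade_surface: %d\n", num_queued[KERNEL_SHADE_SURFACE].load());
  path_terminate(path);
  std::printf("queued for shade_surface after terminate: %d\n",
              num_queued[KERNEL_SHADE_SURFACE].load());
  return 0;
}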
*/ + +typedef const int IntegratorState; +typedef const int ConstIntegratorState; +typedef const int IntegratorShadowState; +typedef const int ConstIntegratorShadowState; + +# define INTEGRATOR_STATE_NULL -1 + +# define INTEGRATOR_STATE(state, nested_struct, member) \ + kernel_integrator_state.nested_struct.member[state] +# define INTEGRATOR_STATE_WRITE(state, nested_struct, member) \ + INTEGRATOR_STATE(state, nested_struct, member) + +# define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \ + kernel_integrator_state.nested_struct[array_index].member[state] +# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \ + INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) + +#endif /* __KERNEL_CPU__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/state_flow.h b/intern/cycles/kernel/integrator/state_flow.h new file mode 100644 index 00000000000..38a2b396847 --- /dev/null +++ b/intern/cycles/kernel/integrator/state_flow.h @@ -0,0 +1,148 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/types.h" +#include "util/atomic.h" + +CCL_NAMESPACE_BEGIN + +/* Control Flow + * + * Utilities for control flow between kernels. The implementation may differ per device + * or even be handled on the host side. To abstract such differences, experiment with + * different implementations and for debugging, this is abstracted using macros. + * + * There is a main path for regular path tracing camera for path tracing. Shadows for next + * event estimation branch off from this into their own path, that may be computed in + * parallel while the main path continues. + * + * Each kernel on the main path must call one of these functions. These may not be called + * multiple times from the same kernel. + * + * INTEGRATOR_PATH_INIT(next_kernel) + * INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) + * INTEGRATOR_PATH_TERMINATE(current_kernel) + * + * For the shadow path similar functions are used, and again each shadow kernel must call + * one of them, and only once. 
+ */ + +#define INTEGRATOR_PATH_IS_TERMINATED (INTEGRATOR_STATE(state, path, queued_kernel) == 0) +#define INTEGRATOR_SHADOW_PATH_IS_TERMINATED \ + (INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0) + +#ifdef __KERNEL_GPU__ + +# define INTEGRATOR_PATH_INIT(next_kernel) \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ + 1); \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ + atomic_fetch_and_sub_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ + 1); \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ + atomic_fetch_and_sub_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; + +# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ + IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32( \ + &kernel_integrator_state.next_shadow_path_index[0], 1); \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ + 1); \ + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; +# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ + atomic_fetch_and_sub_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], \ + 1); \ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; +# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ + atomic_fetch_and_sub_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; + +# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ + { \ + const int key_ = key; \ + atomic_fetch_and_add_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ + 1); \ + } +# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ + { \ + const int key_ = key; \ + atomic_fetch_and_sub_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[current_kernel], 1); \ + atomic_fetch_and_add_uint32( \ + &kernel_integrator_state.queue_counter->num_queued[next_kernel], 1); \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ + INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_; \ + atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], \ + 1); \ + } + +#else + +# define INTEGRATOR_PATH_INIT(next_kernel) \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; +# define INTEGRATOR_PATH_INIT_SORTED(next_kernel, key) \ + { \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ + (void)key; \ + } +# define INTEGRATOR_PATH_NEXT(current_kernel, next_kernel) \ + { \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ + (void)current_kernel; \ + } +# define INTEGRATOR_PATH_TERMINATE(current_kernel) \ + { \ + 
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; \ + (void)current_kernel; \ + } +# define INTEGRATOR_PATH_NEXT_SORTED(current_kernel, next_kernel, key) \ + { \ + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel; \ + (void)key; \ + (void)current_kernel; \ + } + +# define INTEGRATOR_SHADOW_PATH_INIT(shadow_state, state, next_kernel, shadow_type) \ + IntegratorShadowState shadow_state = &state->shadow_type; \ + INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel; +# define INTEGRATOR_SHADOW_PATH_NEXT(current_kernel, next_kernel) \ + { \ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel; \ + (void)current_kernel; \ + } +# define INTEGRATOR_SHADOW_PATH_TERMINATE(current_kernel) \ + { \ + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; \ + (void)current_kernel; \ + } + +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h new file mode 100644 index 00000000000..b1a6fd36fae --- /dev/null +++ b/intern/cycles/kernel/integrator/state_template.h @@ -0,0 +1,109 @@ + +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/************************************ Path State *****************************/ + +KERNEL_STRUCT_BEGIN(path) +/* Index of a pixel within the device render buffer where this path will write its result. + * To get an actual offset within the buffer the value needs to be multiplied by the + * `kernel_data.film.pass_stride`. + * + * The multiplication is delayed for later, so that state can use 32bit integer. */ +KERNEL_STRUCT_MEMBER(path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRACING) +/* Current sample number. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, sample, KERNEL_FEATURE_PATH_TRACING) +/* Current ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current transparent ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current diffuse ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, diffuse_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current glossy ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, glossy_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current transmission ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, transmission_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current volume ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounce, KERNEL_FEATURE_PATH_TRACING) +/* Current volume bounds ray bounce depth. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounds_bounce, KERNEL_FEATURE_PATH_TRACING) +/* DeviceKernel bit indicating queued kernels. */ +KERNEL_STRUCT_MEMBER(path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING) +/* Random number generator seed. */ +KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING) +/* Random number dimension offset. 
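+ * Advanced by PRNG_BOUNCE_NUM after each bounce (see e.g. subsurface_bounce in this change) so
+ * that every bounce draws from a fresh set of random dimensions.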
*/ +KERNEL_STRUCT_MEMBER(path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING) +/* enum PathRayFlag */ +KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING) +/* Multiple importance sampling + * The PDF of BSDF sampling at the last scatter point, and distance to the + * last scatter point minus the last ray segment. This distance lets us + * compute the complete distance through transparent surfaces and volumes. */ +KERNEL_STRUCT_MEMBER(path, float, mis_ray_pdf, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(path, float, mis_ray_t, KERNEL_FEATURE_PATH_TRACING) +/* Filter glossy. */ +KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING) +/* Throughput. */ +KERNEL_STRUCT_MEMBER(path, float3, throughput, KERNEL_FEATURE_PATH_TRACING) +/* Ratio of throughput to distinguish diffuse and glossy render passes. */ +KERNEL_STRUCT_MEMBER(path, float3, diffuse_glossy_ratio, KERNEL_FEATURE_LIGHT_PASSES) +/* Denoising. */ +KERNEL_STRUCT_MEMBER(path, float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING) +/* Shader sorting. */ +/* TODO: compress as uint16? or leave out entirely and recompute key in sorting code? */ +KERNEL_STRUCT_MEMBER(path, uint32_t, shader_sort_key, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(path) + +/************************************** Ray ***********************************/ + +KERNEL_STRUCT_BEGIN(ray) +KERNEL_STRUCT_MEMBER(ray, float3, P, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float3, D, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, time, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, dP, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(ray, float, dD, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(ray) + +/*************************** Intersection result ******************************/ + +/* Result from scene intersection. 
*/ +KERNEL_STRUCT_BEGIN(isect) +KERNEL_STRUCT_MEMBER(isect, float, t, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(isect, float, u, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(isect, float, v, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(isect, int, prim, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(isect, int, object, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_MEMBER(isect, int, type, KERNEL_FEATURE_PATH_TRACING) +KERNEL_STRUCT_END(isect) + +/*************** Subsurface closure state for subsurface kernel ***************/ + +KERNEL_STRUCT_BEGIN(subsurface) +KERNEL_STRUCT_MEMBER(subsurface, float3, albedo, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, float3, radius, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_MEMBER(subsurface, float3, Ng, KERNEL_FEATURE_SUBSURFACE) +KERNEL_STRUCT_END(subsurface) + +/********************************** Volume Stack ******************************/ + +KERNEL_STRUCT_BEGIN(volume_stack) +KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, object, KERNEL_FEATURE_VOLUME) +KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, shader, KERNEL_FEATURE_VOLUME) +KERNEL_STRUCT_END_ARRAY(volume_stack, + KERNEL_STRUCT_VOLUME_STACK_SIZE, + KERNEL_STRUCT_VOLUME_STACK_SIZE) diff --git a/intern/cycles/kernel/integrator/state_util.h b/intern/cycles/kernel/integrator/state_util.h new file mode 100644 index 00000000000..dafe06e7009 --- /dev/null +++ b/intern/cycles/kernel/integrator/state_util.h @@ -0,0 +1,440 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "kernel/integrator/state.h" + +#include "kernel/util/differential.h" + +CCL_NAMESPACE_BEGIN + +/* Ray */ + +ccl_device_forceinline void integrator_state_write_ray(KernelGlobals kg, + IntegratorState state, + ccl_private const Ray *ccl_restrict ray) +{ + INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P; + INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D; + INTEGRATOR_STATE_WRITE(state, ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time; + INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP; + INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD; +} + +ccl_device_forceinline void integrator_state_read_ray(KernelGlobals kg, + ConstIntegratorState state, + ccl_private Ray *ccl_restrict ray) +{ + ray->P = INTEGRATOR_STATE(state, ray, P); + ray->D = INTEGRATOR_STATE(state, ray, D); + ray->t = INTEGRATOR_STATE(state, ray, t); + ray->time = INTEGRATOR_STATE(state, ray, time); + ray->dP = INTEGRATOR_STATE(state, ray, dP); + ray->dD = INTEGRATOR_STATE(state, ray, dD); +} + +/* Shadow Ray */ + +ccl_device_forceinline void integrator_state_write_shadow_ray( + KernelGlobals kg, IntegratorShadowState state, ccl_private const Ray *ccl_restrict ray) +{ + INTEGRATOR_STATE_WRITE(state, shadow_ray, P) = ray->P; + INTEGRATOR_STATE_WRITE(state, shadow_ray, D) = ray->D; + INTEGRATOR_STATE_WRITE(state, shadow_ray, t) = ray->t; + INTEGRATOR_STATE_WRITE(state, shadow_ray, time) = ray->time; + INTEGRATOR_STATE_WRITE(state, shadow_ray, dP) = ray->dP; +} + +ccl_device_forceinline void integrator_state_read_shadow_ray(KernelGlobals kg, + ConstIntegratorShadowState state, + ccl_private Ray *ccl_restrict ray) +{ + ray->P = INTEGRATOR_STATE(state, shadow_ray, P); + ray->D = INTEGRATOR_STATE(state, shadow_ray, D); + ray->t = INTEGRATOR_STATE(state, shadow_ray, t); + ray->time = INTEGRATOR_STATE(state, shadow_ray, time); + ray->dP = INTEGRATOR_STATE(state, shadow_ray, dP); + ray->dD = differential_zero_compact(); +} + +/* Intersection */ + +ccl_device_forceinline void integrator_state_write_isect( + KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect) +{ + INTEGRATOR_STATE_WRITE(state, isect, t) = isect->t; + INTEGRATOR_STATE_WRITE(state, isect, u) = isect->u; + INTEGRATOR_STATE_WRITE(state, isect, v) = isect->v; + INTEGRATOR_STATE_WRITE(state, isect, object) = isect->object; + INTEGRATOR_STATE_WRITE(state, isect, prim) = isect->prim; + INTEGRATOR_STATE_WRITE(state, isect, type) = isect->type; +} + +ccl_device_forceinline void integrator_state_read_isect( + KernelGlobals kg, ConstIntegratorState state, ccl_private Intersection *ccl_restrict isect) +{ + isect->prim = INTEGRATOR_STATE(state, isect, prim); + isect->object = INTEGRATOR_STATE(state, isect, object); + isect->type = INTEGRATOR_STATE(state, isect, type); + isect->u = INTEGRATOR_STATE(state, isect, u); + isect->v = INTEGRATOR_STATE(state, isect, v); + isect->t = INTEGRATOR_STATE(state, isect, t); +} + +ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(ConstIntegratorState state, + int i) +{ + VolumeStack entry = {INTEGRATOR_STATE_ARRAY(state, volume_stack, i, object), + INTEGRATOR_STATE_ARRAY(state, volume_stack, i, shader)}; + return entry; +} + +ccl_device_forceinline void integrator_state_write_volume_stack(IntegratorState state, + int i, + VolumeStack entry) +{ + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, i, object) = entry.object; + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, i, shader) = entry.shader; +} + +ccl_device_forceinline bool 
integrator_state_volume_stack_is_empty(KernelGlobals kg, + ConstIntegratorState state) +{ + return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ? + INTEGRATOR_STATE_ARRAY(state, volume_stack, 0, shader) == SHADER_NONE : + true; +} + +/* Shadow Intersection */ + +ccl_device_forceinline void integrator_state_write_shadow_isect( + IntegratorShadowState state, + ccl_private const Intersection *ccl_restrict isect, + const int index) +{ + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, t) = isect->t; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, u) = isect->u; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, v) = isect->v; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, object) = isect->object; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, prim) = isect->prim; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, type) = isect->type; +} + +ccl_device_forceinline void integrator_state_read_shadow_isect( + ConstIntegratorShadowState state, + ccl_private Intersection *ccl_restrict isect, + const int index) +{ + isect->prim = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, prim); + isect->object = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, object); + isect->type = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, type); + isect->u = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, u); + isect->v = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, v); + isect->t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, t); +} + +ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow( + KernelGlobals kg, IntegratorShadowState shadow_state, ConstIntegratorState state) +{ + if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { + int index = 0; + int shader; + do { + shader = INTEGRATOR_STATE_ARRAY(state, volume_stack, index, shader); + + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_volume_stack, index, object) = + INTEGRATOR_STATE_ARRAY(state, volume_stack, index, object); + INTEGRATOR_STATE_ARRAY_WRITE(shadow_state, shadow_volume_stack, index, shader) = shader; + + ++index; + } while (shader != OBJECT_NONE); + } +} + +ccl_device_forceinline void integrator_state_copy_volume_stack(KernelGlobals kg, + IntegratorState to_state, + ConstIntegratorState state) +{ + if (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) { + int index = 0; + int shader; + do { + shader = INTEGRATOR_STATE_ARRAY(state, volume_stack, index, shader); + + INTEGRATOR_STATE_ARRAY_WRITE(to_state, volume_stack, index, object) = INTEGRATOR_STATE_ARRAY( + state, volume_stack, index, object); + INTEGRATOR_STATE_ARRAY_WRITE(to_state, volume_stack, index, shader) = shader; + + ++index; + } while (shader != OBJECT_NONE); + } +} + +ccl_device_forceinline VolumeStack +integrator_state_read_shadow_volume_stack(ConstIntegratorShadowState state, int i) +{ + VolumeStack entry = {INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, object), + INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, i, shader)}; + return entry; +} + +ccl_device_forceinline bool integrator_state_shadow_volume_stack_is_empty( + KernelGlobals kg, ConstIntegratorShadowState state) +{ + return (kernel_data.kernel_features & KERNEL_FEATURE_VOLUME) ? 
+ INTEGRATOR_STATE_ARRAY(state, shadow_volume_stack, 0, shader) == SHADER_NONE : + true; +} + +ccl_device_forceinline void integrator_state_write_shadow_volume_stack(IntegratorShadowState state, + int i, + VolumeStack entry) +{ + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, i, object) = entry.object; + INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_volume_stack, i, shader) = entry.shader; +} + +#if defined(__KERNEL_GPU__) +ccl_device_inline void integrator_state_copy_only(KernelGlobals kg, + ConstIntegratorState to_state, + ConstIntegratorState state) +{ + int index; + + /* Rely on the compiler to optimize out unused assignments and `while(false)`'s. */ + +# define KERNEL_STRUCT_BEGIN(name) \ + index = 0; \ + do { + +# define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) \ + if (kernel_integrator_state.parent_struct.name != nullptr) { \ + kernel_integrator_state.parent_struct.name[to_state] = \ + kernel_integrator_state.parent_struct.name[state]; \ + } + +# define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \ + if (kernel_integrator_state.parent_struct[index].name != nullptr) { \ + kernel_integrator_state.parent_struct[index].name[to_state] = \ + kernel_integrator_state.parent_struct[index].name[state]; \ + } + +# define KERNEL_STRUCT_END(name) \ + } \ + while (false) \ + ; + +# define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \ + ++index; \ + } \ + while (index < gpu_array_size) \ + ; + +# define KERNEL_STRUCT_VOLUME_STACK_SIZE kernel_data.volume_stack_size + +# include "kernel/integrator/state_template.h" + +# undef KERNEL_STRUCT_BEGIN +# undef KERNEL_STRUCT_MEMBER +# undef KERNEL_STRUCT_ARRAY_MEMBER +# undef KERNEL_STRUCT_END +# undef KERNEL_STRUCT_END_ARRAY +# undef KERNEL_STRUCT_VOLUME_STACK_SIZE +} + +ccl_device_inline void integrator_state_move(KernelGlobals kg, + ConstIntegratorState to_state, + ConstIntegratorState state) +{ + integrator_state_copy_only(kg, to_state, state); + + INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0; +} + +ccl_device_inline void integrator_shadow_state_copy_only(KernelGlobals kg, + ConstIntegratorShadowState to_state, + ConstIntegratorShadowState state) +{ + int index; + + /* Rely on the compiler to optimize out unused assignments and `while(false)`'s. 
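+ *
+ * Schematically, for a non-array struct the template below expands to a single-iteration loop:
+ *
+ *   index = 0;
+ *   do {
+ *     if (kernel_integrator_state.path.bounce != nullptr) {
+ *       kernel_integrator_state.path.bounce[to_state] = kernel_integrator_state.path.bounce[state];
+ *     }
+ *     ... (remaining members) ...
+ *   } while (false);
+ *
+ * while array structs such as the volume stack increment index and repeat the body
+ * while (index < gpu_array_size).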
*/ + +# define KERNEL_STRUCT_BEGIN(name) \ + index = 0; \ + do { + +# define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) \ + if (kernel_integrator_state.parent_struct.name != nullptr) { \ + kernel_integrator_state.parent_struct.name[to_state] = \ + kernel_integrator_state.parent_struct.name[state]; \ + } + +# define KERNEL_STRUCT_ARRAY_MEMBER(parent_struct, type, name, feature) \ + if (kernel_integrator_state.parent_struct[index].name != nullptr) { \ + kernel_integrator_state.parent_struct[index].name[to_state] = \ + kernel_integrator_state.parent_struct[index].name[state]; \ + } + +# define KERNEL_STRUCT_END(name) \ + } \ + while (false) \ + ; + +# define KERNEL_STRUCT_END_ARRAY(name, cpu_array_size, gpu_array_size) \ + ++index; \ + } \ + while (index < gpu_array_size) \ + ; + +# define KERNEL_STRUCT_VOLUME_STACK_SIZE kernel_data.volume_stack_size + +# include "kernel/integrator/shadow_state_template.h" + +# undef KERNEL_STRUCT_BEGIN +# undef KERNEL_STRUCT_MEMBER +# undef KERNEL_STRUCT_ARRAY_MEMBER +# undef KERNEL_STRUCT_END +# undef KERNEL_STRUCT_END_ARRAY +# undef KERNEL_STRUCT_VOLUME_STACK_SIZE +} + +ccl_device_inline void integrator_shadow_state_move(KernelGlobals kg, + ConstIntegratorState to_state, + ConstIntegratorState state) +{ + integrator_shadow_state_copy_only(kg, to_state, state); + + INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0; +} + +#endif + +/* NOTE: Leaves kernel scheduling information untouched. Use INIT semantic for one of the paths + * after this function. */ +ccl_device_inline void integrator_state_shadow_catcher_split(KernelGlobals kg, + IntegratorState state) +{ +#if defined(__KERNEL_GPU__) + ConstIntegratorState to_state = atomic_fetch_and_add_uint32( + &kernel_integrator_state.next_main_path_index[0], 1); + + integrator_state_copy_only(kg, to_state, state); +#else + IntegratorStateCPU *ccl_restrict to_state = state + 1; + + /* Only copy the required subset, since shadow intersections are big and irrelevant here. 
*/ + to_state->path = state->path; + to_state->ray = state->ray; + to_state->isect = state->isect; + integrator_state_copy_volume_stack(kg, to_state, state); +#endif + + INTEGRATOR_STATE_WRITE(to_state, path, flag) |= PATH_RAY_SHADOW_CATCHER_PASS; +} + +#ifdef __KERNEL_CPU__ +ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int) +{ + return INTEGRATOR_STATE(state, path, bounce); +} + +ccl_device_inline int integrator_state_bounce(ConstIntegratorShadowState state, const int) +{ + return INTEGRATOR_STATE(state, shadow_path, bounce); +} + +ccl_device_inline int integrator_state_diffuse_bounce(ConstIntegratorState state, const int) +{ + return INTEGRATOR_STATE(state, path, diffuse_bounce); +} + +ccl_device_inline int integrator_state_diffuse_bounce(ConstIntegratorShadowState state, const int) +{ + return INTEGRATOR_STATE(state, shadow_path, diffuse_bounce); +} + +ccl_device_inline int integrator_state_glossy_bounce(ConstIntegratorState state, const int) +{ + return INTEGRATOR_STATE(state, path, glossy_bounce); +} + +ccl_device_inline int integrator_state_glossy_bounce(ConstIntegratorShadowState state, const int) +{ + return INTEGRATOR_STATE(state, shadow_path, glossy_bounce); +} + +ccl_device_inline int integrator_state_transmission_bounce(ConstIntegratorState state, const int) +{ + return INTEGRATOR_STATE(state, path, transmission_bounce); +} + +ccl_device_inline int integrator_state_transmission_bounce(ConstIntegratorShadowState state, + const int) +{ + return INTEGRATOR_STATE(state, shadow_path, transmission_bounce); +} + +ccl_device_inline int integrator_state_transparent_bounce(ConstIntegratorState state, const int) +{ + return INTEGRATOR_STATE(state, path, transparent_bounce); +} + +ccl_device_inline int integrator_state_transparent_bounce(ConstIntegratorShadowState state, + const int) +{ + return INTEGRATOR_STATE(state, shadow_path, transparent_bounce); +} +#else +ccl_device_inline int integrator_state_bounce(ConstIntegratorShadowState state, + const uint32_t path_flag) +{ + return (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, bounce) : + INTEGRATOR_STATE(state, path, bounce); +} + +ccl_device_inline int integrator_state_diffuse_bounce(ConstIntegratorShadowState state, + const uint32_t path_flag) +{ + return (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, diffuse_bounce) : + INTEGRATOR_STATE(state, path, diffuse_bounce); +} + +ccl_device_inline int integrator_state_glossy_bounce(ConstIntegratorShadowState state, + const uint32_t path_flag) +{ + return (path_flag & PATH_RAY_SHADOW) ? INTEGRATOR_STATE(state, shadow_path, glossy_bounce) : + INTEGRATOR_STATE(state, path, glossy_bounce); +} + +ccl_device_inline int integrator_state_transmission_bounce(ConstIntegratorShadowState state, + const uint32_t path_flag) +{ + return (path_flag & PATH_RAY_SHADOW) ? + INTEGRATOR_STATE(state, shadow_path, transmission_bounce) : + INTEGRATOR_STATE(state, path, transmission_bounce); +} + +ccl_device_inline int integrator_state_transparent_bounce(ConstIntegratorShadowState state, + const uint32_t path_flag) +{ + return (path_flag & PATH_RAY_SHADOW) ? 
INTEGRATOR_STATE(state, shadow_path, transparent_bounce) : + INTEGRATOR_STATE(state, path, transparent_bounce); +} +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h new file mode 100644 index 00000000000..49466112387 --- /dev/null +++ b/intern/cycles/kernel/integrator/subsurface.h @@ -0,0 +1,201 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/camera/projection.h" + +#include "kernel/bvh/bvh.h" + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf_diffuse.h" +#include "kernel/closure/bsdf_principled_diffuse.h" +#include "kernel/closure/bssrdf.h" +#include "kernel/closure/volume.h" + +#include "kernel/integrator/intersect_volume_stack.h" +#include "kernel/integrator/path_state.h" +#include "kernel/integrator/shader_eval.h" +#include "kernel/integrator/subsurface_disk.h" +#include "kernel/integrator/subsurface_random_walk.h" + +CCL_NAMESPACE_BEGIN + +#ifdef __SUBSURFACE__ + +ccl_device int subsurface_bounce(KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + ccl_private const ShaderClosure *sc) +{ + /* We should never have two consecutive BSSRDF bounces, the second one should + * be converted to a diffuse BSDF to avoid this. */ + kernel_assert(!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DIFFUSE_ANCESTOR)); + + /* Setup path state for intersect_subsurface kernel. */ + ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc; + + /* Setup ray into surface. */ + INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P; + INTEGRATOR_STATE_WRITE(state, ray, D) = bssrdf->N; + INTEGRATOR_STATE_WRITE(state, ray, t) = FLT_MAX; + INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP); + INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact(); + + /* Pass along object info, reusing isect to save memory. */ + INTEGRATOR_STATE_WRITE(state, subsurface, Ng) = sd->Ng; + INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object; + + uint32_t path_flag = (INTEGRATOR_STATE(state, path, flag) & ~PATH_RAY_CAMERA) | + ((sc->type == CLOSURE_BSSRDF_BURLEY_ID) ? PATH_RAY_SUBSURFACE_DISK : + PATH_RAY_SUBSURFACE_RANDOM_WALK); + + /* Compute weight, optionally including Fresnel from entry point. */ + float3 weight = shader_bssrdf_sample_weight(sd, sc); +# ifdef __PRINCIPLED__ + if (bssrdf->roughness != FLT_MAX) { + path_flag |= PATH_RAY_SUBSURFACE_USE_FRESNEL; + } +# endif + + INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight; + INTEGRATOR_STATE_WRITE(state, path, flag) = path_flag; + + /* Advance random number offset for bounce. */ + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; + + if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) { + if (INTEGRATOR_STATE(state, path, bounce) == 0) { + INTEGRATOR_STATE_WRITE(state, path, diffuse_glossy_ratio) = one_float3(); + } + } + + /* Pass BSSRDF parameters. 
*/ + INTEGRATOR_STATE_WRITE(state, subsurface, albedo) = bssrdf->albedo; + INTEGRATOR_STATE_WRITE(state, subsurface, radius) = bssrdf->radius; + INTEGRATOR_STATE_WRITE(state, subsurface, anisotropy) = bssrdf->anisotropy; + + return LABEL_SUBSURFACE_SCATTER; +} + +ccl_device void subsurface_shader_data_setup(KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *sd, + const uint32_t path_flag) +{ + /* Get bump mapped normal from shader evaluation at exit point. */ + float3 N = sd->N; + if (sd->flag & SD_HAS_BSSRDF_BUMP) { + N = shader_bssrdf_normal(sd); + } + + /* Setup diffuse BSDF at the exit point. This replaces shader_eval_surface. */ + sd->flag &= ~SD_CLOSURE_FLAGS; + sd->num_closure = 0; + sd->num_closure_left = kernel_data.max_closures; + + const float3 weight = one_float3(); + +# ifdef __PRINCIPLED__ + if (path_flag & PATH_RAY_SUBSURFACE_USE_FRESNEL) { + ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc( + sd, sizeof(PrincipledDiffuseBsdf), weight); + + if (bsdf) { + bsdf->N = N; + bsdf->roughness = FLT_MAX; + sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_LAMBERT_EXIT); + } + } + else +# endif /* __PRINCIPLED__ */ + { + ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( + sd, sizeof(DiffuseBsdf), weight); + + if (bsdf) { + bsdf->N = N; + sd->flag |= bsdf_diffuse_setup(bsdf); + } + } +} + +ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState state) +{ + RNGState rng_state; + path_state_rng_load(state, &rng_state); + + Ray ray ccl_optional_struct_init; + LocalIntersection ss_isect ccl_optional_struct_init; + + if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_SUBSURFACE_RANDOM_WALK) { + if (!subsurface_random_walk(kg, state, rng_state, ray, ss_isect)) { + return false; + } + } + else { + if (!subsurface_disk(kg, state, rng_state, ray, ss_isect)) { + return false; + } + } + +# ifdef __VOLUME__ + /* Update volume stack if needed. */ + if (kernel_data.integrator.use_volumes) { + const int object = ss_isect.hits[0].object; + const int object_flag = kernel_tex_fetch(__object_flag, object); + + if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) { + float3 P = INTEGRATOR_STATE(state, ray, P); + const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng); + const float3 offset_P = ray_offset(P, -Ng); + + integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P); + } + } +# endif /* __VOLUME__ */ + + /* Pretend ray is coming from the outside towards the exit point. This ensures + * correct front/back facing normals. + * TODO: find a more elegant solution? */ + ray.P += ray.D * ray.t * 2.0f; + ray.D = -ray.D; + + integrator_state_write_isect(kg, state, &ss_isect.hits[0]); + integrator_state_write_ray(kg, state, &ray); + + /* Advance random number offset for bounce. 
*/ + INTEGRATOR_STATE_WRITE(state, path, rng_offset) += PRNG_BOUNCE_NUM; + + const int shader = intersection_get_shader(kg, &ss_isect.hits[0]); + const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; + if (shader_flags & SD_HAS_RAYTRACE) { + INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, + shader); + } + else { + INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, + shader); + } + + return true; +} + +#endif /* __SUBSURFACE__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/subsurface_disk.h b/intern/cycles/kernel/integrator/subsurface_disk.h new file mode 100644 index 00000000000..e1cce13fb30 --- /dev/null +++ b/intern/cycles/kernel/integrator/subsurface_disk.h @@ -0,0 +1,196 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +/* BSSRDF using disk based importance sampling. + * + * BSSRDF Importance Sampling, SIGGRAPH 2013 + * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf + */ + +ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r, float r) +{ + const float3 eval = bssrdf_eval(radius, r); + const float pdf = bssrdf_pdf(radius, disk_r); + return (pdf > 0.0f) ? eval / pdf : zero_float3(); +} + +/* Subsurface scattering step, from a point on the surface to other + * nearby points on the same object. */ +ccl_device_inline bool subsurface_disk(KernelGlobals kg, + IntegratorState state, + RNGState rng_state, + ccl_private Ray &ray, + ccl_private LocalIntersection &ss_isect) + +{ + float disk_u, disk_v; + path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &disk_u, &disk_v); + + /* Read shading point info from integrator state. */ + const float3 P = INTEGRATOR_STATE(state, ray, P); + const float ray_dP = INTEGRATOR_STATE(state, ray, dP); + const float time = INTEGRATOR_STATE(state, ray, time); + const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng); + const int object = INTEGRATOR_STATE(state, isect, object); + + /* Read subsurface scattering parameters. */ + const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); + + /* Pick random axis in local frame and point on disk. */ + float3 disk_N, disk_T, disk_B; + float pick_pdf_N, pick_pdf_T, pick_pdf_B; + + disk_N = Ng; + make_orthonormals(disk_N, &disk_T, &disk_B); + + if (disk_v < 0.5f) { + pick_pdf_N = 0.5f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.25f; + disk_v *= 2.0f; + } + else if (disk_v < 0.75f) { + float3 tmp = disk_N; + disk_N = disk_T; + disk_T = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.5f; + pick_pdf_B = 0.25f; + disk_v = (disk_v - 0.5f) * 4.0f; + } + else { + float3 tmp = disk_N; + disk_N = disk_B; + disk_B = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.5f; + disk_v = (disk_v - 0.75f) * 4.0f; + } + + /* Sample point on disk. 
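+ * The branch above effectively stratifies disk_v: values in [0, 0.5) pick N and are remapped to
+ * 2 * v, values in [0.5, 0.75) pick T and are remapped to 4 * (v - 0.5), and values in [0.75, 1)
+ * pick B and are remapped to 4 * (v - 0.75), so the remapped value is again uniform in [0, 1)
+ * for sampling phi below.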
*/ + float phi = M_2PI_F * disk_v; + float disk_height, disk_r; + + bssrdf_sample(radius, disk_u, &disk_r, &disk_height); + + float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B; + + /* Create ray. */ + ray.P = P + disk_N * disk_height + disk_P; + ray.D = -disk_N; + ray.t = 2.0f * disk_height; + ray.dP = ray_dP; + ray.dD = differential_zero_compact(); + ray.time = time; + + /* Intersect with the same object. if multiple intersections are found it + * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits. */ + uint lcg_state = lcg_state_init( + rng_state.rng_hash, rng_state.rng_offset, rng_state.sample, 0x68bc21eb); + const int max_hits = BSSRDF_MAX_HITS; + + scene_intersect_local(kg, &ray, &ss_isect, object, &lcg_state, max_hits); + const int num_eval_hits = min(ss_isect.num_hits, max_hits); + if (num_eval_hits == 0) { + return false; + } + + /* Sort for consistent renders between CPU and GPU, independent of the BVH + * traversal algorithm. */ + sort_intersections_and_normals(ss_isect.hits, ss_isect.Ng, num_eval_hits); + + float3 weights[BSSRDF_MAX_HITS]; /* TODO: zero? */ + float sum_weights = 0.0f; + + for (int hit = 0; hit < num_eval_hits; hit++) { + /* Quickly retrieve P and Ng without setting up ShaderData. */ + const float3 hit_P = ray.P + ray.D * ss_isect.hits[hit].t; + + /* Get geometric normal. */ + const int object = ss_isect.hits[hit].object; + const int object_flag = kernel_tex_fetch(__object_flag, object); + float3 hit_Ng = ss_isect.Ng[hit]; + if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + hit_Ng = -hit_Ng; + } + + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + Transform itfm; + object_fetch_transform_motion_test(kg, object, time, &itfm); + hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng)); + } + + /* Probability densities for local frame axes. */ + const float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng)); + const float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng)); + const float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng)); + + /* Multiple importance sample between 3 axes, power heuristic + * found to be slightly better than balance heuristic. pdf_N + * in the MIS weight and denominator cancelled out. */ + float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B)); + if (ss_isect.num_hits > max_hits) { + w *= ss_isect.num_hits / (float)max_hits; + } + + /* Real distance to sampled point. */ + const float r = len(hit_P - P); + + /* Evaluate profiles. */ + const float3 weight = subsurface_disk_eval(radius, disk_r, r) * w; + + /* Store result. */ + ss_isect.Ng[hit] = hit_Ng; + weights[hit] = weight; + sum_weights += average(fabs(weight)); + } + + if (sum_weights == 0.0f) { + return false; + } + + /* Use importance resampling, sampling one of the hits proportional to weight. */ + const float r = lcg_step_float(&lcg_state) * sum_weights; + float partial_sum = 0.0f; + + for (int hit = 0; hit < num_eval_hits; hit++) { + const float3 weight = weights[hit]; + const float sample_weight = average(fabs(weight)); + float next_sum = partial_sum + sample_weight; + + if (r < next_sum) { + /* Return exit point. 
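+ * As an illustrative example: with per-hit sample weights 0.2, 0.6 and 0.2, sum_weights is 1.0,
+ * the second hit is selected with probability 0.6, and scaling its contribution by
+ * sum_weights / sample_weight = 1 / 0.6 keeps the resampled estimator unbiased.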
*/ + INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight * sum_weights / sample_weight; + + ss_isect.hits[0] = ss_isect.hits[hit]; + ss_isect.Ng[0] = ss_isect.Ng[hit]; + + ray.P = ray.P + ray.D * ss_isect.hits[hit].t; + ray.D = ss_isect.Ng[hit]; + ray.t = 1.0f; + return true; + } + + partial_sum = next_sum; + } + + return false; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h new file mode 100644 index 00000000000..f0712758174 --- /dev/null +++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h @@ -0,0 +1,469 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/camera/projection.h" + +#include "kernel/bvh/bvh.h" + +CCL_NAMESPACE_BEGIN + +/* Random walk subsurface scattering. + * + * "Practical and Controllable Subsurface Scattering for Production Path + * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */ + +/* Support for anisotropy from: + * "Path Traced Subsurface Scattering using Anisotropic Phase Functions + * and Non-Exponential Free Flights". + * Magnus Wrenninge, Ryusuke Villemin, Christophe Hery. + * https://graphics.pixar.com/library/PathTracedSubsurface/ */ + +ccl_device void subsurface_random_walk_remap(const float albedo, + const float d, + float g, + ccl_private float *sigma_t, + ccl_private float *alpha) +{ + /* Compute attenuation and scattering coefficients from albedo. 
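+ * The fit below maps the per-channel albedo and radius d to a single-scattering albedo alpha and
+ * an extinction sigma_t, with similarity scaling for the anisotropy g:
+ * sigma_t = sigma_t' / (1 - g) with sigma_t' = 1 / max(d, 1e-16). As a purely illustrative
+ * example, d = 1 and g = 0.8 give sigma_t' = 1 and sigma_t = 5.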
*/ + const float g2 = g * g; + const float g3 = g2 * g; + const float g4 = g3 * g; + const float g5 = g4 * g; + const float g6 = g5 * g; + const float g7 = g6 * g; + + const float A = 1.8260523782f + -1.28451056436f * g + -1.79904629312f * g2 + + 9.19393289202f * g3 + -22.8215585862f * g4 + 32.0234874259f * g5 + + -23.6264803333f * g6 + 7.21067002658f * g7; + const float B = 4.98511194385f + + 0.127355959438f * + expf(31.1491581433f * g + -201.847017512f * g2 + 841.576016723f * g3 + + -2018.09288505f * g4 + 2731.71560286f * g5 + -1935.41424244f * g6 + + 559.009054474f * g7); + const float C = 1.09686102424f + -0.394704063468f * g + 1.05258115941f * g2 + + -8.83963712726f * g3 + 28.8643230661f * g4 + -46.8802913581f * g5 + + 38.5402837518f * g6 + -12.7181042538f * g7; + const float D = 0.496310210422f + 0.360146581622f * g + -2.15139309747f * g2 + + 17.8896899217f * g3 + -55.2984010333f * g4 + 82.065982243f * g5 + + -58.5106008578f * g6 + 15.8478295021f * g7; + const float E = 4.23190299701f + + 0.00310603949088f * + expf(76.7316253952f * g + -594.356773233f * g2 + 2448.8834203f * g3 + + -5576.68528998f * g4 + 7116.60171912f * g5 + -4763.54467887f * g6 + + 1303.5318055f * g7); + const float F = 2.40602999408f + -2.51814844609f * g + 9.18494908356f * g2 + + -79.2191708682f * g3 + 259.082868209f * g4 + -403.613804597f * g5 + + 302.85712436f * g6 + -87.4370473567f * g7; + + const float blend = powf(albedo, 0.25f); + + *alpha = (1.0f - blend) * A * powf(atanf(B * albedo), C) + + blend * D * powf(atanf(E * albedo), F); + *alpha = clamp(*alpha, 0.0f, 0.999999f); // because of numerical precision + + float sigma_t_prime = 1.0f / fmaxf(d, 1e-16f); + *sigma_t = sigma_t_prime / (1.0f - g); +} + +ccl_device void subsurface_random_walk_coefficients(const float3 albedo, + const float3 radius, + const float anisotropy, + ccl_private float3 *sigma_t, + ccl_private float3 *alpha, + ccl_private float3 *throughput) +{ + float sigma_t_x, sigma_t_y, sigma_t_z; + float alpha_x, alpha_y, alpha_z; + + subsurface_random_walk_remap(albedo.x, radius.x, anisotropy, &sigma_t_x, &alpha_x); + subsurface_random_walk_remap(albedo.y, radius.y, anisotropy, &sigma_t_y, &alpha_y); + subsurface_random_walk_remap(albedo.z, radius.z, anisotropy, &sigma_t_z, &alpha_z); + + /* Throughput already contains closure weight at this point, which includes the + * albedo, as well as closure mixing and Fresnel weights. Divide out the albedo + * which will be added through scattering. */ + *throughput = safe_divide_color(*throughput, albedo); + + /* With low albedo values (like 0.025) we get diffusion_length 1.0 and + * infinite phase functions. To avoid a sharp discontinuity as we go from + * such values to 0.0, increase alpha and reduce the throughput to compensate. 
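+ * For example, alpha_x = 0.1 is raised to min_alpha = 0.2 while throughput.x is scaled by
+ * 0.1 / 0.2 = 0.5, keeping the expected contribution roughly unchanged while avoiding the
+ * discontinuity.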
*/ + const float min_alpha = 0.2f; + if (alpha_x < min_alpha) { + (*throughput).x *= alpha_x / min_alpha; + alpha_x = min_alpha; + } + if (alpha_y < min_alpha) { + (*throughput).y *= alpha_y / min_alpha; + alpha_y = min_alpha; + } + if (alpha_z < min_alpha) { + (*throughput).z *= alpha_z / min_alpha; + alpha_z = min_alpha; + } + + *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z); + *alpha = make_float3(alpha_x, alpha_y, alpha_z); +} + +/* References for Dwivedi sampling: + * + * [1] "A Zero-variance-based Sampling Scheme for Monte Carlo Subsurface Scattering" + * by Jaroslav KÅ™ivánek and Eugene d'Eon (SIGGRAPH 2014) + * https://cgg.mff.cuni.cz/~jaroslav/papers/2014-zerovar/ + * + * [2] "Improving the Dwivedi Sampling Scheme" + * by Johannes Meng, Johannes Hanika, and Carsten Dachsbacher (EGSR 2016) + * https://cg.ivd.kit.edu/1951.php + * + * [3] "Zero-Variance Theory for Efficient Subsurface Scattering" + * by Eugene d'Eon and Jaroslav KÅ™ivánek (SIGGRAPH 2020) + * https://iliyan.com/publications/RenderingCourse2020 + */ + +ccl_device_forceinline float eval_phase_dwivedi(float v, float phase_log, float cos_theta) +{ + /* Eq. 9 from [2] using precomputed log((v + 1) / (v - 1)) */ + return 1.0f / ((v - cos_theta) * phase_log); +} + +ccl_device_forceinline float sample_phase_dwivedi(float v, float phase_log, float rand) +{ + /* Based on Eq. 10 from [2]: `v - (v + 1) * pow((v - 1) / (v + 1), rand)` + * Since we're already pre-computing `phase_log = log((v + 1) / (v - 1))` for the evaluation, + * we can implement the power function like this. */ + return v - (v + 1.0f) * expf(-rand * phase_log); +} + +ccl_device_forceinline float diffusion_length_dwivedi(float alpha) +{ + /* Eq. 67 from [3] */ + return 1.0f / sqrtf(1.0f - powf(alpha, 2.44294f - 0.0215813f * alpha + 0.578637f / alpha)); +} + +ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, float randv) +{ + float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta); + float phi = M_2PI_F * randv; + float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta); + + float3 T, B; + make_orthonormals(D, &T, &B); + return dir.x * T + dir.y * B + dir.z * D; +} + +ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t, + float t, + bool hit, + ccl_private float3 *transmittance) +{ + float3 T = volume_color_transmittance(sigma_t, t); + if (transmittance) { + *transmittance = T; + } + return hit ? T : sigma_t * T; +} + +/* Define the below variable to get the similarity code active, + * and the value represents the cutoff level */ +#define SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL 9 + +ccl_device_inline bool subsurface_random_walk(KernelGlobals kg, + IntegratorState state, + RNGState rng_state, + ccl_private Ray &ray, + ccl_private LocalIntersection &ss_isect) +{ + float bssrdf_u, bssrdf_v; + path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + + const float3 P = INTEGRATOR_STATE(state, ray, P); + const float3 N = INTEGRATOR_STATE(state, ray, D); + const float ray_dP = INTEGRATOR_STATE(state, ray, dP); + const float time = INTEGRATOR_STATE(state, ray, time); + const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng); + const int object = INTEGRATOR_STATE(state, isect, object); + + /* Sample diffuse surface scatter into the object. */ + float3 D; + float pdf; + sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf); + if (dot(-Ng, D) <= 0.0f) { + return false; + } + + /* Setup ray. 
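+ * The start point is pushed slightly inside the surface via ray_offset(P, -Ng) to avoid an
+ * immediate self-intersection; it is restored further below if the first segment leaves the
+ * surface without hitting anything.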
*/ + ray.P = ray_offset(P, -Ng); + ray.D = D; + ray.t = FLT_MAX; + ray.time = time; + ray.dP = ray_dP; + ray.dD = differential_zero_compact(); + +#ifndef __KERNEL_OPTIX__ + /* Compute or fetch object transforms. */ + Transform ob_itfm ccl_optional_struct_init; + Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm); +#endif + + /* Convert subsurface to volume coefficients. + * The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */ + const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo); + const float3 radius = INTEGRATOR_STATE(state, subsurface, radius); + const float anisotropy = INTEGRATOR_STATE(state, subsurface, anisotropy); + + float3 sigma_t, alpha; + float3 throughput = INTEGRATOR_STATE_WRITE(state, path, throughput); + subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput); + float3 sigma_s = sigma_t * alpha; + + /* Theoretically it should be better to use the exact alpha for the channel we're sampling at + * each bounce, but in practice there doesn't seem to be a noticeable difference in exchange + * for making the code significantly more complex and slower (if direction sampling depends on + * the sampled channel, we need to compute its PDF per-channel and consider it for MIS later on). + * + * Since the strength of the guided sampling increases as alpha gets lower, using a value that + * is too low results in fireflies while one that's too high just gives a bit more noise. + * Therefore, the code here uses the highest of the three albedos to be safe. */ + const float diffusion_length = diffusion_length_dwivedi(max3(alpha)); + + if (diffusion_length == 1.0f) { + /* With specific values of alpha the length might become 1, which in asymptotic makes phase to + * be infinite. After first bounce it will cause throughput to be 0. Do early output, avoiding + * numerical issues and extra unneeded work. */ + return false; + } + + /* Precompute term for phase sampling. */ + const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f)); + + /* Modify state for RNGs, decorrelated from other paths. */ + rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef); + + /* Random walk until we hit the surface again. */ + bool hit = false; + bool have_opposite_interface = false; + float opposite_distance = 0.0f; + + /* TODO: Disable for `alpha > 0.999` or so? */ + /* Our heuristic, a compromise between guiding and classic. */ + const float guided_fraction = 1.0f - fmaxf(0.5f, powf(fabsf(anisotropy), 0.125f)); + +#ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL + float3 sigma_s_star = sigma_s * (1.0f - anisotropy); + float3 sigma_t_star = sigma_t - sigma_s + sigma_s_star; + float3 sigma_t_org = sigma_t; + float3 sigma_s_org = sigma_s; + const float anisotropy_org = anisotropy; + const float guided_fraction_org = guided_fraction; +#endif + + for (int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) { + /* Advance random number offset. 
*/ + rng_state.rng_offset += PRNG_BOUNCE_NUM; + +#ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL + // shadow with local variables according to depth + float anisotropy, guided_fraction; + float3 sigma_s, sigma_t; + if (bounce <= SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL) { + anisotropy = anisotropy_org; + guided_fraction = guided_fraction_org; + sigma_t = sigma_t_org; + sigma_s = sigma_s_org; + } + else { + anisotropy = 0.0f; + guided_fraction = 0.75f; // back to isotropic heuristic from Blender + sigma_t = sigma_t_star; + sigma_s = sigma_s_star; + } +#endif + + /* Sample color channel, use MIS with balance heuristic. */ + float rphase = path_state_rng_1D(kg, &rng_state, PRNG_PHASE_CHANNEL); + float3 channel_pdf; + int channel = volume_sample_channel(alpha, throughput, rphase, &channel_pdf); + float sample_sigma_t = volume_channel_get(sigma_t, channel); + float randt = path_state_rng_1D(kg, &rng_state, PRNG_SCATTER_DISTANCE); + + /* We need the result of the ray-cast to compute the full guided PDF, so just remember the + * relevant terms to avoid recomputing them later. */ + float backward_fraction = 0.0f; + float forward_pdf_factor = 0.0f; + float forward_stretching = 1.0f; + float backward_pdf_factor = 0.0f; + float backward_stretching = 1.0f; + + /* For the initial ray, we already know the direction, so just do classic distance sampling. */ + if (bounce > 0) { + /* Decide whether we should use guided or classic sampling. */ + bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_LIGHT_TERMINATE) < guided_fraction); + + /* Determine if we want to sample away from the incoming interface. + * This only happens if we found a nearby opposite interface, and the probability for it + * depends on how close we are to it already. + * This probability term comes from the recorded presentation of [3]. */ + bool guide_backward = false; + if (have_opposite_interface) { + /* Compute distance of the random walk between the tangent plane at the starting point + * and the assumed opposite interface (the parallel plane that contains the point we + * found in our ray query for the opposite side). */ + float x = clamp(dot(ray.P - P, -N), 0.0f, opposite_distance); + backward_fraction = 1.0f / + (1.0f + expf((opposite_distance - 2.0f * x) / diffusion_length)); + guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE) < backward_fraction; + } + + /* Sample scattering direction. */ + float scatter_u, scatter_v; + path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &scatter_u, &scatter_v); + float cos_theta; + float hg_pdf; + if (guided) { + cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u); + /* The backwards guiding distribution is just mirrored along `sd->N`, so swapping the + * sign here is enough to sample from that instead. */ + if (guide_backward) { + cos_theta = -cos_theta; + } + float3 newD = direction_from_cosine(N, cos_theta, scatter_v); + hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy); + ray.D = newD; + } + else { + float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf); + cos_theta = dot(newD, N); + ray.D = newD; + } + + /* Compute PDF factor caused by phase sampling (as the ratio of guided / classic). + * Since phase sampling is channel-independent, we can get away with applying a factor + * to the guided PDF, which implicitly means pulling out the classic PDF term and letting + * it cancel with an equivalent term in the numerator of the full estimator. 
+ * For the backward PDF, we again reuse the same probability distribution with a sign swap. + */ + forward_pdf_factor = M_1_2PI_F * eval_phase_dwivedi(diffusion_length, phase_log, cos_theta) / + hg_pdf; + backward_pdf_factor = M_1_2PI_F * + eval_phase_dwivedi(diffusion_length, phase_log, -cos_theta) / hg_pdf; + + /* Prepare distance sampling. + * For the backwards case, this also needs the sign swapped since now directions against + * `sd->N` (and therefore with negative cos_theta) are preferred. */ + forward_stretching = (1.0f - cos_theta / diffusion_length); + backward_stretching = (1.0f + cos_theta / diffusion_length); + if (guided) { + sample_sigma_t *= guide_backward ? backward_stretching : forward_stretching; + } + } + + /* Sample direction along ray. */ + float t = -logf(1.0f - randt) / sample_sigma_t; + + /* On the first bounce, we use the ray-cast to check if the opposite side is nearby. + * If yes, we will later use backwards guided sampling in order to have a decent + * chance of connecting to it. + * TODO: Maybe use less than 10 times the mean free path? */ + ray.t = (bounce == 0) ? max(t, 10.0f / (min3(sigma_t))) : t; + scene_intersect_local(kg, &ray, &ss_isect, object, NULL, 1); + hit = (ss_isect.num_hits > 0); + + if (hit) { +#ifdef __KERNEL_OPTIX__ + /* t is always in world space with OptiX. */ + ray.t = ss_isect.hits[0].t; +#else + /* Compute world space distance to surface hit. */ + float3 D = transform_direction(&ob_itfm, ray.D); + D = normalize(D) * ss_isect.hits[0].t; + ray.t = len(transform_direction(&ob_tfm, D)); +#endif + } + + if (bounce == 0) { + /* Check if we hit the opposite side. */ + if (hit) { + have_opposite_interface = true; + opposite_distance = dot(ray.P + ray.t * ray.D - P, -N); + } + /* Apart from the opposite side check, we were supposed to only trace up to distance t, + * so check if there would have been a hit in that case. */ + hit = ray.t < t; + } + + /* Use the distance to the exit point for the throughput update if we found one. */ + if (hit) { + t = ray.t; + } + else if (bounce == 0) { + /* Restore original position if nothing was hit after the first bounce, + * without the ray_offset() that was added to avoid self-intersection. + * Otherwise if that offset is relatively large compared to the scattering + * radius, we never go back up high enough to exit the surface. */ + ray.P = P; + } + + /* Advance to new scatter location. */ + ray.P += t * ray.D; + + float3 transmittance; + float3 pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance); + if (bounce > 0) { + /* Compute PDF just like we do for classic sampling, but with the stretched sigma_t. */ + float3 guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL); + + if (have_opposite_interface) { + /* First step of MIS: Depending on geometry we might have two methods for guided + * sampling, so perform MIS between them. */ + float3 back_pdf = subsurface_random_walk_pdf(backward_stretching * sigma_t, t, hit, NULL); + guided_pdf = mix( + guided_pdf * forward_pdf_factor, back_pdf * backward_pdf_factor, backward_fraction); + } + else { + /* Just include phase sampling factor otherwise. */ + guided_pdf *= forward_pdf_factor; + } + + /* Now we apply the MIS balance heuristic between the classic and guided sampling. */ + pdf = mix(pdf, guided_pdf, guided_fraction); + } + + /* Finally, we're applying MIS again to combine the three color channels. 
+ * Altogether, the MIS computation combines up to nine different estimators: + * {classic, guided, backward_guided} x {r, g, b} */ + throughput *= (hit ? transmittance : sigma_s * transmittance) / dot(channel_pdf, pdf); + + if (hit) { + /* If we hit the surface, we are done. */ + break; + } + else if (throughput.x < VOLUME_THROUGHPUT_EPSILON && + throughput.y < VOLUME_THROUGHPUT_EPSILON && + throughput.z < VOLUME_THROUGHPUT_EPSILON) { + /* Avoid unnecessary work and precision issue when throughput gets really small. */ + break; + } + } + + if (hit) { + kernel_assert(isfinite3_safe(throughput)); + INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput; + } + + return hit; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/volume_stack.h b/intern/cycles/kernel/integrator/volume_stack.h new file mode 100644 index 00000000000..cf69826ffff --- /dev/null +++ b/intern/cycles/kernel/integrator/volume_stack.h @@ -0,0 +1,225 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Volume Stack + * + * This is an array of object/shared ID's that the current segment of the path + * is inside of. */ + +template +ccl_device void volume_stack_enter_exit(KernelGlobals kg, + ccl_private const ShaderData *sd, + StackReadOp stack_read, + StackWriteOp stack_write) +{ + /* todo: we should have some way for objects to indicate if they want the + * world shader to work inside them. excluding it by default is problematic + * because non-volume objects can't be assumed to be closed manifolds */ + if (!(sd->flag & SD_HAS_VOLUME)) { + return; + } + + if (sd->flag & SD_BACKFACING) { + /* Exit volume object: remove from stack. */ + for (int i = 0;; i++) { + VolumeStack entry = stack_read(i); + if (entry.shader == SHADER_NONE) { + break; + } + + if (entry.object == sd->object) { + /* Shift back next stack entries. */ + do { + entry = stack_read(i + 1); + stack_write(i, entry); + i++; + } while (entry.shader != SHADER_NONE); + + return; + } + } + } + else { + /* Enter volume object: add to stack. */ + int i; + for (i = 0;; i++) { + VolumeStack entry = stack_read(i); + if (entry.shader == SHADER_NONE) { + break; + } + + /* Already in the stack? then we have nothing to do. */ + if (entry.object == sd->object) { + return; + } + } + + /* If we exceed the stack limit, ignore. */ + if (i >= kernel_data.volume_stack_size - 1) { + return; + } + + /* Add to the end of the stack. 
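+ * As an illustration: with a world volume in the stack, entering a volume object A and then a
+ * nested object B (hypothetical objects) leaves the stack as [world, A, B, SHADER_NONE]; exiting
+ * A later shifts B down so the SHADER_NONE terminator always stays at the end.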
*/ + const VolumeStack new_entry = {sd->object, sd->shader}; + const VolumeStack empty_entry = {OBJECT_NONE, SHADER_NONE}; + stack_write(i, new_entry); + stack_write(i + 1, empty_entry); + } +} + +ccl_device void volume_stack_enter_exit(KernelGlobals kg, + IntegratorState state, + ccl_private const ShaderData *sd) +{ + volume_stack_enter_exit( + kg, + sd, + [=](const int i) { return integrator_state_read_volume_stack(state, i); }, + [=](const int i, const VolumeStack entry) { + integrator_state_write_volume_stack(state, i, entry); + }); +} + +ccl_device void shadow_volume_stack_enter_exit(KernelGlobals kg, + IntegratorShadowState state, + ccl_private const ShaderData *sd) +{ + volume_stack_enter_exit( + kg, + sd, + [=](const int i) { return integrator_state_read_shadow_volume_stack(state, i); }, + [=](const int i, const VolumeStack entry) { + integrator_state_write_shadow_volume_stack(state, i, entry); + }); +} + +/* Clean stack after the last bounce. + * + * It is expected that all volumes are closed manifolds, so at the time when ray + * hits nothing (for example, it is a last bounce which goes to environment) the + * only expected volume in the stack is the world's one. All the rest volume + * entries should have been exited already. + * + * This isn't always true because of ray intersection precision issues, which + * could lead us to an infinite non-world volume in the stack, causing render + * artifacts. + * + * Use this function after the last bounce to get rid of all volumes apart from + * the world's one after the last bounce to avoid render artifacts. + */ +ccl_device_inline void volume_stack_clean(KernelGlobals kg, IntegratorState state) +{ + if (kernel_data.background.volume_shader != SHADER_NONE) { + /* Keep the world's volume in stack. */ + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 1, shader) = SHADER_NONE; + } + else { + INTEGRATOR_STATE_ARRAY_WRITE(state, volume_stack, 0, shader) = SHADER_NONE; + } +} + +template +ccl_device float volume_stack_step_size(KernelGlobals kg, StackReadOp stack_read) +{ + float step_size = FLT_MAX; + + for (int i = 0;; i++) { + VolumeStack entry = stack_read(i); + if (entry.shader == SHADER_NONE) { + break; + } + + int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags; + + bool heterogeneous = false; + + if (shader_flag & SD_HETEROGENEOUS_VOLUME) { + heterogeneous = true; + } + else if (shader_flag & SD_NEED_VOLUME_ATTRIBUTES) { + /* We want to render world or objects without any volume grids + * as homogeneous, but can only verify this at run-time since other + * heterogeneous volume objects may be using the same shader. 
*/ + int object = entry.object; + if (object != OBJECT_NONE) { + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) { + heterogeneous = true; + } + } + } + + if (heterogeneous) { + float object_step_size = object_volume_step_size(kg, entry.object); + object_step_size *= kernel_data.integrator.volume_step_rate; + step_size = fminf(object_step_size, step_size); + } + } + + return step_size; +} + +typedef enum VolumeSampleMethod { + VOLUME_SAMPLE_NONE = 0, + VOLUME_SAMPLE_DISTANCE = (1 << 0), + VOLUME_SAMPLE_EQUIANGULAR = (1 << 1), + VOLUME_SAMPLE_MIS = (VOLUME_SAMPLE_DISTANCE | VOLUME_SAMPLE_EQUIANGULAR), +} VolumeSampleMethod; + +ccl_device VolumeSampleMethod volume_stack_sample_method(KernelGlobals kg, IntegratorState state) +{ + VolumeSampleMethod method = VOLUME_SAMPLE_NONE; + + for (int i = 0;; i++) { + VolumeStack entry = integrator_state_read_volume_stack(state, i); + if (entry.shader == SHADER_NONE) { + break; + } + + int shader_flag = kernel_tex_fetch(__shaders, (entry.shader & SHADER_MASK)).flags; + + if (shader_flag & SD_VOLUME_MIS) { + /* Multiple importance sampling. */ + return VOLUME_SAMPLE_MIS; + } + else if (shader_flag & SD_VOLUME_EQUIANGULAR) { + /* Distance + equiangular sampling -> multiple importance sampling. */ + if (method == VOLUME_SAMPLE_DISTANCE) { + return VOLUME_SAMPLE_MIS; + } + + /* Only equiangular sampling. */ + method = VOLUME_SAMPLE_EQUIANGULAR; + } + else { + /* Distance + equiangular sampling -> multiple importance sampling. */ + if (method == VOLUME_SAMPLE_EQUIANGULAR) { + return VOLUME_SAMPLE_MIS; + } + + /* Distance sampling only. */ + method = VOLUME_SAMPLE_DISTANCE; + } + } + + return method; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h deleted file mode 100644 index 464ecb183cb..00000000000 --- a/intern/cycles/kernel/kernel_textures.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef KERNEL_TEX -# define KERNEL_TEX(type, name) -#endif - -/* BVH2, not used for OptiX or Embree. 
*/ -KERNEL_TEX(float4, __bvh_nodes) -KERNEL_TEX(float4, __bvh_leaf_nodes) -KERNEL_TEX(uint, __prim_type) -KERNEL_TEX(uint, __prim_visibility) -KERNEL_TEX(uint, __prim_index) -KERNEL_TEX(uint, __prim_object) -KERNEL_TEX(uint, __object_node) -KERNEL_TEX(float2, __prim_time) - -/* objects */ -KERNEL_TEX(KernelObject, __objects) -KERNEL_TEX(Transform, __object_motion_pass) -KERNEL_TEX(DecomposedTransform, __object_motion) -KERNEL_TEX(uint, __object_flag) -KERNEL_TEX(float, __object_volume_step) - -/* cameras */ -KERNEL_TEX(DecomposedTransform, __camera_motion) - -/* triangles */ -KERNEL_TEX(uint, __tri_shader) -KERNEL_TEX(float4, __tri_vnormal) -KERNEL_TEX(uint4, __tri_vindex) -KERNEL_TEX(uint, __tri_patch) -KERNEL_TEX(float2, __tri_patch_uv) -KERNEL_TEX(float4, __tri_verts) - -/* curves */ -KERNEL_TEX(KernelCurve, __curves) -KERNEL_TEX(float4, __curve_keys) -KERNEL_TEX(KernelCurveSegment, __curve_segments) - -/* patches */ -KERNEL_TEX(uint, __patches) - -/* attributes */ -KERNEL_TEX(uint4, __attributes_map) -KERNEL_TEX(float, __attributes_float) -KERNEL_TEX(float2, __attributes_float2) -KERNEL_TEX(float4, __attributes_float3) -KERNEL_TEX(uchar4, __attributes_uchar4) - -/* lights */ -KERNEL_TEX(KernelLightDistribution, __light_distribution) -KERNEL_TEX(KernelLight, __lights) -KERNEL_TEX(float2, __light_background_marginal_cdf) -KERNEL_TEX(float2, __light_background_conditional_cdf) - -/* particles */ -KERNEL_TEX(KernelParticle, __particles) - -/* shaders */ -KERNEL_TEX(uint4, __svm_nodes) -KERNEL_TEX(KernelShader, __shaders) - -/* lookup tables */ -KERNEL_TEX(float, __lookup_table) - -/* sobol */ -KERNEL_TEX(float, __sample_pattern_lut) - -/* image textures */ -KERNEL_TEX(TextureInfo, __texture_info) - -/* ies lights */ -KERNEL_TEX(float, __ies) - -#undef KERNEL_TEX diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h deleted file mode 100644 index 4312c1b67d2..00000000000 --- a/intern/cycles/kernel/kernel_types.h +++ /dev/null @@ -1,1608 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE) -# include -# include -# define __EMBREE__ -#endif - -#include "util/util_math.h" -#include "util/util_math_fast.h" -#include "util/util_math_intersect.h" -#include "util/util_projection.h" -#include "util/util_texture.h" -#include "util/util_transform.h" -#include "util/util_static_assert.h" - -#include "kernel/svm/svm_types.h" - -#ifndef __KERNEL_GPU__ -# define __KERNEL_CPU__ -#endif - -/* TODO(sergey): This is only to make it possible to include this header - * from outside of the kernel. but this could be done somewhat cleaner? 
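/* The deleted kernel_textures.h above is an X-macro list: a consumer defines
 * KERNEL_TEX(type, name) to whatever it needs and then includes the list, so the same
 * set of entries can expand into struct members, bindings or name tables. A minimal
 * standalone illustration of the idea, with the entries inlined and shortened instead
 * of #included: */
#define TEXTURE_LIST_SKETCH \
  KERNEL_TEX(float, lookup_table) \
  KERNEL_TEX(unsigned int, object_flag)

/* Expansion 1: a pointer member for each entry. */
#define KERNEL_TEX(type, name) const type *name;
struct TextureArraysSketch {
  TEXTURE_LIST_SKETCH
};
#undef KERNEL_TEX

/* Expansion 2: printable names for the same entries. */
#define KERNEL_TEX(type, name) #name,
static const char *texture_names_sketch[] = {TEXTURE_LIST_SKETCH};
#undef KERNEL_TEX
#undef TEXTURE_LIST_SKETCH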
- */ -#ifndef ccl_addr_space -# define ccl_addr_space -#endif - -CCL_NAMESPACE_BEGIN - -/* Constants */ -#define OBJECT_MOTION_PASS_SIZE 2 -#define FILTER_TABLE_SIZE 1024 -#define RAMP_TABLE_SIZE 256 -#define SHUTTER_TABLE_SIZE 256 - -#define BSSRDF_MIN_RADIUS 1e-8f -#define BSSRDF_MAX_HITS 4 -#define BSSRDF_MAX_BOUNCES 256 -#define LOCAL_MAX_HITS 4 - -#define VOLUME_BOUNDS_MAX 1024 - -#define BECKMANN_TABLE_SIZE 256 - -#define SHADER_NONE (~0) -#define OBJECT_NONE (~0) -#define PRIM_NONE (~0) -#define LAMP_NONE (~0) -#define ID_NONE (0.0f) -#define PASS_UNUSED (~0) - -#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024U -#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4U - -#ifdef __KERNEL_CPU__ -# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU -#else -# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU -#endif - -/* Kernel features */ -#define __SOBOL__ -#define __DPDU__ -#define __BACKGROUND__ -#define __CAUSTICS_TRICKS__ -#define __VISIBILITY_FLAG__ -#define __RAY_DIFFERENTIALS__ -#define __CAMERA_CLIPPING__ -#define __INTERSECTION_REFINE__ -#define __CLAMP_SAMPLE__ -#define __PATCH_EVAL__ -#define __SHADOW_CATCHER__ -#define __DENOISING_FEATURES__ -#define __SHADER_RAYTRACE__ -#define __AO__ -#define __PASSES__ -#define __HAIR__ -#define __SVM__ -#define __EMISSION__ -#define __HOLDOUT__ -#define __TRANSPARENT_SHADOWS__ -#define __BACKGROUND_MIS__ -#define __LAMP_MIS__ -#define __CAMERA_MOTION__ -#define __OBJECT_MOTION__ -#define __BAKING__ -#define __PRINCIPLED__ -#define __SUBSURFACE__ -#define __VOLUME__ -#define __CMJ__ -#define __SHADOW_RECORD_ALL__ -#define __BRANCHED_PATH__ - -/* Device specific features */ -#ifdef __KERNEL_CPU__ -# ifdef WITH_OSL -# define __OSL__ -# endif -# define __VOLUME_RECORD_ALL__ -#endif /* __KERNEL_CPU__ */ - -#ifdef __KERNEL_OPTIX__ -# undef __BAKING__ -#endif /* __KERNEL_OPTIX__ */ - -/* Scene-based selective features compilation. 
*/ -#ifdef __KERNEL_FEATURES__ -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_CAMERA_MOTION) -# undef __CAMERA_MOTION__ -# endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_OBJECT_MOTION) -# undef __OBJECT_MOTION__ -# endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_HAIR) -# undef __HAIR__ -# endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_VOLUME) -# undef __VOLUME__ -# endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_SUBSURFACE) -# undef __SUBSURFACE__ -# endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_BAKING) -# undef __BAKING__ -# endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_PATCH_EVALUATION) -# undef __PATCH_EVAL__ -# endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_TRANSPARENT) -# undef __TRANSPARENT_SHADOWS__ -# endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_SHADOW_CATCHER) -# undef __SHADOW_CATCHER__ -# endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_PRINCIPLED) -# undef __PRINCIPLED__ -# endif -# if !(__KERNEL_FEATURES & KERNEL_FEATURE_DENOISING) -# undef __DENOISING_FEATURES__ -# endif -#endif - -#ifdef WITH_CYCLES_DEBUG_NAN -# define __KERNEL_DEBUG_NAN__ -#endif - -/* Features that enable others */ - -#if defined(__SUBSURFACE__) || defined(__SHADER_RAYTRACE__) -# define __BVH_LOCAL__ -#endif - -/* Path Tracing - * note we need to keep the u/v pairs at even values */ - -enum PathTraceDimension { - PRNG_FILTER_U = 0, - PRNG_FILTER_V = 1, - PRNG_LENS_U = 2, - PRNG_LENS_V = 3, - PRNG_TIME = 4, - PRNG_UNUSED_0 = 5, - PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */ - PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */ - PRNG_BASE_NUM = 10, - - PRNG_BSDF_U = 0, - PRNG_BSDF_V = 1, - PRNG_LIGHT_U = 2, - PRNG_LIGHT_V = 3, - PRNG_LIGHT_TERMINATE = 4, - PRNG_TERMINATE = 5, - PRNG_PHASE_CHANNEL = 6, - PRNG_SCATTER_DISTANCE = 7, - PRNG_BOUNCE_NUM = 8, - - PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */ - PRNG_BEVEL_V = 7, -}; - -enum SamplingPattern { - SAMPLING_PATTERN_SOBOL = 0, - SAMPLING_PATTERN_PMJ = 1, - - SAMPLING_NUM_PATTERNS, -}; - -/* These flags values correspond to `raytypes` in `osl.cpp`, so keep them in sync! */ - -enum PathRayFlag { - /* -------------------------------------------------------------------- - * Ray visibility. - * - * NOTE: Recalculated after a surface bounce. - */ - - PATH_RAY_CAMERA = (1U << 0U), - PATH_RAY_REFLECT = (1U << 1U), - PATH_RAY_TRANSMIT = (1U << 2U), - PATH_RAY_DIFFUSE = (1U << 3U), - PATH_RAY_GLOSSY = (1U << 4U), - PATH_RAY_SINGULAR = (1U << 5U), - PATH_RAY_TRANSPARENT = (1U << 6U), - PATH_RAY_VOLUME_SCATTER = (1U << 7U), - - /* Shadow ray visibility. */ - PATH_RAY_SHADOW_OPAQUE = (1U << 8U), - PATH_RAY_SHADOW_TRANSPARENT = (1U << 9U), - PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE | PATH_RAY_SHADOW_TRANSPARENT), - - /* Special flag to tag unaligned BVH nodes. - * Only set and used in BVH nodes to distinguish how to interpret bounding box information stored - * in the node (either it should be intersected as AABB or as OBBU). */ - PATH_RAY_NODE_UNALIGNED = (1U << 10U), - - /* Subset of flags used for ray visibility for intersection. - * - * NOTE: SHADOW_CATCHER macros below assume there are no more than - * 16 visibility bits. */ - PATH_RAY_ALL_VISIBILITY = ((1U << 11U) - 1U), - - /* -------------------------------------------------------------------- - * Path flags. - */ - - /* Don't apply multiple importance sampling weights to emission from - * lamp or surface hits, because they were not direct light sampled. 
*/ - PATH_RAY_MIS_SKIP = (1U << 11U), - - /* Diffuse bounce earlier in the path, skip SSS to improve performance - * and avoid branching twice with disk sampling SSS. */ - PATH_RAY_DIFFUSE_ANCESTOR = (1U << 12U), - - /* Single pass has been written. */ - PATH_RAY_SINGLE_PASS_DONE = (1U << 13U), - - /* Zero background alpha, for camera or transparent glass rays. */ - PATH_RAY_TRANSPARENT_BACKGROUND = (1U << 14U), - - /* Terminate ray immediately at next bounce. */ - PATH_RAY_TERMINATE_ON_NEXT_SURFACE = (1U << 15U), - PATH_RAY_TERMINATE_IN_NEXT_VOLUME = (1U << 16U), - - /* Ray is to be terminated, but continue with transparent bounces and - * emission as long as we encounter them. This is required to make the - * MIS between direct and indirect light rays match, as shadow rays go - * through transparent surfaces to reach emission too. */ - PATH_RAY_TERMINATE_AFTER_TRANSPARENT = (1U << 17U), - - /* Terminate ray immediately after volume shading. */ - PATH_RAY_TERMINATE_AFTER_VOLUME = (1U << 18U), - - /* Ray is to be terminated. */ - PATH_RAY_TERMINATE = (PATH_RAY_TERMINATE_ON_NEXT_SURFACE | PATH_RAY_TERMINATE_IN_NEXT_VOLUME | - PATH_RAY_TERMINATE_AFTER_TRANSPARENT | PATH_RAY_TERMINATE_AFTER_VOLUME), - - /* Path and shader is being evaluated for direct lighting emission. */ - PATH_RAY_EMISSION = (1U << 19U), - - /* Perform subsurface scattering. */ - PATH_RAY_SUBSURFACE_RANDOM_WALK = (1U << 20U), - PATH_RAY_SUBSURFACE_DISK = (1U << 21U), - PATH_RAY_SUBSURFACE_USE_FRESNEL = (1U << 22U), - PATH_RAY_SUBSURFACE = (PATH_RAY_SUBSURFACE_RANDOM_WALK | PATH_RAY_SUBSURFACE_DISK | - PATH_RAY_SUBSURFACE_USE_FRESNEL), - - /* Contribute to denoising features. */ - PATH_RAY_DENOISING_FEATURES = (1U << 23U), - - /* Render pass categories. */ - PATH_RAY_REFLECT_PASS = (1U << 24U), - PATH_RAY_TRANSMISSION_PASS = (1U << 25U), - PATH_RAY_VOLUME_PASS = (1U << 26U), - PATH_RAY_ANY_PASS = (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS | PATH_RAY_VOLUME_PASS), - - /* Shadow ray is for a light or surface, or AO. */ - PATH_RAY_SHADOW_FOR_LIGHT = (1U << 27U), - PATH_RAY_SHADOW_FOR_AO = (1U << 28U), - - /* A shadow catcher object was hit and the path was split into two. */ - PATH_RAY_SHADOW_CATCHER_HIT = (1U << 29U), - - /* A shadow catcher object was hit and this path traces only shadow catchers, writing them into - * their dedicated pass for later division. - * - * NOTE: Is not covered with `PATH_RAY_ANY_PASS` because shadow catcher does special handling - * which is separate from the light passes. */ - PATH_RAY_SHADOW_CATCHER_PASS = (1U << 30U), - - /* Path is evaluating background for an approximate shadow catcher with non-transparent film. */ - PATH_RAY_SHADOW_CATCHER_BACKGROUND = (1U << 31U), -}; - -/* Configure ray visibility bits for rays and objects respectively, - * to make shadow catchers work. - * - * On shadow catcher paths we want to ignore any intersections with non-catchers, - * whereas on regular paths we want to intersect all objects. */ - -#define SHADOW_CATCHER_VISIBILITY_SHIFT(visibility) ((visibility) << 16) - -#define SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility) \ - (((path_flag)&PATH_RAY_SHADOW_CATCHER_PASS) ? SHADOW_CATCHER_VISIBILITY_SHIFT(visibility) : \ - (visibility)) - -#define SHADOW_CATCHER_OBJECT_VISIBILITY(is_shadow_catcher, visibility) \ - (((is_shadow_catcher) ? 
SHADOW_CATCHER_VISIBILITY_SHIFT(visibility) : 0) | (visibility)) - -/* Closure Label */ - -typedef enum ClosureLabel { - LABEL_NONE = 0, - LABEL_TRANSMIT = 1, - LABEL_REFLECT = 2, - LABEL_DIFFUSE = 4, - LABEL_GLOSSY = 8, - LABEL_SINGULAR = 16, - LABEL_TRANSPARENT = 32, - LABEL_VOLUME_SCATTER = 64, - LABEL_TRANSMIT_TRANSPARENT = 128, - LABEL_SUBSURFACE_SCATTER = 256, -} ClosureLabel; - -/* Render Passes */ - -#define PASS_NAME_JOIN(a, b) a##_##b -#define PASSMASK(pass) (1 << ((PASS_NAME_JOIN(PASS, pass)) % 32)) - -// NOTE: Keep in sync with `Pass::get_type_enum()`. -typedef enum PassType { - PASS_NONE = 0, - - /* Light Passes */ - PASS_COMBINED = 1, - PASS_EMISSION, - PASS_BACKGROUND, - PASS_AO, - PASS_SHADOW, - PASS_DIFFUSE, - PASS_DIFFUSE_DIRECT, - PASS_DIFFUSE_INDIRECT, - PASS_GLOSSY, - PASS_GLOSSY_DIRECT, - PASS_GLOSSY_INDIRECT, - PASS_TRANSMISSION, - PASS_TRANSMISSION_DIRECT, - PASS_TRANSMISSION_INDIRECT, - PASS_VOLUME, - PASS_VOLUME_DIRECT, - PASS_VOLUME_INDIRECT, - PASS_CATEGORY_LIGHT_END = 31, - - /* Data passes */ - PASS_DEPTH = 32, - PASS_POSITION, - PASS_NORMAL, - PASS_ROUGHNESS, - PASS_UV, - PASS_OBJECT_ID, - PASS_MATERIAL_ID, - PASS_MOTION, - PASS_MOTION_WEIGHT, - PASS_CRYPTOMATTE, - PASS_AOV_COLOR, - PASS_AOV_VALUE, - PASS_ADAPTIVE_AUX_BUFFER, - PASS_SAMPLE_COUNT, - PASS_DIFFUSE_COLOR, - PASS_GLOSSY_COLOR, - PASS_TRANSMISSION_COLOR, - /* No Scatter color since it's tricky to define what it would even mean. */ - PASS_MIST, - PASS_DENOISING_NORMAL, - PASS_DENOISING_ALBEDO, - PASS_DENOISING_DEPTH, - - /* PASS_SHADOW_CATCHER accumulates contribution of shadow catcher object which is not affected by - * any other object. The pass accessor will divide the combined pass by the shadow catcher. The - * result of this division is then to be multiplied with the backdrop. The alpha channel of this - * pass contains number of samples which contributed to the color components of the pass. - * - * PASS_SHADOW_CATCHER_SAMPLE_COUNT contains number of samples for which the path split - * happened. - * - * PASS_SHADOW_CATCHER_MATTE contains pass which contains non-catcher objects. This pass is to be - * alpha-overed onto the backdrop (after multiplication). 
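/* A rough per-pixel sketch of the compositing described above, under simplifying
 * assumptions: a pure ratio estimate, a premultiplied matte, and none of the
 * sample-count bookkeeping that the real pass accessor stores in the alpha channels. */
struct RgbSketch {
  float r, g, b;
};

static RgbSketch composite_shadow_catcher_sketch(
    RgbSketch combined, RgbSketch catcher, RgbSketch backdrop, RgbSketch matte, float matte_alpha)
{
  auto safe_div = [](float a, float b) { return (b != 0.0f) ? a / b : 1.0f; };

  /* combined / catcher approximates how much the rest of the scene shadowed the catcher. */
  const RgbSketch shadow = {safe_div(combined.r, catcher.r),
                            safe_div(combined.g, catcher.g),
                            safe_div(combined.b, catcher.b)};

  /* Darken the backdrop by that shadow, then alpha-over the matte (non-catcher objects). */
  return {matte.r + (1.0f - matte_alpha) * backdrop.r * shadow.r,
          matte.g + (1.0f - matte_alpha) * backdrop.g * shadow.g,
          matte.b + (1.0f - matte_alpha) * backdrop.b * shadow.b};
}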
*/ - PASS_SHADOW_CATCHER, - PASS_SHADOW_CATCHER_SAMPLE_COUNT, - PASS_SHADOW_CATCHER_MATTE, - - PASS_CATEGORY_DATA_END = 63, - - PASS_BAKE_PRIMITIVE, - PASS_BAKE_DIFFERENTIAL, - PASS_CATEGORY_BAKE_END = 95, - - PASS_NUM, -} PassType; - -#define PASS_ANY (~0) - -typedef enum CryptomatteType { - CRYPT_NONE = 0, - CRYPT_OBJECT = (1 << 0), - CRYPT_MATERIAL = (1 << 1), - CRYPT_ASSET = (1 << 2), - CRYPT_ACCURATE = (1 << 3), -} CryptomatteType; - -typedef struct BsdfEval { - float3 diffuse; - float3 glossy; -} BsdfEval; - -/* Shader Flag */ - -typedef enum ShaderFlag { - SHADER_SMOOTH_NORMAL = (1 << 31), - SHADER_CAST_SHADOW = (1 << 30), - SHADER_AREA_LIGHT = (1 << 29), - SHADER_USE_MIS = (1 << 28), - SHADER_EXCLUDE_DIFFUSE = (1 << 27), - SHADER_EXCLUDE_GLOSSY = (1 << 26), - SHADER_EXCLUDE_TRANSMIT = (1 << 25), - SHADER_EXCLUDE_CAMERA = (1 << 24), - SHADER_EXCLUDE_SCATTER = (1 << 23), - SHADER_EXCLUDE_SHADOW_CATCHER = (1 << 22), - SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE | SHADER_EXCLUDE_GLOSSY | SHADER_EXCLUDE_TRANSMIT | - SHADER_EXCLUDE_CAMERA | SHADER_EXCLUDE_SCATTER | - SHADER_EXCLUDE_SHADOW_CATCHER), - - SHADER_MASK = ~(SHADER_SMOOTH_NORMAL | SHADER_CAST_SHADOW | SHADER_AREA_LIGHT | SHADER_USE_MIS | - SHADER_EXCLUDE_ANY) -} ShaderFlag; - -/* Light Type */ - -typedef enum LightType { - LIGHT_POINT, - LIGHT_DISTANT, - LIGHT_BACKGROUND, - LIGHT_AREA, - LIGHT_SPOT, - LIGHT_TRIANGLE -} LightType; - -/* Camera Type */ - -enum CameraType { CAMERA_PERSPECTIVE, CAMERA_ORTHOGRAPHIC, CAMERA_PANORAMA }; - -/* Panorama Type */ - -enum PanoramaType { - PANORAMA_EQUIRECTANGULAR = 0, - PANORAMA_FISHEYE_EQUIDISTANT = 1, - PANORAMA_FISHEYE_EQUISOLID = 2, - PANORAMA_MIRRORBALL = 3, - - PANORAMA_NUM_TYPES, -}; - -/* Differential */ - -typedef struct differential3 { - float3 dx; - float3 dy; -} differential3; - -typedef struct differential { - float dx; - float dy; -} differential; - -/* Ray */ - -typedef struct Ray { - float3 P; /* origin */ - float3 D; /* direction */ - float t; /* length of the ray */ - float time; /* time (for motion blur) */ - -#ifdef __RAY_DIFFERENTIALS__ - float dP; - float dD; -#endif -} Ray; - -/* Intersection */ - -typedef struct Intersection { - float t, u, v; - int prim; - int object; - int type; -} Intersection; - -/* Primitives */ - -typedef enum PrimitiveType { - PRIMITIVE_NONE = 0, - PRIMITIVE_TRIANGLE = (1 << 0), - PRIMITIVE_MOTION_TRIANGLE = (1 << 1), - PRIMITIVE_CURVE_THICK = (1 << 2), - PRIMITIVE_MOTION_CURVE_THICK = (1 << 3), - PRIMITIVE_CURVE_RIBBON = (1 << 4), - PRIMITIVE_MOTION_CURVE_RIBBON = (1 << 5), - PRIMITIVE_VOLUME = (1 << 6), - PRIMITIVE_LAMP = (1 << 7), - - PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE | PRIMITIVE_MOTION_TRIANGLE), - PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE_THICK | PRIMITIVE_MOTION_CURVE_THICK | - PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON), - PRIMITIVE_ALL_VOLUME = (PRIMITIVE_VOLUME), - PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE | PRIMITIVE_MOTION_CURVE_THICK | - PRIMITIVE_MOTION_CURVE_RIBBON), - PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE | PRIMITIVE_ALL_CURVE | PRIMITIVE_ALL_VOLUME | - PRIMITIVE_LAMP), - - PRIMITIVE_NUM = 8, -} PrimitiveType; - -#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << PRIMITIVE_NUM) | (type)) -#define PRIMITIVE_UNPACK_SEGMENT(type) (type >> PRIMITIVE_NUM) - -typedef enum CurveShapeType { - CURVE_RIBBON = 0, - CURVE_THICK = 1, - - CURVE_NUM_SHAPE_TYPES, -} CurveShapeType; - -/* Attributes */ - -typedef enum AttributePrimitive { - ATTR_PRIM_GEOMETRY = 0, - ATTR_PRIM_SUBD, - - ATTR_PRIM_TYPES -} 
AttributePrimitive; - -typedef enum AttributeElement { - ATTR_ELEMENT_NONE = 0, - ATTR_ELEMENT_OBJECT = (1 << 0), - ATTR_ELEMENT_MESH = (1 << 1), - ATTR_ELEMENT_FACE = (1 << 2), - ATTR_ELEMENT_VERTEX = (1 << 3), - ATTR_ELEMENT_VERTEX_MOTION = (1 << 4), - ATTR_ELEMENT_CORNER = (1 << 5), - ATTR_ELEMENT_CORNER_BYTE = (1 << 6), - ATTR_ELEMENT_CURVE = (1 << 7), - ATTR_ELEMENT_CURVE_KEY = (1 << 8), - ATTR_ELEMENT_CURVE_KEY_MOTION = (1 << 9), - ATTR_ELEMENT_VOXEL = (1 << 10) -} AttributeElement; - -typedef enum AttributeStandard { - ATTR_STD_NONE = 0, - ATTR_STD_VERTEX_NORMAL, - ATTR_STD_FACE_NORMAL, - ATTR_STD_UV, - ATTR_STD_UV_TANGENT, - ATTR_STD_UV_TANGENT_SIGN, - ATTR_STD_VERTEX_COLOR, - ATTR_STD_GENERATED, - ATTR_STD_GENERATED_TRANSFORM, - ATTR_STD_POSITION_UNDEFORMED, - ATTR_STD_POSITION_UNDISPLACED, - ATTR_STD_MOTION_VERTEX_POSITION, - ATTR_STD_MOTION_VERTEX_NORMAL, - ATTR_STD_PARTICLE, - ATTR_STD_CURVE_INTERCEPT, - ATTR_STD_CURVE_LENGTH, - ATTR_STD_CURVE_RANDOM, - ATTR_STD_PTEX_FACE_ID, - ATTR_STD_PTEX_UV, - ATTR_STD_VOLUME_DENSITY, - ATTR_STD_VOLUME_COLOR, - ATTR_STD_VOLUME_FLAME, - ATTR_STD_VOLUME_HEAT, - ATTR_STD_VOLUME_TEMPERATURE, - ATTR_STD_VOLUME_VELOCITY, - ATTR_STD_POINTINESS, - ATTR_STD_RANDOM_PER_ISLAND, - ATTR_STD_SHADOW_TRANSPARENCY, - ATTR_STD_NUM, - - ATTR_STD_NOT_FOUND = ~0 -} AttributeStandard; - -typedef enum AttributeFlag { - ATTR_FINAL_SIZE = (1 << 0), - ATTR_SUBDIVIDED = (1 << 1), -} AttributeFlag; - -typedef struct AttributeDescriptor { - AttributeElement element; - NodeAttributeType type; - uint flags; /* see enum AttributeFlag */ - int offset; -} AttributeDescriptor; - -/* Closure data */ - -#ifndef __MAX_CLOSURE__ -# define MAX_CLOSURE 64 -#else -# define MAX_CLOSURE __MAX_CLOSURE__ -#endif - -#ifndef __MAX_VOLUME_STACK_SIZE__ -# define MAX_VOLUME_STACK_SIZE 32 -#else -# define MAX_VOLUME_STACK_SIZE __MAX_VOLUME_STACK_SIZE__ -#endif - -#define MAX_VOLUME_CLOSURE 8 - -/* This struct is the base class for all closures. The common members are - * duplicated in all derived classes since we don't have C++ in the kernel - * yet, and because it lets us lay out the members to minimize padding. The - * weight member is located at the beginning of the struct for this reason. - * - * ShaderClosure has a fixed size, and any extra space must be allocated - * with closure_alloc_extra(). - * - * We pad the struct to align to 16 bytes. All shader closures are assumed - * to fit in this struct size. CPU sizes are a bit larger because float3 is - * padded to be 16 bytes, while it's only 12 bytes on the GPU. */ - -#define SHADER_CLOSURE_BASE \ - float3 weight; \ - ClosureType type; \ - float sample_weight; \ - float3 N - -typedef struct ccl_align(16) ShaderClosure -{ - SHADER_CLOSURE_BASE; - -#ifdef __KERNEL_CPU__ - float pad[2]; -#endif - float data[10]; -} -ShaderClosure; - -/* Shader Data - * - * Main shader state at a point on the surface or in a volume. All coordinates - * are in world space. - */ - -enum ShaderDataFlag { - /* Runtime flags. */ - - /* Set when ray hits backside of surface. */ - SD_BACKFACING = (1 << 0), - /* Shader has non-zero emission. */ - SD_EMISSION = (1 << 1), - /* Shader has BSDF closure. */ - SD_BSDF = (1 << 2), - /* Shader has non-singular BSDF closure. */ - SD_BSDF_HAS_EVAL = (1 << 3), - /* Shader has BSSRDF closure. */ - SD_BSSRDF = (1 << 4), - /* Shader has holdout closure. */ - SD_HOLDOUT = (1 << 5), - /* Shader has non-zero volume extinction. */ - SD_EXTINCTION = (1 << 6), - /* Shader has have volume phase (scatter) closure. 
*/ - SD_SCATTER = (1 << 7), - /* Shader has transparent closure. */ - SD_TRANSPARENT = (1 << 9), - /* BSDF requires LCG for evaluation. */ - SD_BSDF_NEEDS_LCG = (1 << 10), - - SD_CLOSURE_FLAGS = (SD_EMISSION | SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSSRDF | SD_HOLDOUT | - SD_EXTINCTION | SD_SCATTER | SD_BSDF_NEEDS_LCG), - - /* Shader flags. */ - - /* direct light sample */ - SD_USE_MIS = (1 << 16), - /* Has transparent shadow. */ - SD_HAS_TRANSPARENT_SHADOW = (1 << 17), - /* Has volume shader. */ - SD_HAS_VOLUME = (1 << 18), - /* Has only volume shader, no surface. */ - SD_HAS_ONLY_VOLUME = (1 << 19), - /* Has heterogeneous volume. */ - SD_HETEROGENEOUS_VOLUME = (1 << 20), - /* BSSRDF normal uses bump. */ - SD_HAS_BSSRDF_BUMP = (1 << 21), - /* Use equiangular volume sampling */ - SD_VOLUME_EQUIANGULAR = (1 << 22), - /* Use multiple importance volume sampling. */ - SD_VOLUME_MIS = (1 << 23), - /* Use cubic interpolation for voxels. */ - SD_VOLUME_CUBIC = (1 << 24), - /* Has data connected to the displacement input or uses bump map. */ - SD_HAS_BUMP = (1 << 25), - /* Has true displacement. */ - SD_HAS_DISPLACEMENT = (1 << 26), - /* Has constant emission (value stored in __shaders) */ - SD_HAS_CONSTANT_EMISSION = (1 << 27), - /* Needs to access attributes for volume rendering */ - SD_NEED_VOLUME_ATTRIBUTES = (1 << 28), - /* Shader has emission */ - SD_HAS_EMISSION = (1 << 29), - /* Shader has raytracing */ - SD_HAS_RAYTRACE = (1 << 30), - - SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME | - SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR | - SD_VOLUME_MIS | SD_VOLUME_CUBIC | SD_HAS_BUMP | SD_HAS_DISPLACEMENT | - SD_HAS_CONSTANT_EMISSION | SD_NEED_VOLUME_ATTRIBUTES | SD_HAS_EMISSION | - SD_HAS_RAYTRACE) -}; - -/* Object flags. */ -enum ShaderDataObjectFlag { - /* Holdout for camera rays. */ - SD_OBJECT_HOLDOUT_MASK = (1 << 0), - /* Has object motion blur. */ - SD_OBJECT_MOTION = (1 << 1), - /* Vertices have transform applied. */ - SD_OBJECT_TRANSFORM_APPLIED = (1 << 2), - /* Vertices have negative scale applied. */ - SD_OBJECT_NEGATIVE_SCALE_APPLIED = (1 << 3), - /* Object has a volume shader. */ - SD_OBJECT_HAS_VOLUME = (1 << 4), - /* Object intersects AABB of an object with volume shader. */ - SD_OBJECT_INTERSECTS_VOLUME = (1 << 5), - /* Has position for motion vertices. 
*/ - SD_OBJECT_HAS_VERTEX_MOTION = (1 << 6), - /* object is used to catch shadows */ - SD_OBJECT_SHADOW_CATCHER = (1 << 7), - /* object has volume attributes */ - SD_OBJECT_HAS_VOLUME_ATTRIBUTES = (1 << 8), - - SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK | SD_OBJECT_MOTION | SD_OBJECT_TRANSFORM_APPLIED | - SD_OBJECT_NEGATIVE_SCALE_APPLIED | SD_OBJECT_HAS_VOLUME | - SD_OBJECT_INTERSECTS_VOLUME | SD_OBJECT_SHADOW_CATCHER | - SD_OBJECT_HAS_VOLUME_ATTRIBUTES) -}; - -typedef struct ccl_align(16) ShaderData -{ - /* position */ - float3 P; - /* smooth normal for shading */ - float3 N; - /* true geometric normal */ - float3 Ng; - /* view/incoming direction */ - float3 I; - /* shader id */ - int shader; - /* booleans describing shader, see ShaderDataFlag */ - int flag; - /* booleans describing object of the shader, see ShaderDataObjectFlag */ - int object_flag; - - /* primitive id if there is one, ~0 otherwise */ - int prim; - - /* combined type and curve segment for hair */ - int type; - - /* parametric coordinates - * - barycentric weights for triangles */ - float u; - float v; - /* object id if there is one, ~0 otherwise */ - int object; - /* lamp id if there is one, ~0 otherwise */ - int lamp; - - /* motion blur sample time */ - float time; - - /* length of the ray being shaded */ - float ray_length; - -#ifdef __RAY_DIFFERENTIALS__ - /* differential of P. these are orthogonal to Ng, not N */ - differential3 dP; - /* differential of I */ - differential3 dI; - /* differential of u, v */ - differential du; - differential dv; -#endif -#ifdef __DPDU__ - /* differential of P w.r.t. parametric coordinates. note that dPdu is - * not readily suitable as a tangent for shading on triangles. */ - float3 dPdu; - float3 dPdv; -#endif - -#ifdef __OBJECT_MOTION__ - /* Object <-> world space transformations for motion blur, cached to avoid - * re-interpolating them constantly for shading. */ - Transform ob_tfm_motion; - Transform ob_itfm_motion; -#endif - - /* ray start position, only set for backgrounds */ - float3 ray_P; - float ray_dP; - -#ifdef __OSL__ - const struct KernelGlobalsCPU *osl_globals; - const struct IntegratorStateCPU *osl_path_state; - const struct IntegratorShadowStateCPU *osl_shadow_path_state; -#endif - - /* LCG state for closures that require additional random numbers. */ - uint lcg_state; - - /* Closure data, we store a fixed array of closures */ - int num_closure; - int num_closure_left; - float3 svm_closure_weight; - - /* Closure weights summed directly, so we can evaluate - * emission and shadow transparency with MAX_CLOSURE 0. */ - float3 closure_emission_background; - float3 closure_transparent_extinction; - - /* At the end so we can adjust size in ShaderDataTinyStorage. */ - struct ShaderClosure closure[MAX_CLOSURE]; -} -ShaderData; - -/* ShaderDataTinyStorage needs the same alignment as ShaderData, or else - * the pointer cast in AS_SHADER_DATA invokes undefined behavior. */ -typedef struct ccl_align(16) ShaderDataTinyStorage -{ - char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE]; -} -ShaderDataTinyStorage; -#define AS_SHADER_DATA(shader_data_tiny_storage) \ - ((ccl_private ShaderData *)shader_data_tiny_storage) - -/* Compact volume closures storage. - * - * Used for decoupled direct/indirect light closure storage. 
*/ - -typedef struct ShaderVolumeClosure { - float3 weight; - float sample_weight; - float g; -} ShaderVolumeClosure; - -typedef struct ShaderVolumePhases { - ShaderVolumeClosure closure[MAX_VOLUME_CLOSURE]; - int num_closure; -} ShaderVolumePhases; - -/* Volume Stack */ - -#ifdef __VOLUME__ -typedef struct VolumeStack { - int object; - int shader; -} VolumeStack; -#endif - -/* Struct to gather multiple nearby intersections. */ -typedef struct LocalIntersection { - int num_hits; - struct Intersection hits[LOCAL_MAX_HITS]; - float3 Ng[LOCAL_MAX_HITS]; -} LocalIntersection; - -/* Constant Kernel Data - * - * These structs are passed from CPU to various devices, and the struct layout - * must match exactly. Structs are padded to ensure 16 byte alignment, and we - * do not use float3 because its size may not be the same on all devices. */ - -typedef struct KernelCamera { - /* type */ - int type; - - /* panorama */ - int panorama_type; - float fisheye_fov; - float fisheye_lens; - float4 equirectangular_range; - - /* stereo */ - float interocular_offset; - float convergence_distance; - float pole_merge_angle_from; - float pole_merge_angle_to; - - /* matrices */ - Transform cameratoworld; - ProjectionTransform rastertocamera; - - /* differentials */ - float4 dx; - float4 dy; - - /* depth of field */ - float aperturesize; - float blades; - float bladesrotation; - float focaldistance; - - /* motion blur */ - float shuttertime; - int num_motion_steps, have_perspective_motion; - - /* clipping */ - float nearclip; - float cliplength; - - /* sensor size */ - float sensorwidth; - float sensorheight; - - /* render size */ - float width, height; - int pad1; - - /* anamorphic lens bokeh */ - float inv_aperture_ratio; - - int is_inside_volume; - - /* more matrices */ - ProjectionTransform screentoworld; - ProjectionTransform rastertoworld; - ProjectionTransform ndctoworld; - ProjectionTransform worldtoscreen; - ProjectionTransform worldtoraster; - ProjectionTransform worldtondc; - Transform worldtocamera; - - /* Stores changes in the projection matrix. Use for camera zoom motion - * blur and motion pass output for perspective camera. */ - ProjectionTransform perspective_pre; - ProjectionTransform perspective_post; - - /* Transforms for motion pass. 
*/ - Transform motion_pass_pre; - Transform motion_pass_post; - - int shutter_table_offset; - - /* Rolling shutter */ - int rolling_shutter_type; - float rolling_shutter_duration; - - int pad; -} KernelCamera; -static_assert_align(KernelCamera, 16); - -typedef struct KernelFilm { - float exposure; - int pass_flag; - - int light_pass_flag; - int pass_stride; - - int pass_combined; - int pass_depth; - int pass_position; - int pass_normal; - int pass_roughness; - int pass_motion; - - int pass_motion_weight; - int pass_uv; - int pass_object_id; - int pass_material_id; - - int pass_diffuse_color; - int pass_glossy_color; - int pass_transmission_color; - - int pass_diffuse_indirect; - int pass_glossy_indirect; - int pass_transmission_indirect; - int pass_volume_indirect; - - int pass_diffuse_direct; - int pass_glossy_direct; - int pass_transmission_direct; - int pass_volume_direct; - - int pass_emission; - int pass_background; - int pass_ao; - float pass_alpha_threshold; - - int pass_shadow; - float pass_shadow_scale; - - int pass_shadow_catcher; - int pass_shadow_catcher_sample_count; - int pass_shadow_catcher_matte; - - int filter_table_offset; - - int cryptomatte_passes; - int cryptomatte_depth; - int pass_cryptomatte; - - int pass_adaptive_aux_buffer; - int pass_sample_count; - - int pass_mist; - float mist_start; - float mist_inv_depth; - float mist_falloff; - - int pass_denoising_normal; - int pass_denoising_albedo; - int pass_denoising_depth; - - int pass_aov_color; - int pass_aov_value; - - /* XYZ to rendering color space transform. float4 instead of float3 to - * ensure consistent padding/alignment across devices. */ - float4 xyz_to_r; - float4 xyz_to_g; - float4 xyz_to_b; - float4 rgb_to_y; - - int pass_bake_primitive; - int pass_bake_differential; - - int use_approximate_shadow_catcher; - - int pad1, pad2; -} KernelFilm; -static_assert_align(KernelFilm, 16); - -typedef struct KernelFilmConvert { - int pass_offset; - int pass_stride; - - int pass_use_exposure; - int pass_use_filter; - - int pass_divide; - int pass_indirect; - - int pass_combined; - int pass_sample_count; - int pass_adaptive_aux_buffer; - int pass_motion_weight; - int pass_shadow_catcher; - int pass_shadow_catcher_sample_count; - int pass_shadow_catcher_matte; - int pass_background; - - float scale; - float exposure; - float scale_exposure; - - int use_approximate_shadow_catcher; - int use_approximate_shadow_catcher_background; - int show_active_pixels; - - /* Number of components to write to. */ - int num_components; - - /* Number of floats per pixel. When zero is the same as `num_components`. - * NOTE: Is ignored for half4 destination. */ - int pixel_stride; - - int is_denoised; - - /* Padding. */ - int pad1; -} KernelFilmConvert; -static_assert_align(KernelFilmConvert, 16); - -typedef struct KernelBackground { - /* only shader index */ - int surface_shader; - int volume_shader; - float volume_step_size; - int transparent; - float transparent_roughness_squared_threshold; - - /* portal sampling */ - float portal_weight; - int num_portals; - int portal_offset; - - /* sun sampling */ - float sun_weight; - /* xyz store direction, w the angle. float4 instead of float3 is used - * to ensure consistent padding/alignment across devices. 
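/* A tiny sketch of the layout discipline stated above: device-visible structs avoid
 * float3 (whose size differs between CPU and GPU), pack a scalar into the fourth float
 * where possible, and pad themselves to a multiple of 16 bytes. The struct below is
 * hypothetical; the real structs are verified with static_assert_align. */
struct DeviceStructSketch {
  float direction[3]; /* xyz */
  float angle;        /* stored in the fourth float instead of padding */
  int flags;
  int pad1, pad2, pad3; /* explicit padding up to 32 bytes */
};
static_assert(sizeof(DeviceStructSketch) % 16 == 0, "must stay a multiple of 16 bytes");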
*/ - float4 sun; - - /* map sampling */ - float map_weight; - int map_res_x; - int map_res_y; - - int use_mis; - - /* Padding */ - int pad1, pad2, pad3; -} KernelBackground; -static_assert_align(KernelBackground, 16); - -typedef struct KernelIntegrator { - /* emission */ - int use_direct_light; - int num_distribution; - int num_all_lights; - float pdf_triangles; - float pdf_lights; - float light_inv_rr_threshold; - - /* bounces */ - int min_bounce; - int max_bounce; - - int max_diffuse_bounce; - int max_glossy_bounce; - int max_transmission_bounce; - int max_volume_bounce; - - /* AO bounces */ - int ao_bounces; - float ao_bounces_distance; - float ao_bounces_factor; - float ao_additive_factor; - - /* transparent */ - int transparent_min_bounce; - int transparent_max_bounce; - int transparent_shadows; - - /* caustics */ - int caustics_reflective; - int caustics_refractive; - float filter_glossy; - - /* seed */ - int seed; - - /* clamp */ - float sample_clamp_direct; - float sample_clamp_indirect; - - /* mis */ - int use_lamp_mis; - - /* sampler */ - int sampling_pattern; - - /* volume render */ - int use_volumes; - int volume_max_steps; - float volume_step_rate; - - int has_shadow_catcher; - - /* padding */ - int pad1; -} KernelIntegrator; -static_assert_align(KernelIntegrator, 16); - -typedef enum KernelBVHLayout { - BVH_LAYOUT_NONE = 0, - - BVH_LAYOUT_BVH2 = (1 << 0), - BVH_LAYOUT_EMBREE = (1 << 1), - BVH_LAYOUT_OPTIX = (1 << 2), - BVH_LAYOUT_MULTI_OPTIX = (1 << 3), - BVH_LAYOUT_MULTI_OPTIX_EMBREE = (1 << 4), - - /* Default BVH layout to use for CPU. */ - BVH_LAYOUT_AUTO = BVH_LAYOUT_EMBREE, - BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX, -} KernelBVHLayout; - -typedef struct KernelBVH { - /* Own BVH */ - int root; - int have_motion; - int have_curves; - int bvh_layout; - int use_bvh_steps; - int curve_subdivisions; - - /* Custom BVH */ -#ifdef __KERNEL_OPTIX__ - OptixTraversableHandle scene; -#else -# ifdef __EMBREE__ - RTCScene scene; -# ifndef __KERNEL_64_BIT__ - int pad2; -# endif -# else - int scene, pad2; -# endif -#endif -} KernelBVH; -static_assert_align(KernelBVH, 16); - -typedef struct KernelTables { - int beckmann_offset; - int pad1, pad2, pad3; -} KernelTables; -static_assert_align(KernelTables, 16); - -typedef struct KernelBake { - int use; - int object_index; - int tri_offset; - int pad1; -} KernelBake; -static_assert_align(KernelBake, 16); - -typedef struct KernelData { - uint kernel_features; - uint max_closures; - uint max_shaders; - uint volume_stack_size; - - KernelCamera cam; - KernelFilm film; - KernelBackground background; - KernelIntegrator integrator; - KernelBVH bvh; - KernelTables tables; - KernelBake bake; -} KernelData; -static_assert_align(KernelData, 16); - -/* Kernel data structures. 
*/ - -typedef struct KernelObject { - Transform tfm; - Transform itfm; - - float volume_density; - float pass_id; - float random_number; - float color[3]; - int particle_index; - - float dupli_generated[3]; - float dupli_uv[2]; - - int numkeys; - int numsteps; - int numverts; - - uint patch_map_offset; - uint attribute_map_offset; - uint motion_offset; - - float cryptomatte_object; - float cryptomatte_asset; - - float shadow_terminator_shading_offset; - float shadow_terminator_geometry_offset; - - float ao_distance; - - uint visibility; - int primitive_type; -} KernelObject; -static_assert_align(KernelObject, 16); - -typedef struct KernelCurve { - int shader_id; - int first_key; - int num_keys; - int type; -} KernelCurve; -static_assert_align(KernelCurve, 16); - -typedef struct KernelCurveSegment { - int prim; - int type; -} KernelCurveSegment; -static_assert_align(KernelCurveSegment, 8); - -typedef struct KernelSpotLight { - float radius; - float invarea; - float spot_angle; - float spot_smooth; - float dir[3]; - float pad; -} KernelSpotLight; - -/* PointLight is SpotLight with only radius and invarea being used. */ - -typedef struct KernelAreaLight { - float axisu[3]; - float invarea; - float axisv[3]; - float tan_spread; - float dir[3]; - float normalize_spread; -} KernelAreaLight; - -typedef struct KernelDistantLight { - float radius; - float cosangle; - float invarea; - float pad; -} KernelDistantLight; - -typedef struct KernelLight { - int type; - float co[3]; - int shader_id; - float max_bounces; - float random; - float strength[3]; - float pad1, pad2; - Transform tfm; - Transform itfm; - union { - KernelSpotLight spot; - KernelAreaLight area; - KernelDistantLight distant; - }; -} KernelLight; -static_assert_align(KernelLight, 16); - -typedef struct KernelLightDistribution { - float totarea; - int prim; - union { - struct { - int shader_flag; - int object_id; - } mesh_light; - struct { - float pad; - float size; - } lamp; - }; -} KernelLightDistribution; -static_assert_align(KernelLightDistribution, 16); - -typedef struct KernelParticle { - int index; - float age; - float lifetime; - float size; - float4 rotation; - /* Only xyz are used of the following. float4 instead of float3 are used - * to ensure consistent padding/alignment across devices. */ - float4 location; - float4 velocity; - float4 angular_velocity; -} KernelParticle; -static_assert_align(KernelParticle, 16); - -typedef struct KernelShader { - float constant_emission[3]; - float cryptomatte_id; - int flags; - int pass_id; - int pad2, pad3; -} KernelShader; -static_assert_align(KernelShader, 16); - -/* Patches */ - -#define PATCH_MAX_CONTROL_VERTS 16 - -/* Patch map node flags */ - -#define PATCH_MAP_NODE_IS_SET (1 << 30) -#define PATCH_MAP_NODE_IS_LEAF (1u << 31) -#define PATCH_MAP_NODE_INDEX_MASK (~(PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF)) - -/* Work Tiles */ - -typedef struct KernelWorkTile { - uint x, y, w, h; - - uint start_sample; - uint num_samples; - - int offset; - uint stride; - - /* Precalculated parameters used by init_from_camera kernel on GPU. */ - int path_index_offset; - int work_size; -} KernelWorkTile; - -/* Shader Evaluation. - * - * Position on a primitive on an object at which we want to evaluate the - * shader for e.g. mesh displacement or light importance map. */ - -typedef struct KernelShaderEvalInput { - int object; - int prim; - float u, v; -} KernelShaderEvalInput; -static_assert_align(KernelShaderEvalInput, 16); - -/* Pre-computed sample table sizes for PMJ02 sampler. 
*/ -#define NUM_PMJ_DIVISIONS 32 -#define NUM_PMJ_SAMPLES ((NUM_PMJ_DIVISIONS) * (NUM_PMJ_DIVISIONS)) -#define NUM_PMJ_PATTERNS 1 - -/* Device kernels. - * - * Identifier for kernels that can be executed in device queues. - * - * Some implementation details. - * - * If the kernel uses shared CUDA memory, `CUDADeviceQueue::enqueue` is to be modified. - * The path iteration kernels are handled in `PathTraceWorkGPU::enqueue_path_iteration`. */ - -typedef enum DeviceKernel { - DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA = 0, - DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE, - DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, - DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, - DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, - DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, - DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND, - DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, - DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, - DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, - DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, - DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, - DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL, - - DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY, - DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY, - DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY, - DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY, - DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY, - DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY, - DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES, - DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY, - DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY, - DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES, - DEVICE_KERNEL_INTEGRATOR_RESET, - DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS, - - DEVICE_KERNEL_SHADER_EVAL_DISPLACE, - DEVICE_KERNEL_SHADER_EVAL_BACKGROUND, - DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY, - -#define DECLARE_FILM_CONVERT_KERNEL(variant) \ - DEVICE_KERNEL_FILM_CONVERT_##variant, DEVICE_KERNEL_FILM_CONVERT_##variant##_HALF_RGBA - - DECLARE_FILM_CONVERT_KERNEL(DEPTH), - DECLARE_FILM_CONVERT_KERNEL(MIST), - DECLARE_FILM_CONVERT_KERNEL(SAMPLE_COUNT), - DECLARE_FILM_CONVERT_KERNEL(FLOAT), - DECLARE_FILM_CONVERT_KERNEL(LIGHT_PATH), - DECLARE_FILM_CONVERT_KERNEL(FLOAT3), - DECLARE_FILM_CONVERT_KERNEL(MOTION), - DECLARE_FILM_CONVERT_KERNEL(CRYPTOMATTE), - DECLARE_FILM_CONVERT_KERNEL(SHADOW_CATCHER), - DECLARE_FILM_CONVERT_KERNEL(SHADOW_CATCHER_MATTE_WITH_SHADOW), - DECLARE_FILM_CONVERT_KERNEL(COMBINED), - DECLARE_FILM_CONVERT_KERNEL(FLOAT4), - -#undef DECLARE_FILM_CONVERT_KERNEL - - DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK, - DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X, - DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y, - - DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, - DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, - DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, - DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, - - DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS, - - DEVICE_KERNEL_PREFIX_SUM, - - DEVICE_KERNEL_NUM, -} DeviceKernel; - -enum { - DEVICE_KERNEL_INTEGRATOR_NUM = DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL + 1, -}; - -/* Kernel Features */ - -enum KernelFeatureFlag : unsigned int { - /* Shader nodes. 
*/ - KERNEL_FEATURE_NODE_BSDF = (1U << 0U), - KERNEL_FEATURE_NODE_EMISSION = (1U << 1U), - KERNEL_FEATURE_NODE_VOLUME = (1U << 2U), - KERNEL_FEATURE_NODE_HAIR = (1U << 3U), - KERNEL_FEATURE_NODE_BUMP = (1U << 4U), - KERNEL_FEATURE_NODE_BUMP_STATE = (1U << 5U), - KERNEL_FEATURE_NODE_VORONOI_EXTRA = (1U << 6U), - KERNEL_FEATURE_NODE_RAYTRACE = (1U << 7U), - KERNEL_FEATURE_NODE_AOV = (1U << 8U), - KERNEL_FEATURE_NODE_LIGHT_PATH = (1U << 9U), - - /* Use denoising kernels and output denoising passes. */ - KERNEL_FEATURE_DENOISING = (1U << 10U), - - /* Use path tracing kernels. */ - KERNEL_FEATURE_PATH_TRACING = (1U << 11U), - - /* BVH/sampling kernel features. */ - KERNEL_FEATURE_HAIR = (1U << 12U), - KERNEL_FEATURE_HAIR_THICK = (1U << 13U), - KERNEL_FEATURE_OBJECT_MOTION = (1U << 14U), - KERNEL_FEATURE_CAMERA_MOTION = (1U << 15U), - - /* Denotes whether baking functionality is needed. */ - KERNEL_FEATURE_BAKING = (1U << 16U), - - /* Use subsurface scattering materials. */ - KERNEL_FEATURE_SUBSURFACE = (1U << 17U), - - /* Use volume materials. */ - KERNEL_FEATURE_VOLUME = (1U << 18U), - - /* Use OpenSubdiv patch evaluation */ - KERNEL_FEATURE_PATCH_EVALUATION = (1U << 19U), - - /* Use Transparent shadows */ - KERNEL_FEATURE_TRANSPARENT = (1U << 20U), - - /* Use shadow catcher. */ - KERNEL_FEATURE_SHADOW_CATCHER = (1U << 21U), - - /* Per-uber shader usage flags. */ - KERNEL_FEATURE_PRINCIPLED = (1U << 22U), - - /* Light render passes. */ - KERNEL_FEATURE_LIGHT_PASSES = (1U << 23U), - - /* Shadow render pass. */ - KERNEL_FEATURE_SHADOW_PASS = (1U << 24U), - - /* AO. */ - KERNEL_FEATURE_AO_PASS = (1U << 25U), - KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U), - KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE), -}; - -/* Shader node feature mask, to specialize shader evaluation for kernels. */ - -#define KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT \ - (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VORONOI_EXTRA | \ - KERNEL_FEATURE_NODE_LIGHT_PATH) -#define KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW \ - (KERNEL_FEATURE_NODE_BSDF | KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | \ - KERNEL_FEATURE_NODE_HAIR | KERNEL_FEATURE_NODE_BUMP | KERNEL_FEATURE_NODE_BUMP_STATE | \ - KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_LIGHT_PATH) -#define KERNEL_FEATURE_NODE_MASK_SURFACE \ - (KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW | KERNEL_FEATURE_NODE_RAYTRACE | \ - KERNEL_FEATURE_NODE_AOV | KERNEL_FEATURE_NODE_LIGHT_PATH) -#define KERNEL_FEATURE_NODE_MASK_VOLUME \ - (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | \ - KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_LIGHT_PATH) -#define KERNEL_FEATURE_NODE_MASK_DISPLACEMENT \ - (KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_BUMP | KERNEL_FEATURE_NODE_BUMP_STATE) -#define KERNEL_FEATURE_NODE_MASK_BUMP KERNEL_FEATURE_NODE_MASK_DISPLACEMENT - -/* Must be constexpr on the CPU to avoid compile errors because the state types - * are different depending on the main, shadow or null path. For GPU we don't have - * C++17 everywhere so can't use it. 
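/* A standalone illustration of why the compile-time branch matters, assuming a C++17
 * host compiler. The feature bits and state types are stand-ins: with a plain `if` the
 * untaken branch would still have to type-check against every state type, whereas
 * `if constexpr` discards it entirely when the mask is a template constant. */
enum : unsigned int { FEATURE_RAYTRACE_SK = 1u << 0, FEATURE_EMISSION_SK = 1u << 1 };

struct MainStateSketch {
  int bounce;
};
struct ShadowStateSketch {
  /* deliberately has no `bounce` member */
};

template<unsigned int node_feature_mask, typename State> int shade_sketch(const State &state)
{
  if constexpr ((node_feature_mask & FEATURE_RAYTRACE_SK) != 0u) {
    return state.bounce; /* only instantiated for states that actually provide `bounce` */
  }
  else {
    return 0;
  }
}

/* Usage: shade_sketch<FEATURE_RAYTRACE_SK>(MainStateSketch{3}) returns 3, while
 * shade_sketch<FEATURE_EMISSION_SK>(ShadowStateSketch{}) still compiles because the
 * raytrace branch is discarded before instantiation. */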
*/ -#ifdef __KERNEL_CPU__ -# define IF_KERNEL_NODES_FEATURE(feature) \ - if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) -#else -# define IF_KERNEL_NODES_FEATURE(feature) \ - if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) -#endif - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/light/background.h b/intern/cycles/kernel/light/background.h new file mode 100644 index 00000000000..d801cc94393 --- /dev/null +++ b/intern/cycles/kernel/light/background.h @@ -0,0 +1,453 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/light/common.h" + +CCL_NAMESPACE_BEGIN + +/* Background Light */ + +#ifdef __BACKGROUND_MIS__ + +ccl_device float3 background_map_sample(KernelGlobals kg, + float randu, + float randv, + ccl_private float *pdf) +{ + /* for the following, the CDF values are actually a pair of floats, with the + * function value as X and the actual CDF as Y. The last entry's function + * value is the CDF total. */ + int res_x = kernel_data.background.map_res_x; + int res_y = kernel_data.background.map_res_y; + int cdf_width = res_x + 1; + + /* This is basically std::lower_bound as used by PBRT. */ + int first = 0; + int count = res_y; + + while (count > 0) { + int step = count >> 1; + int middle = first + step; + + if (kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) { + first = middle + 1; + count -= step + 1; + } + else + count = step; + } + + int index_v = max(0, first - 1); + kernel_assert(index_v >= 0 && index_v < res_y); + + float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v); + float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1); + float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y); + + /* importance-sampled V direction */ + float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv); + float v = (index_v + dv) / res_y; + + /* This is basically std::lower_bound as used by PBRT. 
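/* The same search expressed with the standard library on the host, assuming the CDF is
 * stored as (function value, running CDF) pairs like the tables above. Illustrative
 * only: the kernel spells the loop out because std::lower_bound is not available in
 * device code. */
#include <algorithm>
#include <vector>

struct CdfEntrySketch {
  float func; /* function value for this row/column */
  float cdf;  /* running CDF; the last entry holds the total */
};

/* Returns the last index whose running CDF is still below xi (clamped to 0), which is
 * what the manual binary search above computes for index_v and index_u. */
static int sample_cdf_index_sketch(const std::vector<CdfEntrySketch> &cdf, float xi)
{
  auto it = std::lower_bound(cdf.begin(), cdf.end(), xi, [](const CdfEntrySketch &e, float value) {
    return e.cdf < value;
  });
  const int first = int(it - cdf.begin());
  return std::max(0, first - 1);
}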
*/ + first = 0; + count = res_x; + while (count > 0) { + int step = count >> 1; + int middle = first + step; + + if (kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y < + randu) { + first = middle + 1; + count -= step + 1; + } + else + count = step; + } + + int index_u = max(0, first - 1); + kernel_assert(index_u >= 0 && index_u < res_x); + + float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, + index_v * cdf_width + index_u); + float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf, + index_v * cdf_width + index_u + 1); + float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, + index_v * cdf_width + res_x); + + /* importance-sampled U direction */ + float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu); + float u = (index_u + du) / res_x; + + /* compute pdf */ + float sin_theta = sinf(M_PI_F * v); + float denom = (M_2PI_F * M_PI_F * sin_theta) * cdf_last_u.x * cdf_last_v.x; + + if (sin_theta == 0.0f || denom == 0.0f) + *pdf = 0.0f; + else + *pdf = (cdf_u.x * cdf_v.x) / denom; + + /* compute direction */ + return equirectangular_to_direction(u, v); +} + +/* TODO(sergey): Same as above, after the release we should consider using + * 'noinline' for all devices. + */ +ccl_device float background_map_pdf(KernelGlobals kg, float3 direction) +{ + float2 uv = direction_to_equirectangular(direction); + int res_x = kernel_data.background.map_res_x; + int res_y = kernel_data.background.map_res_y; + int cdf_width = res_x + 1; + + float sin_theta = sinf(uv.y * M_PI_F); + + if (sin_theta == 0.0f) + return 0.0f; + + int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1); + int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1); + + /* pdfs in V direction */ + float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, + index_v * cdf_width + res_x); + float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y); + + float denom = (M_2PI_F * M_PI_F * sin_theta) * cdf_last_u.x * cdf_last_v.x; + + if (denom == 0.0f) + return 0.0f; + + /* pdfs in U direction */ + float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, + index_v * cdf_width + index_u); + float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v); + + return (cdf_u.x * cdf_v.x) / denom; +} + +ccl_device_inline bool background_portal_data_fetch_and_check_side( + KernelGlobals kg, float3 P, int index, ccl_private float3 *lightpos, ccl_private float3 *dir) +{ + int portal = kernel_data.background.portal_offset + index; + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); + + *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]); + *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]); + + /* Check whether portal is on the right side. */ + if (dot(*dir, P - *lightpos) > 1e-4f) + return true; + + return false; +} + +ccl_device_inline float background_portal_pdf( + KernelGlobals kg, float3 P, float3 direction, int ignore_portal, ccl_private bool *is_possible) +{ + float portal_pdf = 0.0f; + + int num_possible = 0; + for (int p = 0; p < kernel_data.background.num_portals; p++) { + if (p == ignore_portal) + continue; + + float3 lightpos, dir; + if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) + continue; + + /* There's a portal that could be sampled from this position. 
*/ + if (is_possible) { + *is_possible = true; + } + num_possible++; + + int portal = kernel_data.background.portal_offset + p; + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); + float3 axisu = make_float3( + klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); + float3 axisv = make_float3( + klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); + bool is_round = (klight->area.invarea < 0.0f); + + if (!ray_quad_intersect(P, + direction, + 1e-4f, + FLT_MAX, + lightpos, + axisu, + axisv, + dir, + NULL, + NULL, + NULL, + NULL, + is_round)) + continue; + + if (is_round) { + float t; + float3 D = normalize_len(lightpos - P, &t); + portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t); + } + else { + portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false); + } + } + + if (ignore_portal >= 0) { + /* We have skipped a portal that could be sampled as well. */ + num_possible++; + } + + return (num_possible > 0) ? portal_pdf / num_possible : 0.0f; +} + +ccl_device int background_num_possible_portals(KernelGlobals kg, float3 P) +{ + int num_possible_portals = 0; + for (int p = 0; p < kernel_data.background.num_portals; p++) { + float3 lightpos, dir; + if (background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) + num_possible_portals++; + } + return num_possible_portals; +} + +ccl_device float3 background_portal_sample(KernelGlobals kg, + float3 P, + float randu, + float randv, + int num_possible, + ccl_private int *sampled_portal, + ccl_private float *pdf) +{ + /* Pick a portal, then re-normalize randv. */ + randv *= num_possible; + int portal = (int)randv; + randv -= portal; + + /* TODO(sergey): Some smarter way of finding portal to sample + * is welcome. + */ + for (int p = 0; p < kernel_data.background.num_portals; p++) { + /* Search for the sampled portal. */ + float3 lightpos, dir; + if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) + continue; + + if (portal == 0) { + /* p is the portal to be sampled. 
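/* A small sketch of the pick-and-rescale trick used above: one uniform number both
 * selects an index in [0, count) and is rescaled back to [0, 1) so it can be reused for
 * sampling within the chosen item. The helper is hypothetical and one-dimensional. */
static int pick_index_and_rescale_sketch(float *xi, int count)
{
  const float scaled = *xi * count;
  int index = (int)scaled;
  if (index > count - 1) {
    index = count - 1; /* guard against rounding at the xi ~ 1 edge */
  }
  *xi = scaled - index;
  return index;
}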
*/ + int portal = kernel_data.background.portal_offset + p; + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); + float3 axisu = make_float3( + klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); + float3 axisv = make_float3( + klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); + bool is_round = (klight->area.invarea < 0.0f); + + float3 D; + if (is_round) { + lightpos += ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv); + float t; + D = normalize_len(lightpos - P, &t); + *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t); + } + else { + *pdf = rect_light_sample(P, &lightpos, axisu, axisv, randu, randv, true); + D = normalize(lightpos - P); + } + + *pdf /= num_possible; + *sampled_portal = p; + return D; + } + + portal--; + } + + return zero_float3(); +} + +ccl_device_inline float3 background_sun_sample(KernelGlobals kg, + float randu, + float randv, + ccl_private float *pdf) +{ + float3 D; + const float3 N = float4_to_float3(kernel_data.background.sun); + const float angle = kernel_data.background.sun.w; + sample_uniform_cone(N, angle, randu, randv, &D, pdf); + return D; +} + +ccl_device_inline float background_sun_pdf(KernelGlobals kg, float3 D) +{ + const float3 N = float4_to_float3(kernel_data.background.sun); + const float angle = kernel_data.background.sun.w; + return pdf_uniform_cone(N, D, angle); +} + +ccl_device_inline float3 background_light_sample( + KernelGlobals kg, float3 P, float randu, float randv, ccl_private float *pdf) +{ + float portal_method_pdf = kernel_data.background.portal_weight; + float sun_method_pdf = kernel_data.background.sun_weight; + float map_method_pdf = kernel_data.background.map_weight; + + int num_portals = 0; + if (portal_method_pdf > 0.0f) { + /* Check if there are portals in the scene which we can sample. */ + num_portals = background_num_possible_portals(kg, P); + if (num_portals == 0) { + portal_method_pdf = 0.0f; + } + } + + float pdf_fac = (portal_method_pdf + sun_method_pdf + map_method_pdf); + if (pdf_fac == 0.0f) { + /* Use uniform as a fallback if we can't use any strategy. */ + *pdf = 1.0f / M_4PI_F; + return sample_uniform_sphere(randu, randv); + } + + pdf_fac = 1.0f / pdf_fac; + portal_method_pdf *= pdf_fac; + sun_method_pdf *= pdf_fac; + map_method_pdf *= pdf_fac; + + /* We have 100% in total and split it between the three categories. + * Therefore, we pick portals if randu is between 0 and portal_method_pdf, + * sun if randu is between portal_method_pdf and (portal_method_pdf + sun_method_pdf) + * and map if randu is between (portal_method_pdf + sun_method_pdf) and 1. */ + float sun_method_cdf = portal_method_pdf + sun_method_pdf; + + int method = 0; + float3 D; + if (randu < portal_method_pdf) { + method = 0; + /* Rescale randu. */ + if (portal_method_pdf != 1.0f) { + randu /= portal_method_pdf; + } + + /* Sample a portal. */ + int portal; + D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf); + if (num_portals > 1) { + /* Ignore the chosen portal, its pdf is already included. */ + *pdf += background_portal_pdf(kg, P, D, portal, NULL); + } + + /* Skip MIS if this is the only method. */ + if (portal_method_pdf == 1.0f) { + return D; + } + *pdf *= portal_method_pdf; + } + else if (randu < sun_method_cdf) { + method = 1; + /* Rescale randu. 
*/ + if (sun_method_pdf != 1.0f) { + randu = (randu - portal_method_pdf) / sun_method_pdf; + } + + D = background_sun_sample(kg, randu, randv, pdf); + + /* Skip MIS if this is the only method. */ + if (sun_method_pdf == 1.0f) { + return D; + } + *pdf *= sun_method_pdf; + } + else { + method = 2; + /* Rescale randu. */ + if (map_method_pdf != 1.0f) { + randu = (randu - sun_method_cdf) / map_method_pdf; + } + + D = background_map_sample(kg, randu, randv, pdf); + + /* Skip MIS if this is the only method. */ + if (map_method_pdf == 1.0f) { + return D; + } + *pdf *= map_method_pdf; + } + + /* MIS weighting. */ + if (method != 0 && portal_method_pdf != 0.0f) { + *pdf += portal_method_pdf * background_portal_pdf(kg, P, D, -1, NULL); + } + if (method != 1 && sun_method_pdf != 0.0f) { + *pdf += sun_method_pdf * background_sun_pdf(kg, D); + } + if (method != 2 && map_method_pdf != 0.0f) { + *pdf += map_method_pdf * background_map_pdf(kg, D); + } + return D; +} + +ccl_device float background_light_pdf(KernelGlobals kg, float3 P, float3 direction) +{ + float portal_method_pdf = kernel_data.background.portal_weight; + float sun_method_pdf = kernel_data.background.sun_weight; + float map_method_pdf = kernel_data.background.map_weight; + + float portal_pdf = 0.0f; + /* Portals are a special case here since we need to compute their pdf in order + * to find out if we can sample them. */ + if (portal_method_pdf > 0.0f) { + /* Evaluate PDF of sampling this direction by portal sampling. */ + bool is_possible = false; + portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible); + if (!is_possible) { + /* Portal sampling is not possible here because all portals point to the wrong side. + * If other methods can be used instead, do so, otherwise uniform sampling is used as a + * fallback. */ + portal_method_pdf = 0.0f; + } + } + + float pdf_fac = (portal_method_pdf + sun_method_pdf + map_method_pdf); + if (pdf_fac == 0.0f) { + /* Use uniform as a fallback if we can't use any strategy. */ + return kernel_data.integrator.pdf_lights / M_4PI_F; + } + + pdf_fac = 1.0f / pdf_fac; + portal_method_pdf *= pdf_fac; + sun_method_pdf *= pdf_fac; + map_method_pdf *= pdf_fac; + + float pdf = portal_pdf * portal_method_pdf; + if (sun_method_pdf != 0.0f) { + pdf += background_sun_pdf(kg, direction) * sun_method_pdf; + } + if (map_method_pdf != 0.0f) { + pdf += background_map_pdf(kg, direction) * map_method_pdf; + } + + return pdf * kernel_data.integrator.pdf_lights; +} + +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/light/common.h b/intern/cycles/kernel/light/common.h new file mode 100644 index 00000000000..75331d32d44 --- /dev/null +++ b/intern/cycles/kernel/light/common.h @@ -0,0 +1,227 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/sample/mapping.h" + +CCL_NAMESPACE_BEGIN + +/* Area light sampling */ + +/* Uses the following paper: + * + * Carlos Urena et al. 
+ * An Area-Preserving Parametrization for Spherical Rectangles. + * + * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf + * + * Note: light_p is modified when sample_coord is true. + */ +ccl_device_inline float rect_light_sample(float3 P, + ccl_private float3 *light_p, + float3 axisu, + float3 axisv, + float randu, + float randv, + bool sample_coord) +{ + /* In our name system we're using P for the center, + * which is o in the paper. + */ + + float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f; + float axisu_len, axisv_len; + /* Compute local reference system R. */ + float3 x = normalize_len(axisu, &axisu_len); + float3 y = normalize_len(axisv, &axisv_len); + float3 z = cross(x, y); + /* Compute rectangle coords in local reference system. */ + float3 dir = corner - P; + float z0 = dot(dir, z); + /* Flip 'z' to make it point against Q. */ + if (z0 > 0.0f) { + z *= -1.0f; + z0 *= -1.0f; + } + float x0 = dot(dir, x); + float y0 = dot(dir, y); + float x1 = x0 + axisu_len; + float y1 = y0 + axisv_len; + /* Compute internal angles (gamma_i). */ + float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1); + float4 nz = make_float4(y0, x1, y1, x0) * diff; + nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz); + float g0 = safe_acosf(-nz.x * nz.y); + float g1 = safe_acosf(-nz.y * nz.z); + float g2 = safe_acosf(-nz.z * nz.w); + float g3 = safe_acosf(-nz.w * nz.x); + /* Compute predefined constants. */ + float b0 = nz.x; + float b1 = nz.z; + float b0sq = b0 * b0; + float k = M_2PI_F - g2 - g3; + /* Compute solid angle from internal angles. */ + float S = g0 + g1 - k; + + if (sample_coord) { + /* Compute cu. */ + float au = randu * S + k; + float fu = (cosf(au) * b0 - b1) / sinf(au); + float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f); + cu = clamp(cu, -1.0f, 1.0f); + /* Compute xu. */ + float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f); + xu = clamp(xu, x0, x1); + /* Compute yv. */ + float z0sq = z0 * z0; + float y0sq = y0 * y0; + float y1sq = y1 * y1; + float d = sqrtf(xu * xu + z0sq); + float h0 = y0 / sqrtf(d * d + y0sq); + float h1 = y1 / sqrtf(d * d + y1sq); + float hv = h0 + randv * (h1 - h0), hv2 = hv * hv; + float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1; + + /* Transform (xu, yv, z0) to world coords. 
*/ + *light_p = P + xu * x + yv * y + z0 * z; + } + + /* return pdf */ + if (S != 0.0f) + return 1.0f / S; + else + return 0.0f; +} + +ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv) +{ + to_unit_disk(&randu, &randv); + return ru * randu + rv * randv; +} + +ccl_device float3 disk_light_sample(float3 v, float randu, float randv) +{ + float3 ru, rv; + + make_orthonormals(v, &ru, &rv); + + return ellipse_sample(ru, rv, randu, randv); +} + +ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv) +{ + return normalize(D + disk_light_sample(D, randu, randv) * radius); +} + +ccl_device float3 +sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv) +{ + return disk_light_sample(normalize(P - center), randu, randv) * radius; +} + +ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, float3 N) +{ + float attenuation = dot(dir, N); + + if (attenuation <= spot_angle) { + attenuation = 0.0f; + } + else { + float t = attenuation - spot_angle; + + if (t < spot_smooth && spot_smooth != 0.0f) + attenuation *= smoothstepf(t / spot_smooth); + } + + return attenuation; +} + +ccl_device float light_spread_attenuation(const float3 D, + const float3 lightNg, + const float tan_spread, + const float normalize_spread) +{ + /* Model a soft-box grid, computing the ratio of light not hidden by the + * slats of the grid at a given angle. (see D10594). */ + const float cos_a = -dot(D, lightNg); + const float sin_a = safe_sqrtf(1.0f - sqr(cos_a)); + const float tan_a = sin_a / cos_a; + return max((1.0f - (tan_spread * tan_a)) * normalize_spread, 0.0f); +} + +/* Compute subset of area light that actually has an influence on the shading point, to + * reduce noise with low spread. */ +ccl_device bool light_spread_clamp_area_light(const float3 P, + const float3 lightNg, + ccl_private float3 *lightP, + ccl_private float3 *axisu, + ccl_private float3 *axisv, + const float tan_spread) +{ + /* Closest point in area light plane and distance to that plane. */ + const float3 closest_P = P - dot(lightNg, P - *lightP) * lightNg; + const float t = len(closest_P - P); + + /* Radius of circle on area light that actually affects the shading point. */ + const float radius = t / tan_spread; + + /* TODO: would be faster to store as normalized vector + length, also in rect_light_sample. */ + float len_u, len_v; + const float3 u = normalize_len(*axisu, &len_u); + const float3 v = normalize_len(*axisv, &len_v); + + /* Local uv coordinates of closest point. */ + const float closest_u = dot(u, closest_P - *lightP); + const float closest_v = dot(v, closest_P - *lightP); + + /* Compute rectangle encompassing the circle that affects the shading point, + * clamped to the bounds of the area light. */ + const float min_u = max(closest_u - radius, -len_u * 0.5f); + const float max_u = min(closest_u + radius, len_u * 0.5f); + const float min_v = max(closest_v - radius, -len_v * 0.5f); + const float max_v = min(closest_v + radius, len_v * 0.5f); + + /* Skip if rectangle is empty. */ + if (min_u >= max_u || min_v >= max_v) { + return false; + } + + /* Compute new area light center position and axes from rectangle in local + * uv coordinates. 
*/ + const float new_center_u = 0.5f * (min_u + max_u); + const float new_center_v = 0.5f * (min_v + max_v); + const float new_len_u = max_u - min_u; + const float new_len_v = max_v - min_v; + + *lightP = *lightP + new_center_u * u + new_center_v * v; + *axisu = u * new_len_u; + *axisv = v * new_len_v; + + return true; +} + +ccl_device float lamp_light_pdf(KernelGlobals kg, const float3 Ng, const float3 I, float t) +{ + float cos_pi = dot(Ng, I); + + if (cos_pi <= 0.0f) + return 0.0f; + + return t * t / cos_pi; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h index facbbe23d0f..746c7747569 100644 --- a/intern/cycles/kernel/light/light.h +++ b/intern/cycles/kernel/light/light.h @@ -17,8 +17,8 @@ #pragma once #include "kernel/geom/geom.h" -#include "kernel/light/light_background.h" -#include "kernel/sample/sample_mapping.h" +#include "kernel/light/background.h" +#include "kernel/sample/mapping.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/light/light_background.h b/intern/cycles/kernel/light/light_background.h deleted file mode 100644 index 78f8c94f7a3..00000000000 --- a/intern/cycles/kernel/light/light_background.h +++ /dev/null @@ -1,453 +0,0 @@ -/* - * Copyright 2011-2020 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/light/light_common.h" - -CCL_NAMESPACE_BEGIN - -/* Background Light */ - -#ifdef __BACKGROUND_MIS__ - -ccl_device float3 background_map_sample(KernelGlobals kg, - float randu, - float randv, - ccl_private float *pdf) -{ - /* for the following, the CDF values are actually a pair of floats, with the - * function value as X and the actual CDF as Y. The last entry's function - * value is the CDF total. */ - int res_x = kernel_data.background.map_res_x; - int res_y = kernel_data.background.map_res_y; - int cdf_width = res_x + 1; - - /* This is basically std::lower_bound as used by PBRT. */ - int first = 0; - int count = res_y; - - while (count > 0) { - int step = count >> 1; - int middle = first + step; - - if (kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) { - first = middle + 1; - count -= step + 1; - } - else - count = step; - } - - int index_v = max(0, first - 1); - kernel_assert(index_v >= 0 && index_v < res_y); - - float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v); - float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1); - float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y); - - /* importance-sampled V direction */ - float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv); - float v = (index_v + dv) / res_y; - - /* This is basically std::lower_bound as used by PBRT. 
*/ - first = 0; - count = res_x; - while (count > 0) { - int step = count >> 1; - int middle = first + step; - - if (kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y < - randu) { - first = middle + 1; - count -= step + 1; - } - else - count = step; - } - - int index_u = max(0, first - 1); - kernel_assert(index_u >= 0 && index_u < res_x); - - float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, - index_v * cdf_width + index_u); - float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf, - index_v * cdf_width + index_u + 1); - float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, - index_v * cdf_width + res_x); - - /* importance-sampled U direction */ - float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu); - float u = (index_u + du) / res_x; - - /* compute pdf */ - float sin_theta = sinf(M_PI_F * v); - float denom = (M_2PI_F * M_PI_F * sin_theta) * cdf_last_u.x * cdf_last_v.x; - - if (sin_theta == 0.0f || denom == 0.0f) - *pdf = 0.0f; - else - *pdf = (cdf_u.x * cdf_v.x) / denom; - - /* compute direction */ - return equirectangular_to_direction(u, v); -} - -/* TODO(sergey): Same as above, after the release we should consider using - * 'noinline' for all devices. - */ -ccl_device float background_map_pdf(KernelGlobals kg, float3 direction) -{ - float2 uv = direction_to_equirectangular(direction); - int res_x = kernel_data.background.map_res_x; - int res_y = kernel_data.background.map_res_y; - int cdf_width = res_x + 1; - - float sin_theta = sinf(uv.y * M_PI_F); - - if (sin_theta == 0.0f) - return 0.0f; - - int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1); - int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1); - - /* pdfs in V direction */ - float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, - index_v * cdf_width + res_x); - float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y); - - float denom = (M_2PI_F * M_PI_F * sin_theta) * cdf_last_u.x * cdf_last_v.x; - - if (denom == 0.0f) - return 0.0f; - - /* pdfs in U direction */ - float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, - index_v * cdf_width + index_u); - float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v); - - return (cdf_u.x * cdf_v.x) / denom; -} - -ccl_device_inline bool background_portal_data_fetch_and_check_side( - KernelGlobals kg, float3 P, int index, ccl_private float3 *lightpos, ccl_private float3 *dir) -{ - int portal = kernel_data.background.portal_offset + index; - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); - - *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]); - *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]); - - /* Check whether portal is on the right side. */ - if (dot(*dir, P - *lightpos) > 1e-4f) - return true; - - return false; -} - -ccl_device_inline float background_portal_pdf( - KernelGlobals kg, float3 P, float3 direction, int ignore_portal, ccl_private bool *is_possible) -{ - float portal_pdf = 0.0f; - - int num_possible = 0; - for (int p = 0; p < kernel_data.background.num_portals; p++) { - if (p == ignore_portal) - continue; - - float3 lightpos, dir; - if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) - continue; - - /* There's a portal that could be sampled from this position. 
*/ - if (is_possible) { - *is_possible = true; - } - num_possible++; - - int portal = kernel_data.background.portal_offset + p; - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); - float3 axisu = make_float3( - klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); - float3 axisv = make_float3( - klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); - bool is_round = (klight->area.invarea < 0.0f); - - if (!ray_quad_intersect(P, - direction, - 1e-4f, - FLT_MAX, - lightpos, - axisu, - axisv, - dir, - NULL, - NULL, - NULL, - NULL, - is_round)) - continue; - - if (is_round) { - float t; - float3 D = normalize_len(lightpos - P, &t); - portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t); - } - else { - portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false); - } - } - - if (ignore_portal >= 0) { - /* We have skipped a portal that could be sampled as well. */ - num_possible++; - } - - return (num_possible > 0) ? portal_pdf / num_possible : 0.0f; -} - -ccl_device int background_num_possible_portals(KernelGlobals kg, float3 P) -{ - int num_possible_portals = 0; - for (int p = 0; p < kernel_data.background.num_portals; p++) { - float3 lightpos, dir; - if (background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) - num_possible_portals++; - } - return num_possible_portals; -} - -ccl_device float3 background_portal_sample(KernelGlobals kg, - float3 P, - float randu, - float randv, - int num_possible, - ccl_private int *sampled_portal, - ccl_private float *pdf) -{ - /* Pick a portal, then re-normalize randv. */ - randv *= num_possible; - int portal = (int)randv; - randv -= portal; - - /* TODO(sergey): Some smarter way of finding portal to sample - * is welcome. - */ - for (int p = 0; p < kernel_data.background.num_portals; p++) { - /* Search for the sampled portal. */ - float3 lightpos, dir; - if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) - continue; - - if (portal == 0) { - /* p is the portal to be sampled. 
*/ - int portal = kernel_data.background.portal_offset + p; - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); - float3 axisu = make_float3( - klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); - float3 axisv = make_float3( - klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); - bool is_round = (klight->area.invarea < 0.0f); - - float3 D; - if (is_round) { - lightpos += ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv); - float t; - D = normalize_len(lightpos - P, &t); - *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t); - } - else { - *pdf = rect_light_sample(P, &lightpos, axisu, axisv, randu, randv, true); - D = normalize(lightpos - P); - } - - *pdf /= num_possible; - *sampled_portal = p; - return D; - } - - portal--; - } - - return zero_float3(); -} - -ccl_device_inline float3 background_sun_sample(KernelGlobals kg, - float randu, - float randv, - ccl_private float *pdf) -{ - float3 D; - const float3 N = float4_to_float3(kernel_data.background.sun); - const float angle = kernel_data.background.sun.w; - sample_uniform_cone(N, angle, randu, randv, &D, pdf); - return D; -} - -ccl_device_inline float background_sun_pdf(KernelGlobals kg, float3 D) -{ - const float3 N = float4_to_float3(kernel_data.background.sun); - const float angle = kernel_data.background.sun.w; - return pdf_uniform_cone(N, D, angle); -} - -ccl_device_inline float3 background_light_sample( - KernelGlobals kg, float3 P, float randu, float randv, ccl_private float *pdf) -{ - float portal_method_pdf = kernel_data.background.portal_weight; - float sun_method_pdf = kernel_data.background.sun_weight; - float map_method_pdf = kernel_data.background.map_weight; - - int num_portals = 0; - if (portal_method_pdf > 0.0f) { - /* Check if there are portals in the scene which we can sample. */ - num_portals = background_num_possible_portals(kg, P); - if (num_portals == 0) { - portal_method_pdf = 0.0f; - } - } - - float pdf_fac = (portal_method_pdf + sun_method_pdf + map_method_pdf); - if (pdf_fac == 0.0f) { - /* Use uniform as a fallback if we can't use any strategy. */ - *pdf = 1.0f / M_4PI_F; - return sample_uniform_sphere(randu, randv); - } - - pdf_fac = 1.0f / pdf_fac; - portal_method_pdf *= pdf_fac; - sun_method_pdf *= pdf_fac; - map_method_pdf *= pdf_fac; - - /* We have 100% in total and split it between the three categories. - * Therefore, we pick portals if randu is between 0 and portal_method_pdf, - * sun if randu is between portal_method_pdf and (portal_method_pdf + sun_method_pdf) - * and map if randu is between (portal_method_pdf + sun_method_pdf) and 1. */ - float sun_method_cdf = portal_method_pdf + sun_method_pdf; - - int method = 0; - float3 D; - if (randu < portal_method_pdf) { - method = 0; - /* Rescale randu. */ - if (portal_method_pdf != 1.0f) { - randu /= portal_method_pdf; - } - - /* Sample a portal. */ - int portal; - D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf); - if (num_portals > 1) { - /* Ignore the chosen portal, its pdf is already included. */ - *pdf += background_portal_pdf(kg, P, D, portal, NULL); - } - - /* Skip MIS if this is the only method. */ - if (portal_method_pdf == 1.0f) { - return D; - } - *pdf *= portal_method_pdf; - } - else if (randu < sun_method_cdf) { - method = 1; - /* Rescale randu. 
*/ - if (sun_method_pdf != 1.0f) { - randu = (randu - portal_method_pdf) / sun_method_pdf; - } - - D = background_sun_sample(kg, randu, randv, pdf); - - /* Skip MIS if this is the only method. */ - if (sun_method_pdf == 1.0f) { - return D; - } - *pdf *= sun_method_pdf; - } - else { - method = 2; - /* Rescale randu. */ - if (map_method_pdf != 1.0f) { - randu = (randu - sun_method_cdf) / map_method_pdf; - } - - D = background_map_sample(kg, randu, randv, pdf); - - /* Skip MIS if this is the only method. */ - if (map_method_pdf == 1.0f) { - return D; - } - *pdf *= map_method_pdf; - } - - /* MIS weighting. */ - if (method != 0 && portal_method_pdf != 0.0f) { - *pdf += portal_method_pdf * background_portal_pdf(kg, P, D, -1, NULL); - } - if (method != 1 && sun_method_pdf != 0.0f) { - *pdf += sun_method_pdf * background_sun_pdf(kg, D); - } - if (method != 2 && map_method_pdf != 0.0f) { - *pdf += map_method_pdf * background_map_pdf(kg, D); - } - return D; -} - -ccl_device float background_light_pdf(KernelGlobals kg, float3 P, float3 direction) -{ - float portal_method_pdf = kernel_data.background.portal_weight; - float sun_method_pdf = kernel_data.background.sun_weight; - float map_method_pdf = kernel_data.background.map_weight; - - float portal_pdf = 0.0f; - /* Portals are a special case here since we need to compute their pdf in order - * to find out if we can sample them. */ - if (portal_method_pdf > 0.0f) { - /* Evaluate PDF of sampling this direction by portal sampling. */ - bool is_possible = false; - portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible); - if (!is_possible) { - /* Portal sampling is not possible here because all portals point to the wrong side. - * If other methods can be used instead, do so, otherwise uniform sampling is used as a - * fallback. */ - portal_method_pdf = 0.0f; - } - } - - float pdf_fac = (portal_method_pdf + sun_method_pdf + map_method_pdf); - if (pdf_fac == 0.0f) { - /* Use uniform as a fallback if we can't use any strategy. */ - return kernel_data.integrator.pdf_lights / M_4PI_F; - } - - pdf_fac = 1.0f / pdf_fac; - portal_method_pdf *= pdf_fac; - sun_method_pdf *= pdf_fac; - map_method_pdf *= pdf_fac; - - float pdf = portal_pdf * portal_method_pdf; - if (sun_method_pdf != 0.0f) { - pdf += background_sun_pdf(kg, direction) * sun_method_pdf; - } - if (map_method_pdf != 0.0f) { - pdf += background_map_pdf(kg, direction) * map_method_pdf; - } - - return pdf * kernel_data.integrator.pdf_lights; -} - -#endif - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/light/light_common.h b/intern/cycles/kernel/light/light_common.h deleted file mode 100644 index 207e89090cc..00000000000 --- a/intern/cycles/kernel/light/light_common.h +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright 2011-2020 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/sample/sample_mapping.h" - -CCL_NAMESPACE_BEGIN - -/* Area light sampling */ - -/* Uses the following paper: - * - * Carlos Urena et al. 
- * An Area-Preserving Parametrization for Spherical Rectangles. - * - * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf - * - * Note: light_p is modified when sample_coord is true. - */ -ccl_device_inline float rect_light_sample(float3 P, - ccl_private float3 *light_p, - float3 axisu, - float3 axisv, - float randu, - float randv, - bool sample_coord) -{ - /* In our name system we're using P for the center, - * which is o in the paper. - */ - - float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f; - float axisu_len, axisv_len; - /* Compute local reference system R. */ - float3 x = normalize_len(axisu, &axisu_len); - float3 y = normalize_len(axisv, &axisv_len); - float3 z = cross(x, y); - /* Compute rectangle coords in local reference system. */ - float3 dir = corner - P; - float z0 = dot(dir, z); - /* Flip 'z' to make it point against Q. */ - if (z0 > 0.0f) { - z *= -1.0f; - z0 *= -1.0f; - } - float x0 = dot(dir, x); - float y0 = dot(dir, y); - float x1 = x0 + axisu_len; - float y1 = y0 + axisv_len; - /* Compute internal angles (gamma_i). */ - float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1); - float4 nz = make_float4(y0, x1, y1, x0) * diff; - nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz); - float g0 = safe_acosf(-nz.x * nz.y); - float g1 = safe_acosf(-nz.y * nz.z); - float g2 = safe_acosf(-nz.z * nz.w); - float g3 = safe_acosf(-nz.w * nz.x); - /* Compute predefined constants. */ - float b0 = nz.x; - float b1 = nz.z; - float b0sq = b0 * b0; - float k = M_2PI_F - g2 - g3; - /* Compute solid angle from internal angles. */ - float S = g0 + g1 - k; - - if (sample_coord) { - /* Compute cu. */ - float au = randu * S + k; - float fu = (cosf(au) * b0 - b1) / sinf(au); - float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f); - cu = clamp(cu, -1.0f, 1.0f); - /* Compute xu. */ - float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f); - xu = clamp(xu, x0, x1); - /* Compute yv. */ - float z0sq = z0 * z0; - float y0sq = y0 * y0; - float y1sq = y1 * y1; - float d = sqrtf(xu * xu + z0sq); - float h0 = y0 / sqrtf(d * d + y0sq); - float h1 = y1 / sqrtf(d * d + y1sq); - float hv = h0 + randv * (h1 - h0), hv2 = hv * hv; - float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1; - - /* Transform (xu, yv, z0) to world coords. 
*/ - *light_p = P + xu * x + yv * y + z0 * z; - } - - /* return pdf */ - if (S != 0.0f) - return 1.0f / S; - else - return 0.0f; -} - -ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv) -{ - to_unit_disk(&randu, &randv); - return ru * randu + rv * randv; -} - -ccl_device float3 disk_light_sample(float3 v, float randu, float randv) -{ - float3 ru, rv; - - make_orthonormals(v, &ru, &rv); - - return ellipse_sample(ru, rv, randu, randv); -} - -ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv) -{ - return normalize(D + disk_light_sample(D, randu, randv) * radius); -} - -ccl_device float3 -sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv) -{ - return disk_light_sample(normalize(P - center), randu, randv) * radius; -} - -ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, float3 N) -{ - float attenuation = dot(dir, N); - - if (attenuation <= spot_angle) { - attenuation = 0.0f; - } - else { - float t = attenuation - spot_angle; - - if (t < spot_smooth && spot_smooth != 0.0f) - attenuation *= smoothstepf(t / spot_smooth); - } - - return attenuation; -} - -ccl_device float light_spread_attenuation(const float3 D, - const float3 lightNg, - const float tan_spread, - const float normalize_spread) -{ - /* Model a soft-box grid, computing the ratio of light not hidden by the - * slats of the grid at a given angle. (see D10594). */ - const float cos_a = -dot(D, lightNg); - const float sin_a = safe_sqrtf(1.0f - sqr(cos_a)); - const float tan_a = sin_a / cos_a; - return max((1.0f - (tan_spread * tan_a)) * normalize_spread, 0.0f); -} - -/* Compute subset of area light that actually has an influence on the shading point, to - * reduce noise with low spread. */ -ccl_device bool light_spread_clamp_area_light(const float3 P, - const float3 lightNg, - ccl_private float3 *lightP, - ccl_private float3 *axisu, - ccl_private float3 *axisv, - const float tan_spread) -{ - /* Closest point in area light plane and distance to that plane. */ - const float3 closest_P = P - dot(lightNg, P - *lightP) * lightNg; - const float t = len(closest_P - P); - - /* Radius of circle on area light that actually affects the shading point. */ - const float radius = t / tan_spread; - - /* TODO: would be faster to store as normalized vector + length, also in rect_light_sample. */ - float len_u, len_v; - const float3 u = normalize_len(*axisu, &len_u); - const float3 v = normalize_len(*axisv, &len_v); - - /* Local uv coordinates of closest point. */ - const float closest_u = dot(u, closest_P - *lightP); - const float closest_v = dot(v, closest_P - *lightP); - - /* Compute rectangle encompassing the circle that affects the shading point, - * clamped to the bounds of the area light. */ - const float min_u = max(closest_u - radius, -len_u * 0.5f); - const float max_u = min(closest_u + radius, len_u * 0.5f); - const float min_v = max(closest_v - radius, -len_v * 0.5f); - const float max_v = min(closest_v + radius, len_v * 0.5f); - - /* Skip if rectangle is empty. */ - if (min_u >= max_u || min_v >= max_v) { - return false; - } - - /* Compute new area light center position and axes from rectangle in local - * uv coordinates. 
*/ - const float new_center_u = 0.5f * (min_u + max_u); - const float new_center_v = 0.5f * (min_v + max_v); - const float new_len_u = max_u - min_u; - const float new_len_v = max_v - min_v; - - *lightP = *lightP + new_center_u * u + new_center_v * v; - *axisu = u * new_len_u; - *axisv = v * new_len_v; - - return true; -} - -ccl_device float lamp_light_pdf(KernelGlobals kg, const float3 Ng, const float3 I, float t) -{ - float cos_pi = dot(Ng, I); - - if (cos_pi <= 0.0f) - return 0.0f; - - return t * t / cos_pi; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/light/light_sample.h b/intern/cycles/kernel/light/light_sample.h deleted file mode 100644 index 4ae5d9e1944..00000000000 --- a/intern/cycles/kernel/light/light_sample.h +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "kernel/integrator/integrator_path_state.h" -#include "kernel/integrator/integrator_shader_eval.h" - -#include "kernel/light/light.h" - -#include "kernel/sample/sample_mapping.h" - -CCL_NAMESPACE_BEGIN - -/* Evaluate shader on light. */ -ccl_device_noinline_cpu float3 -light_sample_shader_eval(KernelGlobals kg, - IntegratorState state, - ccl_private ShaderData *ccl_restrict emission_sd, - ccl_private LightSample *ccl_restrict ls, - float time) -{ - /* setup shading at emitter */ - float3 eval = zero_float3(); - - if (shader_constant_emission_eval(kg, ls->shader, &eval)) { - if ((ls->prim != PRIM_NONE) && dot(ls->Ng, ls->D) > 0.0f) { - ls->Ng = -ls->Ng; - } - } - else { - /* Setup shader data and call shader_eval_surface once, better - * for GPU coherence and compile times. */ - PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); -#ifdef __BACKGROUND_MIS__ - if (ls->type == LIGHT_BACKGROUND) { - shader_setup_from_background(kg, emission_sd, ls->P, ls->D, time); - } - else -#endif - { - shader_setup_from_sample(kg, - emission_sd, - ls->P, - ls->Ng, - -ls->D, - ls->shader, - ls->object, - ls->prim, - ls->u, - ls->v, - ls->t, - time, - false, - ls->lamp); - - ls->Ng = emission_sd->Ng; - } - - PROFILING_SHADER(emission_sd->object, emission_sd->shader); - PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); - - /* No proper path flag, we're evaluating this for all closures. that's - * weak but we'd have to do multiple evaluations otherwise. */ - shader_eval_surface( - kg, state, emission_sd, NULL, PATH_RAY_EMISSION); - - /* Evaluate closures. */ -#ifdef __BACKGROUND_MIS__ - if (ls->type == LIGHT_BACKGROUND) { - eval = shader_background_eval(emission_sd); - } - else -#endif - { - eval = shader_emissive_eval(emission_sd); - } - } - - eval *= ls->eval_fac; - - if (ls->lamp != LAMP_NONE) { - ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp); - eval *= make_float3(klight->strength[0], klight->strength[1], klight->strength[2]); - } - - return eval; -} - -/* Test if light sample is from a light or emission from geometry. 
*/ -ccl_device_inline bool light_sample_is_light(ccl_private const LightSample *ccl_restrict ls) -{ - /* return if it's a lamp for shadow pass */ - return (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND); -} - -/* Early path termination of shadow rays. */ -ccl_device_inline bool light_sample_terminate(KernelGlobals kg, - ccl_private const LightSample *ccl_restrict ls, - ccl_private BsdfEval *ccl_restrict eval, - const float rand_terminate) -{ - if (bsdf_eval_is_zero(eval)) { - return true; - } - - if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) { - float probability = max3(fabs(bsdf_eval_sum(eval))) * - kernel_data.integrator.light_inv_rr_threshold; - if (probability < 1.0f) { - if (rand_terminate >= probability) { - return true; - } - bsdf_eval_mul(eval, 1.0f / probability); - } - } - - return false; -} - -/* This function should be used to compute a modified ray start position for - * rays leaving from a surface. The algorithm slightly distorts flat surface - * of a triangle. Surface is lifted by amount h along normal n in the incident - * point. */ - -ccl_device_inline float3 shadow_ray_smooth_surface_offset( - KernelGlobals kg, ccl_private const ShaderData *ccl_restrict sd, float3 Ng) -{ - float3 V[3], N[3]; - triangle_vertices_and_normals(kg, sd->prim, V, N); - - const float u = sd->u, v = sd->v; - const float w = 1 - u - v; - float3 P = V[0] * u + V[1] * v + V[2] * w; /* Local space */ - float3 n = N[0] * u + N[1] * v + N[2] * w; /* We get away without normalization */ - - object_normal_transform(kg, sd, &n); /* Normal x scale, world space */ - - /* Parabolic approximation */ - float a = dot(N[2] - N[0], V[0] - V[2]); - float b = dot(N[2] - N[1], V[1] - V[2]); - float c = dot(N[1] - N[0], V[1] - V[0]); - float h = a * u * (u - 1) + (a + b + c) * u * v + b * v * (v - 1); - - /* Check flipped normals */ - if (dot(n, Ng) > 0) { - /* Local linear envelope */ - float h0 = max(max(dot(V[1] - V[0], N[0]), dot(V[2] - V[0], N[0])), 0.0f); - float h1 = max(max(dot(V[0] - V[1], N[1]), dot(V[2] - V[1], N[1])), 0.0f); - float h2 = max(max(dot(V[0] - V[2], N[2]), dot(V[1] - V[2], N[2])), 0.0f); - h0 = max(dot(V[0] - P, N[0]) + h0, 0.0f); - h1 = max(dot(V[1] - P, N[1]) + h1, 0.0f); - h2 = max(dot(V[2] - P, N[2]) + h2, 0.0f); - h = max(min(min(h0, h1), h2), h * 0.5f); - } - else { - float h0 = max(max(dot(V[0] - V[1], N[0]), dot(V[0] - V[2], N[0])), 0.0f); - float h1 = max(max(dot(V[1] - V[0], N[1]), dot(V[1] - V[2], N[1])), 0.0f); - float h2 = max(max(dot(V[2] - V[0], N[2]), dot(V[2] - V[1], N[2])), 0.0f); - h0 = max(dot(P - V[0], N[0]) + h0, 0.0f); - h1 = max(dot(P - V[1], N[1]) + h1, 0.0f); - h2 = max(dot(P - V[2], N[2]) + h2, 0.0f); - h = min(-min(min(h0, h1), h2), h * 0.5f); - } - - return n * h; -} - -/* Ray offset to avoid shadow terminator artifact. */ - -ccl_device_inline float3 shadow_ray_offset(KernelGlobals kg, - ccl_private const ShaderData *ccl_restrict sd, - float3 L) -{ - float NL = dot(sd->N, L); - bool transmit = (NL < 0.0f); - float3 Ng = (transmit ? -sd->Ng : sd->Ng); - float3 P = ray_offset(sd->P, Ng); - - if ((sd->type & PRIMITIVE_ALL_TRIANGLE) && (sd->shader & SHADER_SMOOTH_NORMAL)) { - const float offset_cutoff = - kernel_tex_fetch(__objects, sd->object).shadow_terminator_geometry_offset; - /* Do ray offset (heavy stuff) only for close to be terminated triangles: - * offset_cutoff = 0.1f means that 10-20% of rays will be affected. Also - * make a smooth transition near the threshold. 
*/ - if (offset_cutoff > 0.0f) { - float NgL = dot(Ng, L); - float offset_amount = 0.0f; - if (NL < offset_cutoff) { - offset_amount = clamp(2.0f - (NgL + NL) / offset_cutoff, 0.0f, 1.0f); - } - else { - offset_amount = clamp(1.0f - NgL / offset_cutoff, 0.0f, 1.0f); - } - if (offset_amount > 0.0f) { - P += shadow_ray_smooth_surface_offset(kg, sd, Ng) * offset_amount; - } - } - } - - return P; -} - -ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restrict sd, - ccl_private const LightSample *ccl_restrict ls, - const float3 P, - ccl_private Ray *ray) -{ - if (ls->shader & SHADER_CAST_SHADOW) { - /* setup ray */ - ray->P = P; - - if (ls->t == FLT_MAX) { - /* distant light */ - ray->D = ls->D; - ray->t = ls->t; - } - else { - /* other lights, avoid self-intersection */ - ray->D = ray_offset(ls->P, ls->Ng) - P; - ray->D = normalize_len(ray->D, &ray->t); - } - } - else { - /* signal to not cast shadow ray */ - ray->P = zero_float3(); - ray->D = zero_float3(); - ray->t = 0.0f; - } - - ray->dP = differential_make_compact(sd->dP); - ray->dD = differential_zero_compact(); - ray->time = sd->time; -} - -/* Create shadow ray towards light sample. */ -ccl_device_inline void light_sample_to_surface_shadow_ray( - KernelGlobals kg, - ccl_private const ShaderData *ccl_restrict sd, - ccl_private const LightSample *ccl_restrict ls, - ccl_private Ray *ray) -{ - const float3 P = shadow_ray_offset(kg, sd, ls->D); - shadow_ray_setup(sd, ls, P, ray); -} - -/* Create shadow ray towards light sample. */ -ccl_device_inline void light_sample_to_volume_shadow_ray( - KernelGlobals kg, - ccl_private const ShaderData *ccl_restrict sd, - ccl_private const LightSample *ccl_restrict ls, - const float3 P, - ccl_private Ray *ray) -{ - shadow_ray_setup(sd, ls, P, ray); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h new file mode 100644 index 00000000000..6b643a95250 --- /dev/null +++ b/intern/cycles/kernel/light/sample.h @@ -0,0 +1,271 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/integrator/path_state.h" +#include "kernel/integrator/shader_eval.h" + +#include "kernel/light/light.h" + +#include "kernel/sample/mapping.h" + +CCL_NAMESPACE_BEGIN + +/* Evaluate shader on light. */ +ccl_device_noinline_cpu float3 +light_sample_shader_eval(KernelGlobals kg, + IntegratorState state, + ccl_private ShaderData *ccl_restrict emission_sd, + ccl_private LightSample *ccl_restrict ls, + float time) +{ + /* setup shading at emitter */ + float3 eval = zero_float3(); + + if (shader_constant_emission_eval(kg, ls->shader, &eval)) { + if ((ls->prim != PRIM_NONE) && dot(ls->Ng, ls->D) > 0.0f) { + ls->Ng = -ls->Ng; + } + } + else { + /* Setup shader data and call shader_eval_surface once, better + * for GPU coherence and compile times. 
*/ + PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP); +#ifdef __BACKGROUND_MIS__ + if (ls->type == LIGHT_BACKGROUND) { + shader_setup_from_background(kg, emission_sd, ls->P, ls->D, time); + } + else +#endif + { + shader_setup_from_sample(kg, + emission_sd, + ls->P, + ls->Ng, + -ls->D, + ls->shader, + ls->object, + ls->prim, + ls->u, + ls->v, + ls->t, + time, + false, + ls->lamp); + + ls->Ng = emission_sd->Ng; + } + + PROFILING_SHADER(emission_sd->object, emission_sd->shader); + PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL); + + /* No proper path flag, we're evaluating this for all closures. that's + * weak but we'd have to do multiple evaluations otherwise. */ + shader_eval_surface( + kg, state, emission_sd, NULL, PATH_RAY_EMISSION); + + /* Evaluate closures. */ +#ifdef __BACKGROUND_MIS__ + if (ls->type == LIGHT_BACKGROUND) { + eval = shader_background_eval(emission_sd); + } + else +#endif + { + eval = shader_emissive_eval(emission_sd); + } + } + + eval *= ls->eval_fac; + + if (ls->lamp != LAMP_NONE) { + ccl_global const KernelLight *klight = &kernel_tex_fetch(__lights, ls->lamp); + eval *= make_float3(klight->strength[0], klight->strength[1], klight->strength[2]); + } + + return eval; +} + +/* Test if light sample is from a light or emission from geometry. */ +ccl_device_inline bool light_sample_is_light(ccl_private const LightSample *ccl_restrict ls) +{ + /* return if it's a lamp for shadow pass */ + return (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND); +} + +/* Early path termination of shadow rays. */ +ccl_device_inline bool light_sample_terminate(KernelGlobals kg, + ccl_private const LightSample *ccl_restrict ls, + ccl_private BsdfEval *ccl_restrict eval, + const float rand_terminate) +{ + if (bsdf_eval_is_zero(eval)) { + return true; + } + + if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) { + float probability = max3(fabs(bsdf_eval_sum(eval))) * + kernel_data.integrator.light_inv_rr_threshold; + if (probability < 1.0f) { + if (rand_terminate >= probability) { + return true; + } + bsdf_eval_mul(eval, 1.0f / probability); + } + } + + return false; +} + +/* This function should be used to compute a modified ray start position for + * rays leaving from a surface. The algorithm slightly distorts flat surface + * of a triangle. Surface is lifted by amount h along normal n in the incident + * point. 
*/ + +ccl_device_inline float3 shadow_ray_smooth_surface_offset( + KernelGlobals kg, ccl_private const ShaderData *ccl_restrict sd, float3 Ng) +{ + float3 V[3], N[3]; + triangle_vertices_and_normals(kg, sd->prim, V, N); + + const float u = sd->u, v = sd->v; + const float w = 1 - u - v; + float3 P = V[0] * u + V[1] * v + V[2] * w; /* Local space */ + float3 n = N[0] * u + N[1] * v + N[2] * w; /* We get away without normalization */ + + object_normal_transform(kg, sd, &n); /* Normal x scale, world space */ + + /* Parabolic approximation */ + float a = dot(N[2] - N[0], V[0] - V[2]); + float b = dot(N[2] - N[1], V[1] - V[2]); + float c = dot(N[1] - N[0], V[1] - V[0]); + float h = a * u * (u - 1) + (a + b + c) * u * v + b * v * (v - 1); + + /* Check flipped normals */ + if (dot(n, Ng) > 0) { + /* Local linear envelope */ + float h0 = max(max(dot(V[1] - V[0], N[0]), dot(V[2] - V[0], N[0])), 0.0f); + float h1 = max(max(dot(V[0] - V[1], N[1]), dot(V[2] - V[1], N[1])), 0.0f); + float h2 = max(max(dot(V[0] - V[2], N[2]), dot(V[1] - V[2], N[2])), 0.0f); + h0 = max(dot(V[0] - P, N[0]) + h0, 0.0f); + h1 = max(dot(V[1] - P, N[1]) + h1, 0.0f); + h2 = max(dot(V[2] - P, N[2]) + h2, 0.0f); + h = max(min(min(h0, h1), h2), h * 0.5f); + } + else { + float h0 = max(max(dot(V[0] - V[1], N[0]), dot(V[0] - V[2], N[0])), 0.0f); + float h1 = max(max(dot(V[1] - V[0], N[1]), dot(V[1] - V[2], N[1])), 0.0f); + float h2 = max(max(dot(V[2] - V[0], N[2]), dot(V[2] - V[1], N[2])), 0.0f); + h0 = max(dot(P - V[0], N[0]) + h0, 0.0f); + h1 = max(dot(P - V[1], N[1]) + h1, 0.0f); + h2 = max(dot(P - V[2], N[2]) + h2, 0.0f); + h = min(-min(min(h0, h1), h2), h * 0.5f); + } + + return n * h; +} + +/* Ray offset to avoid shadow terminator artifact. */ + +ccl_device_inline float3 shadow_ray_offset(KernelGlobals kg, + ccl_private const ShaderData *ccl_restrict sd, + float3 L) +{ + float NL = dot(sd->N, L); + bool transmit = (NL < 0.0f); + float3 Ng = (transmit ? -sd->Ng : sd->Ng); + float3 P = ray_offset(sd->P, Ng); + + if ((sd->type & PRIMITIVE_ALL_TRIANGLE) && (sd->shader & SHADER_SMOOTH_NORMAL)) { + const float offset_cutoff = + kernel_tex_fetch(__objects, sd->object).shadow_terminator_geometry_offset; + /* Do ray offset (heavy stuff) only for close to be terminated triangles: + * offset_cutoff = 0.1f means that 10-20% of rays will be affected. Also + * make a smooth transition near the threshold. 
*/ + if (offset_cutoff > 0.0f) { + float NgL = dot(Ng, L); + float offset_amount = 0.0f; + if (NL < offset_cutoff) { + offset_amount = clamp(2.0f - (NgL + NL) / offset_cutoff, 0.0f, 1.0f); + } + else { + offset_amount = clamp(1.0f - NgL / offset_cutoff, 0.0f, 1.0f); + } + if (offset_amount > 0.0f) { + P += shadow_ray_smooth_surface_offset(kg, sd, Ng) * offset_amount; + } + } + } + + return P; +} + +ccl_device_inline void shadow_ray_setup(ccl_private const ShaderData *ccl_restrict sd, + ccl_private const LightSample *ccl_restrict ls, + const float3 P, + ccl_private Ray *ray) +{ + if (ls->shader & SHADER_CAST_SHADOW) { + /* setup ray */ + ray->P = P; + + if (ls->t == FLT_MAX) { + /* distant light */ + ray->D = ls->D; + ray->t = ls->t; + } + else { + /* other lights, avoid self-intersection */ + ray->D = ray_offset(ls->P, ls->Ng) - P; + ray->D = normalize_len(ray->D, &ray->t); + } + } + else { + /* signal to not cast shadow ray */ + ray->P = zero_float3(); + ray->D = zero_float3(); + ray->t = 0.0f; + } + + ray->dP = differential_make_compact(sd->dP); + ray->dD = differential_zero_compact(); + ray->time = sd->time; +} + +/* Create shadow ray towards light sample. */ +ccl_device_inline void light_sample_to_surface_shadow_ray( + KernelGlobals kg, + ccl_private const ShaderData *ccl_restrict sd, + ccl_private const LightSample *ccl_restrict ls, + ccl_private Ray *ray) +{ + const float3 P = shadow_ray_offset(kg, sd, ls->D); + shadow_ray_setup(sd, ls, P, ray); +} + +/* Create shadow ray towards light sample. */ +ccl_device_inline void light_sample_to_volume_shadow_ray( + KernelGlobals kg, + ccl_private const ShaderData *ccl_restrict sd, + ccl_private const LightSample *ccl_restrict ls, + const float3 P, + ccl_private Ray *ray) +{ + shadow_ray_setup(sd, ls, P, ray); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt index 7be1b7129e0..f226c95766f 100644 --- a/intern/cycles/kernel/osl/CMakeLists.txt +++ b/intern/cycles/kernel/osl/CMakeLists.txt @@ -25,17 +25,17 @@ set(SRC bsdf_diffuse_ramp.cpp bsdf_phong_ramp.cpp emissive.cpp - osl_bssrdf.cpp - osl_closures.cpp - osl_services.cpp - osl_shader.cpp + bssrdf.cpp + closures.cpp + services.cpp + shader.cpp ) set(HEADER_SRC - osl_closures.h - osl_globals.h - osl_services.h - osl_shader.h + closures.h + globals.h + services.h + shader.h ) set(LIB diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp index bb290a5ced2..540180f99e8 100644 --- a/intern/cycles/kernel/osl/background.cpp +++ b/intern/cycles/kernel/osl/background.cpp @@ -34,7 +34,7 @@ #include -#include "kernel/osl/osl_closures.h" +#include "kernel/osl/closures.h" // clang-format off #include "kernel/device/cpu/compat.h" diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp index 2ec7f14c0fa..768531a0bf9 100644 --- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp +++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp @@ -35,10 +35,10 @@ #include #include "kernel/device/cpu/compat.h" -#include "kernel/osl/osl_closures.h" +#include "kernel/osl/closures.h" // clang-format off -#include "kernel/kernel_types.h" +#include "kernel/types.h" #include "kernel/closure/alloc.h" #include "kernel/closure/bsdf_diffuse_ramp.h" // clang-format on diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp index 90160fba962..d34a33216a0 100644 --- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp +++ 
b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp @@ -35,10 +35,10 @@ #include #include "kernel/device/cpu/compat.h" -#include "kernel/osl/osl_closures.h" +#include "kernel/osl/closures.h" // clang-format off -#include "kernel/kernel_types.h" +#include "kernel/types.h" #include "kernel/closure/alloc.h" #include "kernel/closure/bsdf_phong_ramp.h" // clang-format on diff --git a/intern/cycles/kernel/osl/bssrdf.cpp b/intern/cycles/kernel/osl/bssrdf.cpp new file mode 100644 index 00000000000..7c7f1ce157f --- /dev/null +++ b/intern/cycles/kernel/osl/bssrdf.cpp @@ -0,0 +1,122 @@ +/* + * Adapted from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2011, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "kernel/device/cpu/compat.h" +#include "kernel/osl/closures.h" + +// clang-format off +#include "kernel/types.h" + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/bsdf_diffuse.h" +#include "kernel/closure/bsdf_principled_diffuse.h" +#include "kernel/closure/bssrdf.h" +// clang-format on + +CCL_NAMESPACE_BEGIN + +using namespace OSL; + +static ustring u_burley("burley"); +static ustring u_random_walk_fixed_radius("random_walk_fixed_radius"); +static ustring u_random_walk("random_walk"); + +class CBSSRDFClosure : public CClosurePrimitive { + public: + Bssrdf params; + float ior; + ustring method; + + CBSSRDFClosure() + { + params.roughness = FLT_MAX; + params.anisotropy = 1.0f; + ior = 1.4f; + } + + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + if (method == u_burley) { + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID); + } + else if (method == u_random_walk_fixed_radius) { + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); + } + else if (method == u_random_walk) { + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_ID); + } + } + + void alloc(ShaderData *sd, uint32_t path_flag, float3 weight, ClosureType type) + { + Bssrdf *bssrdf = bssrdf_alloc(sd, weight); + + if (bssrdf) { + /* disable in case of diffuse ancestor, can't see it well then and + * adds considerably noise due to probabilities of continuing path + * getting lower and lower */ + if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) { + params.radius = make_float3(0.0f, 0.0f, 0.0f); + } + + /* create one closure per color channel */ + bssrdf->radius = params.radius; + bssrdf->albedo = params.albedo; + bssrdf->N = params.N; + bssrdf->roughness = params.roughness; + bssrdf->anisotropy = clamp(params.anisotropy, 0.0f, 0.9f); + sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type, clamp(ior, 1.01f, 3.8f)); + } + } +}; + +ClosureParam *closure_bssrdf_params() +{ + static ClosureParam params[] = { + CLOSURE_STRING_PARAM(CBSSRDFClosure, method), + CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.N), + CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.radius), + CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.albedo), + CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.roughness, "roughness"), + CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, ior, "ior"), + CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.anisotropy, "anisotropy"), + CLOSURE_STRING_KEYPARAM(CBSSRDFClosure, label, "label"), + CLOSURE_FINISH_PARAM(CBSSRDFClosure)}; + return params; +} + +CCLOSURE_PREPARE(closure_bssrdf_prepare, CBSSRDFClosure) + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/closures.cpp b/intern/cycles/kernel/osl/closures.cpp new file mode 100644 index 00000000000..adc0f50aefb --- /dev/null +++ b/intern/cycles/kernel/osl/closures.cpp @@ -0,0 +1,1006 @@ +/* + * Adapted from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2011-2018, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include "kernel/osl/closures.h" +#include "kernel/osl/shader.h" + +#include "util/math.h" +#include "util/param.h" + +// clang-format off +#include "kernel/device/cpu/compat.h" +#include "kernel/device/cpu/globals.h" + +#include "kernel/types.h" + +#include "kernel/closure/alloc.h" +#include "kernel/closure/bsdf_util.h" +#include "kernel/closure/bsdf_ashikhmin_velvet.h" +#include "kernel/closure/bsdf_diffuse.h" +#include "kernel/closure/bsdf_microfacet.h" +#include "kernel/closure/bsdf_microfacet_multi.h" +#include "kernel/closure/bsdf_oren_nayar.h" +#include "kernel/closure/bsdf_reflection.h" +#include "kernel/closure/bsdf_refraction.h" +#include "kernel/closure/bsdf_transparent.h" +#include "kernel/closure/bsdf_ashikhmin_shirley.h" +#include "kernel/closure/bsdf_toon.h" +#include "kernel/closure/bsdf_hair.h" +#include "kernel/closure/bsdf_hair_principled.h" +#include "kernel/closure/bsdf_principled_diffuse.h" +#include "kernel/closure/bsdf_principled_sheen.h" +#include "kernel/closure/volume.h" +// clang-format on + +CCL_NAMESPACE_BEGIN + +using namespace OSL; + +/* BSDF class definitions */ + +BSDF_CLOSURE_CLASS_BEGIN(Diffuse, diffuse, DiffuseBsdf, LABEL_DIFFUSE) + BSDF_CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N) +BSDF_CLOSURE_CLASS_END(Diffuse, diffuse) + +BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, DiffuseBsdf, LABEL_DIFFUSE) + BSDF_CLOSURE_FLOAT3_PARAM(TranslucentClosure, params.N) +BSDF_CLOSURE_CLASS_END(Translucent, translucent) + +BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, OrenNayarBsdf, LABEL_DIFFUSE) + BSDF_CLOSURE_FLOAT3_PARAM(OrenNayarClosure, params.N) + BSDF_CLOSURE_FLOAT_PARAM(OrenNayarClosure, params.roughness) +BSDF_CLOSURE_CLASS_END(OrenNayar, oren_nayar) + +BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, MicrofacetBsdf, LABEL_SINGULAR) + BSDF_CLOSURE_FLOAT3_PARAM(ReflectionClosure, params.N) +BSDF_CLOSURE_CLASS_END(Reflection, reflection) + +BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, MicrofacetBsdf, LABEL_SINGULAR) + BSDF_CLOSURE_FLOAT3_PARAM(RefractionClosure, params.N) + BSDF_CLOSURE_FLOAT_PARAM(RefractionClosure, params.ior) +BSDF_CLOSURE_CLASS_END(Refraction, refraction) + +BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, VelvetBsdf, LABEL_DIFFUSE) + BSDF_CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, 
params.N) + BSDF_CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, params.sigma) +BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet) + +BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, + ashikhmin_shirley, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_REFLECT) + BSDF_CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N) + BSDF_CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T) + BSDF_CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x) + BSDF_CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y) +BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley) + +BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE) + BSDF_CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N) + BSDF_CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.size) + BSDF_CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.smooth) +BSDF_CLOSURE_CLASS_END(DiffuseToon, diffuse_toon) + +BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, ToonBsdf, LABEL_GLOSSY) + BSDF_CLOSURE_FLOAT3_PARAM(GlossyToonClosure, params.N) + BSDF_CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.size) + BSDF_CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth) +BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon) + +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXIsotropic, + microfacet_ggx_isotropic, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_REFLECT) + BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXIsotropicClosure, params.N) + BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXIsotropicClosure, params.alpha_x) +BSDF_CLOSURE_CLASS_END(MicrofacetGGXIsotropic, microfacet_ggx_isotropic) + +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, + microfacet_ggx, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_REFLECT) + BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N) + BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.T) + BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x) + BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_y) +BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx) + +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannIsotropic, + microfacet_beckmann_isotropic, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_REFLECT) + BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannIsotropicClosure, params.N) + BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannIsotropicClosure, params.alpha_x) +BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannIsotropic, microfacet_beckmann_isotropic) + +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, + microfacet_beckmann, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_REFLECT) + BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N) + BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.T) + BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x) + BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_y) +BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann) + +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, + microfacet_ggx_refraction, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_TRANSMIT) + BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, params.N) + BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.alpha_x) + BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.ior) +BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction) + +BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, + microfacet_beckmann_refraction, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_TRANSMIT) + BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, params.N) + BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.alpha_x) + 
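/* Note on the declarations above and below: BSDF_CLOSURE_CLASS_BEGIN /
 * BSDF_CLOSURE_CLASS_END (defined in closures.h later in this patch) expand each
 * block into a <Name>Closure class whose setup() allocates the given BSDF struct
 * with bsdf_alloc_osl() and calls bsdf_<name>_setup(), plus a static
 * bsdf_<name>_params() table and a bsdf_<name>_prepare() function that
 * register_closures() below hands to the OSL shading system. */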
BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.ior) +BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction) + +BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY) + BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N) + BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1) + BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2) + BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T) + BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset) +BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection) + +BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY) + BSDF_CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N) + BSDF_CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1) + BSDF_CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2) + BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T) + BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset) +BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission) + +BSDF_CLOSURE_CLASS_BEGIN(PrincipledDiffuse, + principled_diffuse, + PrincipledDiffuseBsdf, + LABEL_DIFFUSE) + BSDF_CLOSURE_FLOAT3_PARAM(PrincipledDiffuseClosure, params.N) + BSDF_CLOSURE_FLOAT_PARAM(PrincipledDiffuseClosure, params.roughness) +BSDF_CLOSURE_CLASS_END(PrincipledDiffuse, principled_diffuse) + +class PrincipledSheenClosure : public CBSDFClosure { + public: + PrincipledSheenBsdf params; + + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + if (!skip(sd, path_flag, LABEL_DIFFUSE)) { + PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)bsdf_alloc_osl( + sd, sizeof(PrincipledSheenBsdf), weight, ¶ms); + sd->flag |= (bsdf) ? bsdf_principled_sheen_setup(sd, bsdf) : 0; + } + } +}; + +static ClosureParam *bsdf_principled_sheen_params() +{ + static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N), + CLOSURE_STRING_KEYPARAM(PrincipledSheenClosure, label, "label"), + CLOSURE_FINISH_PARAM(PrincipledSheenClosure)}; + return params; +} + +CCLOSURE_PREPARE_STATIC(closure_bsdf_principled_sheen_prepare, PrincipledSheenClosure) + +/* PRINCIPLED HAIR BSDF */ +class PrincipledHairClosure : public CBSDFClosure { + public: + PrincipledHairBSDF params; + + PrincipledHairBSDF *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) + { + PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc_osl( + sd, sizeof(PrincipledHairBSDF), weight, ¶ms); + if (!bsdf) { + return NULL; + } + + PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra( + sd, sizeof(PrincipledHairExtra)); + if (!extra) { + return NULL; + } + + bsdf->extra = extra; + return bsdf; + } + + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + if (!skip(sd, path_flag, LABEL_GLOSSY)) { + PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + sd->flag |= (bsdf) ? 
bsdf_principled_hair_setup(sd, bsdf) : 0; + } + } +}; + +static ClosureParam *closure_bsdf_principled_hair_params() +{ + static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.N), + CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.sigma), + CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.v), + CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.s), + CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.m0_roughness), + CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.alpha), + CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.eta), + CLOSURE_STRING_KEYPARAM(PrincipledHairClosure, label, "label"), + CLOSURE_FINISH_PARAM(PrincipledHairClosure)}; + + return params; +} + +CCLOSURE_PREPARE(closure_bsdf_principled_hair_prepare, PrincipledHairClosure) + +/* DISNEY PRINCIPLED CLEARCOAT */ +class PrincipledClearcoatClosure : public CBSDFClosure { + public: + MicrofacetBsdf params; + float clearcoat, clearcoat_roughness; + + MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( + sd, sizeof(MicrofacetBsdf), weight, ¶ms); + if (!bsdf) { + return NULL; + } + + MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (!extra) { + return NULL; + } + + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = extra; + bsdf->ior = 1.5f; + bsdf->alpha_x = clearcoat_roughness; + bsdf->alpha_y = clearcoat_roughness; + bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); + bsdf->extra->clearcoat = clearcoat; + return bsdf; + } + + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); + } +}; + +ClosureParam *closure_bsdf_principled_clearcoat_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N), + CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat), + CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness), + CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"), + CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure)}; + return params; +} +CCLOSURE_PREPARE(closure_bsdf_principled_clearcoat_prepare, PrincipledClearcoatClosure) + +/* Registration */ + +static void register_closure(OSL::ShadingSystem *ss, + const char *name, + int id, + OSL::ClosureParam *params, + OSL::PrepareClosureFunc prepare) +{ + /* optimization: it's possible to not use a prepare function at all and + * only initialize the actual class when accessing the closure component + * data, but then we need to map the id to the class somehow */ +#if OSL_LIBRARY_VERSION_CODE >= 10900 + ss->register_closure(name, id, params, prepare, NULL); +#else + ss->register_closure(name, id, params, prepare, NULL, 16); +#endif +} + +void OSLShader::register_closures(OSLShadingSystem *ss_) +{ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)ss_; + int id = 0; + + register_closure(ss, "diffuse", id++, bsdf_diffuse_params(), bsdf_diffuse_prepare); + register_closure(ss, "oren_nayar", id++, bsdf_oren_nayar_params(), bsdf_oren_nayar_prepare); + register_closure(ss, "translucent", id++, bsdf_translucent_params(), bsdf_translucent_prepare); + register_closure(ss, "reflection", id++, bsdf_reflection_params(), bsdf_reflection_prepare); + register_closure(ss, "refraction", id++, bsdf_refraction_params(), 
bsdf_refraction_prepare); + register_closure(ss, + "transparent", + id++, + closure_bsdf_transparent_params(), + closure_bsdf_transparent_prepare); + + register_closure( + ss, "microfacet", id++, closure_bsdf_microfacet_params(), closure_bsdf_microfacet_prepare); + register_closure(ss, + "microfacet_ggx", + id++, + bsdf_microfacet_ggx_isotropic_params(), + bsdf_microfacet_ggx_isotropic_prepare); + register_closure( + ss, "microfacet_ggx_aniso", id++, bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare); + register_closure(ss, + "microfacet_ggx_refraction", + id++, + bsdf_microfacet_ggx_refraction_params(), + bsdf_microfacet_ggx_refraction_prepare); + register_closure(ss, + "microfacet_multi_ggx", + id++, + closure_bsdf_microfacet_multi_ggx_params(), + closure_bsdf_microfacet_multi_ggx_prepare); + register_closure(ss, + "microfacet_multi_ggx_glass", + id++, + closure_bsdf_microfacet_multi_ggx_glass_params(), + closure_bsdf_microfacet_multi_ggx_glass_prepare); + register_closure(ss, + "microfacet_multi_ggx_aniso", + id++, + closure_bsdf_microfacet_multi_ggx_aniso_params(), + closure_bsdf_microfacet_multi_ggx_aniso_prepare); + register_closure(ss, + "microfacet_ggx_fresnel", + id++, + closure_bsdf_microfacet_ggx_fresnel_params(), + closure_bsdf_microfacet_ggx_fresnel_prepare); + register_closure(ss, + "microfacet_ggx_aniso_fresnel", + id++, + closure_bsdf_microfacet_ggx_aniso_fresnel_params(), + closure_bsdf_microfacet_ggx_aniso_fresnel_prepare); + register_closure(ss, + "microfacet_multi_ggx_fresnel", + id++, + closure_bsdf_microfacet_multi_ggx_fresnel_params(), + closure_bsdf_microfacet_multi_ggx_fresnel_prepare); + register_closure(ss, + "microfacet_multi_ggx_glass_fresnel", + id++, + closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(), + closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare); + register_closure(ss, + "microfacet_multi_ggx_aniso_fresnel", + id++, + closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(), + closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare); + register_closure(ss, + "microfacet_beckmann", + id++, + bsdf_microfacet_beckmann_isotropic_params(), + bsdf_microfacet_beckmann_isotropic_prepare); + register_closure(ss, + "microfacet_beckmann_aniso", + id++, + bsdf_microfacet_beckmann_params(), + bsdf_microfacet_beckmann_prepare); + register_closure(ss, + "microfacet_beckmann_refraction", + id++, + bsdf_microfacet_beckmann_refraction_params(), + bsdf_microfacet_beckmann_refraction_prepare); + register_closure(ss, + "ashikhmin_shirley", + id++, + bsdf_ashikhmin_shirley_params(), + bsdf_ashikhmin_shirley_prepare); + register_closure( + ss, "ashikhmin_velvet", id++, bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare); + register_closure( + ss, "diffuse_toon", id++, bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare); + register_closure(ss, "glossy_toon", id++, bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare); + register_closure(ss, + "principled_diffuse", + id++, + bsdf_principled_diffuse_params(), + bsdf_principled_diffuse_prepare); + register_closure(ss, + "principled_sheen", + id++, + bsdf_principled_sheen_params(), + closure_bsdf_principled_sheen_prepare); + register_closure(ss, + "principled_clearcoat", + id++, + closure_bsdf_principled_clearcoat_params(), + closure_bsdf_principled_clearcoat_prepare); + + register_closure(ss, "emission", id++, closure_emission_params(), closure_emission_prepare); + register_closure( + ss, "background", id++, closure_background_params(), closure_background_prepare); + register_closure(ss, 
"holdout", id++, closure_holdout_params(), closure_holdout_prepare); + register_closure(ss, + "diffuse_ramp", + id++, + closure_bsdf_diffuse_ramp_params(), + closure_bsdf_diffuse_ramp_prepare); + register_closure( + ss, "phong_ramp", id++, closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare); + register_closure(ss, "bssrdf", id++, closure_bssrdf_params(), closure_bssrdf_prepare); + + register_closure( + ss, "hair_reflection", id++, bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare); + register_closure(ss, + "hair_transmission", + id++, + bsdf_hair_transmission_params(), + bsdf_hair_transmission_prepare); + + register_closure(ss, + "principled_hair", + id++, + closure_bsdf_principled_hair_params(), + closure_bsdf_principled_hair_prepare); + + register_closure(ss, + "henyey_greenstein", + id++, + closure_henyey_greenstein_params(), + closure_henyey_greenstein_prepare); + register_closure( + ss, "absorption", id++, closure_absorption_params(), closure_absorption_prepare); +} + +/* BSDF Closure */ + +bool CBSDFClosure::skip(const ShaderData *sd, uint32_t path_flag, int scattering) +{ + /* caustic options */ + if ((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { + const KernelGlobalsCPU *kg = sd->osl_globals; + + if ((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || + (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) { + return true; + } + } + + return false; +} + +/* Standard Microfacet Closure */ + +class MicrofacetClosure : public CBSDFClosure { + public: + MicrofacetBsdf params; + ustring distribution; + int refract; + + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + static ustring u_ggx("ggx"); + static ustring u_default("default"); + + const int label = (refract) ? 
LABEL_TRANSMIT : LABEL_REFLECT; + if (skip(sd, path_flag, LABEL_GLOSSY | label)) { + return; + } + + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( + sd, sizeof(MicrofacetBsdf), weight, ¶ms); + + if (!bsdf) { + return; + } + + /* GGX */ + if (distribution == u_ggx || distribution == u_default) { + if (!refract) { + if (params.alpha_x == params.alpha_y) { + /* Isotropic */ + sd->flag |= bsdf_microfacet_ggx_isotropic_setup(bsdf); + } + else { + /* Anisotropic */ + sd->flag |= bsdf_microfacet_ggx_setup(bsdf); + } + } + else { + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); + } + } + /* Beckmann */ + else { + if (!refract) { + if (params.alpha_x == params.alpha_y) { + /* Isotropic */ + sd->flag |= bsdf_microfacet_beckmann_isotropic_setup(bsdf); + } + else { + /* Anisotropic */ + sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); + } + } + else { + sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); + } + } + } +}; + +ClosureParam *closure_bsdf_microfacet_params() +{ + static ClosureParam params[] = {CLOSURE_STRING_PARAM(MicrofacetClosure, distribution), + CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.N), + CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_y), + CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.ior), + CLOSURE_INT_PARAM(MicrofacetClosure, refract), + CLOSURE_STRING_KEYPARAM(MicrofacetClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetClosure)}; + + return params; +} +CCLOSURE_PREPARE(closure_bsdf_microfacet_prepare, MicrofacetClosure) + +/* GGX closures with Fresnel */ + +class MicrofacetFresnelClosure : public CBSDFClosure { + public: + MicrofacetBsdf params; + float3 color; + float3 cspec0; + + MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) + { + /* Technically, the MultiGGX Glass closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. 
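 * (Concretely: the skip() call below passes LABEL_GLOSSY | LABEL_REFLECT, so only
 * the reflective-caustics option can reject this closure; the refractive-caustics
 * setting is never consulted even though the closure can transmit.)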
*/ + if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return NULL; + } + + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( + sd, sizeof(MicrofacetBsdf), weight, ¶ms); + if (!bsdf) { + return NULL; + } + + MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (!extra) { + return NULL; + } + + bsdf->extra = extra; + bsdf->extra->color = color; + bsdf->extra->cspec0 = cspec0; + bsdf->extra->clearcoat = 0.0f; + return bsdf; + } +}; + +class MicrofacetGGXFresnelClosure : public MicrofacetFresnelClosure { + public: + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_y = bsdf->alpha_x; + sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); + } +}; + +ClosureParam *closure_bsdf_microfacet_ggx_fresnel_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0), + CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)}; + return params; +} +CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_fresnel_prepare, MicrofacetGGXFresnelClosure); + +class MicrofacetGGXAnisoFresnelClosure : public MicrofacetFresnelClosure { + public: + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); + } +}; + +ClosureParam *closure_bsdf_microfacet_ggx_aniso_fresnel_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_y), + CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0), + CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)}; + return params; +} +CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_aniso_fresnel_prepare, + MicrofacetGGXAnisoFresnelClosure); + +/* Multiscattering GGX closures */ + +class MicrofacetMultiClosure : public CBSDFClosure { + public: + MicrofacetBsdf params; + float3 color; + + MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) + { + /* Technically, the MultiGGX closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. 
*/ + if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return NULL; + } + + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( + sd, sizeof(MicrofacetBsdf), weight, ¶ms); + if (!bsdf) { + return NULL; + } + + MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (!extra) { + return NULL; + } + + bsdf->extra = extra; + bsdf->extra->color = color; + bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->clearcoat = 0.0f; + return bsdf; + } +}; + +class MicrofacetMultiGGXClosure : public MicrofacetMultiClosure { + public: + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->ior = 0.0f; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_y = bsdf->alpha_x; + sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); + } +}; + +ClosureParam *closure_bsdf_microfacet_multi_ggx_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; + return params; +} +CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_prepare, MicrofacetMultiGGXClosure); + +class MicrofacetMultiGGXAnisoClosure : public MicrofacetMultiClosure { + public: + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->ior = 0.0f; + sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); + } +}; + +ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_y), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; + return params; +} +CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_prepare, MicrofacetMultiGGXAnisoClosure); + +class MicrofacetMultiGGXGlassClosure : public MicrofacetMultiClosure { + public: + MicrofacetMultiGGXGlassClosure() : MicrofacetMultiClosure() + { + } + + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_y = bsdf->alpha_x; + sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); + } +}; + +ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; + return params; +} +CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_prepare, MicrofacetMultiGGXGlassClosure); + +/* Multiscattering GGX closures with Fresnel */ + +class MicrofacetMultiFresnelClosure : public 
CBSDFClosure { + public: + MicrofacetBsdf params; + float3 color; + float3 cspec0; + + MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) + { + /* Technically, the MultiGGX closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. */ + if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return NULL; + } + + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( + sd, sizeof(MicrofacetBsdf), weight, ¶ms); + if (!bsdf) { + return NULL; + } + + MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (!extra) { + return NULL; + } + + bsdf->extra = extra; + bsdf->extra->color = color; + bsdf->extra->cspec0 = cspec0; + bsdf->extra->clearcoat = 0.0f; + return bsdf; + } +}; + +class MicrofacetMultiGGXFresnelClosure : public MicrofacetMultiFresnelClosure { + public: + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_y = bsdf->alpha_x; + sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); + } +}; + +ClosureParam *closure_bsdf_microfacet_multi_ggx_fresnel_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; + return params; +} +CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_fresnel_prepare, + MicrofacetMultiGGXFresnelClosure); + +class MicrofacetMultiGGXAnisoFresnelClosure : public MicrofacetMultiFresnelClosure { + public: + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); + } +}; + +ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_y), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; + return params; +} +CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare, + MicrofacetMultiGGXAnisoFresnelClosure); + +class MicrofacetMultiGGXGlassFresnelClosure : public MicrofacetMultiFresnelClosure { + public: + MicrofacetMultiGGXGlassFresnelClosure() : MicrofacetMultiFresnelClosure() + { + } + + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_y = bsdf->alpha_x; + 
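/* The glass flavour of this closure exposes only a single, isotropic roughness
 * (its parameter table below lists alpha_x but no alpha_y or T), so alpha_y just
 * mirrors alpha_x and the tangent T set above stays at zero. */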
sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); + } +}; + +ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_fresnel_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; + return params; +} +CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare, + MicrofacetMultiGGXGlassFresnelClosure); + +/* Transparent */ + +class TransparentClosure : public CBSDFClosure { + public: + ShaderClosure params; + float3 unused; + + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + bsdf_transparent_setup(sd, weight, path_flag); + } +}; + +ClosureParam *closure_bsdf_transparent_params() +{ + static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(TransparentClosure, label, "label"), + CLOSURE_FINISH_PARAM(TransparentClosure)}; + return params; +} + +CCLOSURE_PREPARE(closure_bsdf_transparent_prepare, TransparentClosure) + +/* Volume */ + +class VolumeAbsorptionClosure : public CBSDFClosure { + public: + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + volume_extinction_setup(sd, weight); + } +}; + +ClosureParam *closure_absorption_params() +{ + static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(VolumeAbsorptionClosure, label, "label"), + CLOSURE_FINISH_PARAM(VolumeAbsorptionClosure)}; + return params; +} + +CCLOSURE_PREPARE(closure_absorption_prepare, VolumeAbsorptionClosure) + +class VolumeHenyeyGreensteinClosure : public CBSDFClosure { + public: + HenyeyGreensteinVolume params; + + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) + { + volume_extinction_setup(sd, weight); + + HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc_osl( + sd, sizeof(HenyeyGreensteinVolume), weight, ¶ms); + if (!volume) { + return; + } + + sd->flag |= volume_henyey_greenstein_setup(volume); + } +}; + +ClosureParam *closure_henyey_greenstein_params() +{ + static ClosureParam params[] = { + CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, params.g), + CLOSURE_STRING_KEYPARAM(VolumeHenyeyGreensteinClosure, label, "label"), + CLOSURE_FINISH_PARAM(VolumeHenyeyGreensteinClosure)}; + return params; +} + +CCLOSURE_PREPARE(closure_henyey_greenstein_prepare, VolumeHenyeyGreensteinClosure) + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/closures.h b/intern/cycles/kernel/osl/closures.h new file mode 100644 index 00000000000..8f573e89734 --- /dev/null +++ b/intern/cycles/kernel/osl/closures.h @@ -0,0 +1,164 @@ +/* + * Adapted from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2011, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
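One detail worth spelling out from the registration code above: for every closure name, register_closure() gives OSL a parameter table plus a prepare callback generated by CCLOSURE_PREPARE (declared in the header that follows), and that callback simply zeroes the memory OSL provides and placement-constructs the matching C++ closure object in it. The sketch below shows only that prepare pattern, with invented names and no OSL dependency:

#include <cstdio>
#include <cstring>
#include <new>

/* Hypothetical closure type standing in for e.g. TransparentClosure. */
struct DemoClosure {
  float weight = 1.0f;

  void setup()
  {
    printf("setup, weight = %.1f\n", weight);
  }
};

/* Roughly what a CCLOSURE_PREPARE-generated function does: zero the raw
 * storage handed in by the shading system, then construct the closure
 * object in place. */
static void demo_prepare(void *data)
{
  memset(data, 0, sizeof(DemoClosure));
  new (data) DemoClosure();
}

int main()
{
  /* In the real code the shading system owns this storage; a local buffer
   * stands in here. */
  alignas(DemoClosure) unsigned char storage[sizeof(DemoClosure)];
  demo_prepare(storage);
  reinterpret_cast<DemoClosure *>(storage)->setup();
  return 0;
}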
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __OSL_CLOSURES_H__ +#define __OSL_CLOSURES_H__ + +#include "kernel/types.h" +#include "util/types.h" + +#include +#include +#include + +CCL_NAMESPACE_BEGIN + +OSL::ClosureParam *closure_emission_params(); +OSL::ClosureParam *closure_background_params(); +OSL::ClosureParam *closure_holdout_params(); +OSL::ClosureParam *closure_bsdf_diffuse_ramp_params(); +OSL::ClosureParam *closure_bsdf_phong_ramp_params(); +OSL::ClosureParam *closure_bsdf_transparent_params(); +OSL::ClosureParam *closure_bssrdf_params(); +OSL::ClosureParam *closure_absorption_params(); +OSL::ClosureParam *closure_henyey_greenstein_params(); +OSL::ClosureParam *closure_bsdf_microfacet_params(); +OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_params(); +OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params(); +OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params(); +OSL::ClosureParam *closure_bsdf_microfacet_ggx_fresnel_params(); +OSL::ClosureParam *closure_bsdf_microfacet_ggx_aniso_fresnel_params(); +OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_fresnel_params(); +OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(); +OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(); +OSL::ClosureParam *closure_bsdf_principled_clearcoat_params(); + +void closure_emission_prepare(OSL::RendererServices *, int id, void *data); +void closure_background_prepare(OSL::RendererServices *, int id, void *data); +void closure_holdout_prepare(OSL::RendererServices *, int id, void *data); +void closure_bsdf_diffuse_ramp_prepare(OSL::RendererServices *, int id, void *data); +void closure_bsdf_phong_ramp_prepare(OSL::RendererServices *, int id, void *data); +void closure_bsdf_transparent_prepare(OSL::RendererServices *, int id, void *data); +void closure_bssrdf_prepare(OSL::RendererServices *, int id, void *data); +void closure_absorption_prepare(OSL::RendererServices *, int id, void *data); +void closure_henyey_greenstein_prepare(OSL::RendererServices *, int id, void *data); +void closure_bsdf_microfacet_prepare(OSL::RendererServices *, int id, void *data); +void closure_bsdf_microfacet_multi_ggx_prepare(OSL::RendererServices *, int id, void *data); +void closure_bsdf_microfacet_multi_ggx_glass_prepare(OSL::RendererServices *, int id, void *data); +void 
closure_bsdf_microfacet_multi_ggx_aniso_prepare(OSL::RendererServices *, int id, void *data); +void closure_bsdf_microfacet_ggx_fresnel_prepare(OSL::RendererServices *, int id, void *data); +void closure_bsdf_microfacet_ggx_aniso_fresnel_prepare(OSL::RendererServices *, + int id, + void *data); +void closure_bsdf_microfacet_multi_ggx_fresnel_prepare(OSL::RendererServices *, + int id, + void *data); +void closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare(OSL::RendererServices *, + int id, + void *data); +void closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare(OSL::RendererServices *, + int id, + void *data); +void closure_bsdf_principled_clearcoat_prepare(OSL::RendererServices *, int id, void *data); +void closure_bsdf_principled_hair_prepare(OSL::RendererServices *, int id, void *data); + +#define CCLOSURE_PREPARE(name, classname) \ + void name(RendererServices *, int id, void *data) \ + { \ + memset(data, 0, sizeof(classname)); \ + new (data) classname(); \ + } + +#define CCLOSURE_PREPARE_STATIC(name, classname) static CCLOSURE_PREPARE(name, classname) + +#define CLOSURE_FLOAT3_PARAM(st, fld) \ + { \ + TypeDesc::TypeVector, (int)reckless_offsetof(st, fld), NULL, sizeof(OSL::Vec3) \ + } + +#define BSDF_CLOSURE_FLOAT_PARAM(st, fld) CLOSURE_FLOAT_PARAM(st, fld), +#define BSDF_CLOSURE_FLOAT3_PARAM(st, fld) CLOSURE_FLOAT3_PARAM(st, fld), + +#define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z) +#define TO_COLOR3(v) OSL::Color3(v.x, v.y, v.z) +#define TO_FLOAT3(v) make_float3(v[0], v[1], v[2]) + +/* Closure */ + +class CClosurePrimitive { + public: + virtual void setup(ShaderData *sd, uint32_t path_flag, float3 weight) = 0; + + OSL::ustring label; +}; + +/* BSDF */ + +class CBSDFClosure : public CClosurePrimitive { + public: + bool skip(const ShaderData *sd, uint32_t path_flag, int scattering); +}; + +#define BSDF_CLOSURE_CLASS_BEGIN(Upper, lower, structname, TYPE) \ +\ + class Upper##Closure : public CBSDFClosure { \ + public: \ + structname params; \ + float3 unused; \ +\ + void setup(ShaderData *sd, uint32_t path_flag, float3 weight) \ + { \ + if (!skip(sd, path_flag, TYPE)) { \ + structname *bsdf = (structname *)bsdf_alloc_osl(sd, sizeof(structname), weight, ¶ms); \ + sd->flag |= (bsdf) ? 
bsdf_##lower##_setup(bsdf) : 0; \ + } \ + } \ + }; \ +\ + static ClosureParam *bsdf_##lower##_params() \ + { \ + static ClosureParam params[] = { + +/* parameters */ + +#define BSDF_CLOSURE_CLASS_END(Upper, lower) \ + CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), CLOSURE_FINISH_PARAM(Upper##Closure) \ + } \ + ; \ + return params; \ + } \ +\ + CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure) + +CCL_NAMESPACE_END + +#endif /* __OSL_CLOSURES_H__ */ diff --git a/intern/cycles/kernel/osl/emissive.cpp b/intern/cycles/kernel/osl/emissive.cpp index 5a7fe14b22e..2615e300a92 100644 --- a/intern/cycles/kernel/osl/emissive.cpp +++ b/intern/cycles/kernel/osl/emissive.cpp @@ -34,11 +34,11 @@ #include -#include "kernel/osl/osl_closures.h" +#include "kernel/osl/closures.h" // clang-format off #include "kernel/device/cpu/compat.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" #include "kernel/closure/alloc.h" #include "kernel/closure/emissive.h" // clang-format on diff --git a/intern/cycles/kernel/osl/globals.h b/intern/cycles/kernel/osl/globals.h new file mode 100644 index 00000000000..126ace0086e --- /dev/null +++ b/intern/cycles/kernel/osl/globals.h @@ -0,0 +1,109 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OSL_GLOBALS_H__ +#define __OSL_GLOBALS_H__ + +#ifdef WITH_OSL + +# include + +# include +# include + +# include "util/map.h" +# include "util/param.h" +# include "util/thread.h" +# include "util/unique_ptr.h" +# include "util/vector.h" + +# ifndef WIN32 +using std::isfinite; +# endif + +CCL_NAMESPACE_BEGIN + +class OSLRenderServices; +class ColorSpaceProcessor; + +/* OSL Globals + * + * Data needed by OSL render services, that is global to a rendering session. + * This includes all OSL shaders, name to attribute mapping and texture handles. 
+ */ + +struct OSLGlobals { + OSLGlobals() + { + ss = NULL; + ts = NULL; + services = NULL; + use = false; + } + + bool use; + + /* shading system */ + OSL::ShadingSystem *ss; + OSL::TextureSystem *ts; + OSLRenderServices *services; + + /* shader states */ + vector surface_state; + vector volume_state; + vector displacement_state; + vector bump_state; + OSL::ShaderGroupRef background_state; + + /* attributes */ + struct Attribute { + TypeDesc type; + AttributeDescriptor desc; + ParamValue value; + }; + + typedef unordered_map AttributeMap; + typedef unordered_map ObjectNameMap; + + vector attribute_map; + ObjectNameMap object_name_map; + vector object_names; +}; + +/* trace() call result */ +struct OSLTraceData { + Ray ray; + Intersection isect; + ShaderData sd; + bool setup; + bool init; + bool hit; +}; + +/* thread key for thread specific data lookup */ +struct OSLThreadData { + OSL::ShaderGlobals globals; + OSL::PerThreadInfo *osl_thread_info; + OSLTraceData tracedata; + OSL::ShadingContext *context; + OIIO::TextureSystem::Perthread *oiio_thread_info; +}; + +CCL_NAMESPACE_END + +#endif + +#endif /* __OSL_GLOBALS_H__ */ diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp deleted file mode 100644 index 3b8661ce45d..00000000000 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
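OSLThreadData above groups everything one render thread needs (its ShaderGlobals, OSL per-thread info, trace scratch data, a ShadingContext and an OIIO texture per-thread handle) so that no mutable OSL state is ever shared between threads. The sketch below, with invented names and no OSL dependency, only illustrates that general per-thread-state idea, not the way Cycles actually allocates these blocks:

#include <cstdio>
#include <thread>

/* Invented per-thread scratch block, analogous in spirit to OSLThreadData:
 * every thread works on its own copy, so no locking is required. */
struct DemoThreadData {
  int shades_done = 0;
};

static thread_local DemoThreadData tls_data;

static void shade_many()
{
  for (int i = 0; i < 1000; i++) {
    tls_data.shades_done++; /* touches only this thread's copy */
  }
  printf("this thread shaded %d samples\n", tls_data.shades_done);
}

int main()
{
  std::thread a(shade_many);
  std::thread b(shade_many);
  a.join();
  b.join();
  return 0;
}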
- */ - -#include - -#include "kernel/device/cpu/compat.h" -#include "kernel/osl/osl_closures.h" - -// clang-format off -#include "kernel/kernel_types.h" - -#include "kernel/closure/alloc.h" -#include "kernel/closure/bsdf_util.h" -#include "kernel/closure/bsdf_diffuse.h" -#include "kernel/closure/bsdf_principled_diffuse.h" -#include "kernel/closure/bssrdf.h" -// clang-format on - -CCL_NAMESPACE_BEGIN - -using namespace OSL; - -static ustring u_burley("burley"); -static ustring u_random_walk_fixed_radius("random_walk_fixed_radius"); -static ustring u_random_walk("random_walk"); - -class CBSSRDFClosure : public CClosurePrimitive { - public: - Bssrdf params; - float ior; - ustring method; - - CBSSRDFClosure() - { - params.roughness = FLT_MAX; - params.anisotropy = 1.0f; - ior = 1.4f; - } - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - if (method == u_burley) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID); - } - else if (method == u_random_walk_fixed_radius) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); - } - else if (method == u_random_walk) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_ID); - } - } - - void alloc(ShaderData *sd, uint32_t path_flag, float3 weight, ClosureType type) - { - Bssrdf *bssrdf = bssrdf_alloc(sd, weight); - - if (bssrdf) { - /* disable in case of diffuse ancestor, can't see it well then and - * adds considerably noise due to probabilities of continuing path - * getting lower and lower */ - if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) { - params.radius = make_float3(0.0f, 0.0f, 0.0f); - } - - /* create one closure per color channel */ - bssrdf->radius = params.radius; - bssrdf->albedo = params.albedo; - bssrdf->N = params.N; - bssrdf->roughness = params.roughness; - bssrdf->anisotropy = clamp(params.anisotropy, 0.0f, 0.9f); - sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type, clamp(ior, 1.01f, 3.8f)); - } - } -}; - -ClosureParam *closure_bssrdf_params() -{ - static ClosureParam params[] = { - CLOSURE_STRING_PARAM(CBSSRDFClosure, method), - CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.N), - CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.radius), - CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.albedo), - CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.roughness, "roughness"), - CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, ior, "ior"), - CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.anisotropy, "anisotropy"), - CLOSURE_STRING_KEYPARAM(CBSSRDFClosure, label, "label"), - CLOSURE_FINISH_PARAM(CBSSRDFClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_bssrdf_prepare, CBSSRDFClosure) - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp deleted file mode 100644 index 89bab35b60b..00000000000 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ /dev/null @@ -1,1006 +0,0 @@ -/* - * Adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011-2018, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -#include "kernel/osl/osl_closures.h" -#include "kernel/osl/osl_shader.h" - -#include "util/util_math.h" -#include "util/util_param.h" - -// clang-format off -#include "kernel/device/cpu/compat.h" -#include "kernel/device/cpu/globals.h" - -#include "kernel/kernel_types.h" - -#include "kernel/closure/alloc.h" -#include "kernel/closure/bsdf_util.h" -#include "kernel/closure/bsdf_ashikhmin_velvet.h" -#include "kernel/closure/bsdf_diffuse.h" -#include "kernel/closure/bsdf_microfacet.h" -#include "kernel/closure/bsdf_microfacet_multi.h" -#include "kernel/closure/bsdf_oren_nayar.h" -#include "kernel/closure/bsdf_reflection.h" -#include "kernel/closure/bsdf_refraction.h" -#include "kernel/closure/bsdf_transparent.h" -#include "kernel/closure/bsdf_ashikhmin_shirley.h" -#include "kernel/closure/bsdf_toon.h" -#include "kernel/closure/bsdf_hair.h" -#include "kernel/closure/bsdf_hair_principled.h" -#include "kernel/closure/bsdf_principled_diffuse.h" -#include "kernel/closure/bsdf_principled_sheen.h" -#include "kernel/closure/volume.h" -// clang-format on - -CCL_NAMESPACE_BEGIN - -using namespace OSL; - -/* BSDF class definitions */ - -BSDF_CLOSURE_CLASS_BEGIN(Diffuse, diffuse, DiffuseBsdf, LABEL_DIFFUSE) - BSDF_CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N) -BSDF_CLOSURE_CLASS_END(Diffuse, diffuse) - -BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, DiffuseBsdf, LABEL_DIFFUSE) - BSDF_CLOSURE_FLOAT3_PARAM(TranslucentClosure, params.N) -BSDF_CLOSURE_CLASS_END(Translucent, translucent) - -BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, OrenNayarBsdf, LABEL_DIFFUSE) - BSDF_CLOSURE_FLOAT3_PARAM(OrenNayarClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(OrenNayarClosure, params.roughness) -BSDF_CLOSURE_CLASS_END(OrenNayar, oren_nayar) - -BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, MicrofacetBsdf, LABEL_SINGULAR) - BSDF_CLOSURE_FLOAT3_PARAM(ReflectionClosure, params.N) -BSDF_CLOSURE_CLASS_END(Reflection, reflection) - -BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, MicrofacetBsdf, LABEL_SINGULAR) - BSDF_CLOSURE_FLOAT3_PARAM(RefractionClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(RefractionClosure, params.ior) -BSDF_CLOSURE_CLASS_END(Refraction, refraction) - -BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, VelvetBsdf, LABEL_DIFFUSE) - 
BSDF_CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, params.sigma) -BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet) - -BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, - ashikhmin_shirley, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_REFLECT) - BSDF_CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N) - BSDF_CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T) - BSDF_CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x) - BSDF_CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y) -BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley) - -BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE) - BSDF_CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.size) - BSDF_CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.smooth) -BSDF_CLOSURE_CLASS_END(DiffuseToon, diffuse_toon) - -BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, ToonBsdf, LABEL_GLOSSY) - BSDF_CLOSURE_FLOAT3_PARAM(GlossyToonClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.size) - BSDF_CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth) -BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXIsotropic, - microfacet_ggx_isotropic, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_REFLECT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXIsotropicClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXIsotropicClosure, params.alpha_x) -BSDF_CLOSURE_CLASS_END(MicrofacetGGXIsotropic, microfacet_ggx_isotropic) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, - microfacet_ggx, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_REFLECT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.T) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_y) -BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannIsotropic, - microfacet_beckmann_isotropic, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_REFLECT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannIsotropicClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannIsotropicClosure, params.alpha_x) -BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannIsotropic, microfacet_beckmann_isotropic) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, - microfacet_beckmann, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_REFLECT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.T) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_y) -BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, - microfacet_ggx_refraction, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_TRANSMIT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.alpha_x) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.ior) -BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, - microfacet_beckmann_refraction, - MicrofacetBsdf, - LABEL_GLOSSY | LABEL_TRANSMIT) - BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, 
params.alpha_x) - BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.ior) -BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction) - -BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY) - BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1) - BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2) - BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T) - BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset) -BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection) - -BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY) - BSDF_CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1) - BSDF_CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2) - BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T) - BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset) -BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission) - -BSDF_CLOSURE_CLASS_BEGIN(PrincipledDiffuse, - principled_diffuse, - PrincipledDiffuseBsdf, - LABEL_DIFFUSE) - BSDF_CLOSURE_FLOAT3_PARAM(PrincipledDiffuseClosure, params.N) - BSDF_CLOSURE_FLOAT_PARAM(PrincipledDiffuseClosure, params.roughness) -BSDF_CLOSURE_CLASS_END(PrincipledDiffuse, principled_diffuse) - -class PrincipledSheenClosure : public CBSDFClosure { - public: - PrincipledSheenBsdf params; - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - if (!skip(sd, path_flag, LABEL_DIFFUSE)) { - PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)bsdf_alloc_osl( - sd, sizeof(PrincipledSheenBsdf), weight, ¶ms); - sd->flag |= (bsdf) ? bsdf_principled_sheen_setup(sd, bsdf) : 0; - } - } -}; - -static ClosureParam *bsdf_principled_sheen_params() -{ - static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N), - CLOSURE_STRING_KEYPARAM(PrincipledSheenClosure, label, "label"), - CLOSURE_FINISH_PARAM(PrincipledSheenClosure)}; - return params; -} - -CCLOSURE_PREPARE_STATIC(closure_bsdf_principled_sheen_prepare, PrincipledSheenClosure) - -/* PRINCIPLED HAIR BSDF */ -class PrincipledHairClosure : public CBSDFClosure { - public: - PrincipledHairBSDF params; - - PrincipledHairBSDF *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) - { - PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc_osl( - sd, sizeof(PrincipledHairBSDF), weight, ¶ms); - if (!bsdf) { - return NULL; - } - - PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra( - sd, sizeof(PrincipledHairExtra)); - if (!extra) { - return NULL; - } - - bsdf->extra = extra; - return bsdf; - } - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - if (!skip(sd, path_flag, LABEL_GLOSSY)) { - PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - sd->flag |= (bsdf) ? 
bsdf_principled_hair_setup(sd, bsdf) : 0; - } - } -}; - -static ClosureParam *closure_bsdf_principled_hair_params() -{ - static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.N), - CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.sigma), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.v), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.s), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.m0_roughness), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.alpha), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.eta), - CLOSURE_STRING_KEYPARAM(PrincipledHairClosure, label, "label"), - CLOSURE_FINISH_PARAM(PrincipledHairClosure)}; - - return params; -} - -CCLOSURE_PREPARE(closure_bsdf_principled_hair_prepare, PrincipledHairClosure) - -/* DISNEY PRINCIPLED CLEARCOAT */ -class PrincipledClearcoatClosure : public CBSDFClosure { - public: - MicrofacetBsdf params; - float clearcoat, clearcoat_roughness; - - MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( - sd, sizeof(MicrofacetBsdf), weight, &params); - if (!bsdf) { - return NULL; - } - - MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if (!extra) { - return NULL; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = extra; - bsdf->ior = 1.5f; - bsdf->alpha_x = clearcoat_roughness; - bsdf->alpha_y = clearcoat_roughness; - bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); - bsdf->extra->clearcoat = clearcoat; - return bsdf; - } - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); - } -}; - -ClosureParam *closure_bsdf_principled_clearcoat_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N), - CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat), - CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness), - CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"), - CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_principled_clearcoat_prepare, PrincipledClearcoatClosure) - -/* Registration */ - -static void register_closure(OSL::ShadingSystem *ss, - const char *name, - int id, - OSL::ClosureParam *params, - OSL::PrepareClosureFunc prepare) -{ - /* optimization: it's possible to not use a prepare function at all and - * only initialize the actual class when accessing the closure component - * data, but then we need to map the id to the class somehow */ -#if OSL_LIBRARY_VERSION_CODE >= 10900 - ss->register_closure(name, id, params, prepare, NULL); -#else - ss->register_closure(name, id, params, prepare, NULL, 16); -#endif -} - -void OSLShader::register_closures(OSLShadingSystem *ss_) -{ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)ss_; - int id = 0; - - register_closure(ss, "diffuse", id++, bsdf_diffuse_params(), bsdf_diffuse_prepare); - register_closure(ss, "oren_nayar", id++, bsdf_oren_nayar_params(), bsdf_oren_nayar_prepare); - register_closure(ss, "translucent", id++, bsdf_translucent_params(), bsdf_translucent_prepare); - register_closure(ss, "reflection", id++, bsdf_reflection_params(), bsdf_reflection_prepare); - register_closure(ss, "refraction", id++, bsdf_refraction_params(),
bsdf_refraction_prepare); - register_closure(ss, - "transparent", - id++, - closure_bsdf_transparent_params(), - closure_bsdf_transparent_prepare); - - register_closure( - ss, "microfacet", id++, closure_bsdf_microfacet_params(), closure_bsdf_microfacet_prepare); - register_closure(ss, - "microfacet_ggx", - id++, - bsdf_microfacet_ggx_isotropic_params(), - bsdf_microfacet_ggx_isotropic_prepare); - register_closure( - ss, "microfacet_ggx_aniso", id++, bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare); - register_closure(ss, - "microfacet_ggx_refraction", - id++, - bsdf_microfacet_ggx_refraction_params(), - bsdf_microfacet_ggx_refraction_prepare); - register_closure(ss, - "microfacet_multi_ggx", - id++, - closure_bsdf_microfacet_multi_ggx_params(), - closure_bsdf_microfacet_multi_ggx_prepare); - register_closure(ss, - "microfacet_multi_ggx_glass", - id++, - closure_bsdf_microfacet_multi_ggx_glass_params(), - closure_bsdf_microfacet_multi_ggx_glass_prepare); - register_closure(ss, - "microfacet_multi_ggx_aniso", - id++, - closure_bsdf_microfacet_multi_ggx_aniso_params(), - closure_bsdf_microfacet_multi_ggx_aniso_prepare); - register_closure(ss, - "microfacet_ggx_fresnel", - id++, - closure_bsdf_microfacet_ggx_fresnel_params(), - closure_bsdf_microfacet_ggx_fresnel_prepare); - register_closure(ss, - "microfacet_ggx_aniso_fresnel", - id++, - closure_bsdf_microfacet_ggx_aniso_fresnel_params(), - closure_bsdf_microfacet_ggx_aniso_fresnel_prepare); - register_closure(ss, - "microfacet_multi_ggx_fresnel", - id++, - closure_bsdf_microfacet_multi_ggx_fresnel_params(), - closure_bsdf_microfacet_multi_ggx_fresnel_prepare); - register_closure(ss, - "microfacet_multi_ggx_glass_fresnel", - id++, - closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(), - closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare); - register_closure(ss, - "microfacet_multi_ggx_aniso_fresnel", - id++, - closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(), - closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare); - register_closure(ss, - "microfacet_beckmann", - id++, - bsdf_microfacet_beckmann_isotropic_params(), - bsdf_microfacet_beckmann_isotropic_prepare); - register_closure(ss, - "microfacet_beckmann_aniso", - id++, - bsdf_microfacet_beckmann_params(), - bsdf_microfacet_beckmann_prepare); - register_closure(ss, - "microfacet_beckmann_refraction", - id++, - bsdf_microfacet_beckmann_refraction_params(), - bsdf_microfacet_beckmann_refraction_prepare); - register_closure(ss, - "ashikhmin_shirley", - id++, - bsdf_ashikhmin_shirley_params(), - bsdf_ashikhmin_shirley_prepare); - register_closure( - ss, "ashikhmin_velvet", id++, bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare); - register_closure( - ss, "diffuse_toon", id++, bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare); - register_closure(ss, "glossy_toon", id++, bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare); - register_closure(ss, - "principled_diffuse", - id++, - bsdf_principled_diffuse_params(), - bsdf_principled_diffuse_prepare); - register_closure(ss, - "principled_sheen", - id++, - bsdf_principled_sheen_params(), - closure_bsdf_principled_sheen_prepare); - register_closure(ss, - "principled_clearcoat", - id++, - closure_bsdf_principled_clearcoat_params(), - closure_bsdf_principled_clearcoat_prepare); - - register_closure(ss, "emission", id++, closure_emission_params(), closure_emission_prepare); - register_closure( - ss, "background", id++, closure_background_params(), closure_background_prepare); - register_closure(ss, 
"holdout", id++, closure_holdout_params(), closure_holdout_prepare); - register_closure(ss, - "diffuse_ramp", - id++, - closure_bsdf_diffuse_ramp_params(), - closure_bsdf_diffuse_ramp_prepare); - register_closure( - ss, "phong_ramp", id++, closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare); - register_closure(ss, "bssrdf", id++, closure_bssrdf_params(), closure_bssrdf_prepare); - - register_closure( - ss, "hair_reflection", id++, bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare); - register_closure(ss, - "hair_transmission", - id++, - bsdf_hair_transmission_params(), - bsdf_hair_transmission_prepare); - - register_closure(ss, - "principled_hair", - id++, - closure_bsdf_principled_hair_params(), - closure_bsdf_principled_hair_prepare); - - register_closure(ss, - "henyey_greenstein", - id++, - closure_henyey_greenstein_params(), - closure_henyey_greenstein_prepare); - register_closure( - ss, "absorption", id++, closure_absorption_params(), closure_absorption_prepare); -} - -/* BSDF Closure */ - -bool CBSDFClosure::skip(const ShaderData *sd, uint32_t path_flag, int scattering) -{ - /* caustic options */ - if ((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { - const KernelGlobalsCPU *kg = sd->osl_globals; - - if ((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || - (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) { - return true; - } - } - - return false; -} - -/* Standard Microfacet Closure */ - -class MicrofacetClosure : public CBSDFClosure { - public: - MicrofacetBsdf params; - ustring distribution; - int refract; - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - static ustring u_ggx("ggx"); - static ustring u_default("default"); - - const int label = (refract) ? 
LABEL_TRANSMIT : LABEL_REFLECT; - if (skip(sd, path_flag, LABEL_GLOSSY | label)) { - return; - } - - MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( - sd, sizeof(MicrofacetBsdf), weight, &params); - - if (!bsdf) { - return; - } - - /* GGX */ - if (distribution == u_ggx || distribution == u_default) { - if (!refract) { - if (params.alpha_x == params.alpha_y) { - /* Isotropic */ - sd->flag |= bsdf_microfacet_ggx_isotropic_setup(bsdf); - } - else { - /* Anisotropic */ - sd->flag |= bsdf_microfacet_ggx_setup(bsdf); - } - } - else { - sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); - } - } - /* Beckmann */ - else { - if (!refract) { - if (params.alpha_x == params.alpha_y) { - /* Isotropic */ - sd->flag |= bsdf_microfacet_beckmann_isotropic_setup(bsdf); - } - else { - /* Anisotropic */ - sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); - } - } - else { - sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); - } - } - } -}; - -ClosureParam *closure_bsdf_microfacet_params() -{ - static ClosureParam params[] = {CLOSURE_STRING_PARAM(MicrofacetClosure, distribution), - CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_y), - CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.ior), - CLOSURE_INT_PARAM(MicrofacetClosure, refract), - CLOSURE_STRING_KEYPARAM(MicrofacetClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetClosure)}; - - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_prepare, MicrofacetClosure) - -/* GGX closures with Fresnel */ - -class MicrofacetFresnelClosure : public CBSDFClosure { - public: - MicrofacetBsdf params; - float3 color; - float3 cspec0; - - MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) - { - /* Technically, the MultiGGX Glass closure may also transmit. However, - * since this is set statically and only used for caustic flags, this - * is probably as good as it gets.
*/ - if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { - return NULL; - } - - MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( - sd, sizeof(MicrofacetBsdf), weight, &params); - if (!bsdf) { - return NULL; - } - - MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if (!extra) { - return NULL; - } - - bsdf->extra = extra; - bsdf->extra->color = color; - bsdf->extra->cspec0 = cspec0; - bsdf->extra->clearcoat = 0.0f; - return bsdf; - } -}; - -class MicrofacetGGXFresnelClosure : public MicrofacetFresnelClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); - } -}; - -ClosureParam *closure_bsdf_microfacet_ggx_fresnel_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_fresnel_prepare, MicrofacetGGXFresnelClosure); - -class MicrofacetGGXAnisoFresnelClosure : public MicrofacetFresnelClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); - } -}; - -ClosureParam *closure_bsdf_microfacet_ggx_aniso_fresnel_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_y), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_aniso_fresnel_prepare, - MicrofacetGGXAnisoFresnelClosure); - -/* Multiscattering GGX closures */ - -class MicrofacetMultiClosure : public CBSDFClosure { - public: - MicrofacetBsdf params; - float3 color; - - MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) - { - /* Technically, the MultiGGX closure may also transmit. However, - * since this is set statically and only used for caustic flags, this - * is probably as good as it gets.
*/ - if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { - return NULL; - } - - MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( - sd, sizeof(MicrofacetBsdf), weight, &params); - if (!bsdf) { - return NULL; - } - - MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if (!extra) { - return NULL; - } - - bsdf->extra = extra; - bsdf->extra->color = color; - bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->clearcoat = 0.0f; - return bsdf; - } -}; - -class MicrofacetMultiGGXClosure : public MicrofacetMultiClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - bsdf->ior = 0.0f; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); - } -}; - -ClosureParam *closure_bsdf_microfacet_multi_ggx_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_prepare, MicrofacetMultiGGXClosure); - -class MicrofacetMultiGGXAnisoClosure : public MicrofacetMultiClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - bsdf->ior = 0.0f; - sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); - } -}; - -ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_y), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_prepare, MicrofacetMultiGGXAnisoClosure); - -class MicrofacetMultiGGXGlassClosure : public MicrofacetMultiClosure { - public: - MicrofacetMultiGGXGlassClosure() : MicrofacetMultiClosure() - { - } - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); - } -}; - -ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_prepare, MicrofacetMultiGGXGlassClosure); - -/* Multiscattering GGX closures with Fresnel */ - -class MicrofacetMultiFresnelClosure : public
CBSDFClosure { - public: - MicrofacetBsdf params; - float3 color; - float3 cspec0; - - MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight) - { - /* Technically, the MultiGGX closure may also transmit. However, - * since this is set statically and only used for caustic flags, this - * is probably as good as it gets. */ - if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { - return NULL; - } - - MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( - sd, sizeof(MicrofacetBsdf), weight, &params); - if (!bsdf) { - return NULL; - } - - MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if (!extra) { - return NULL; - } - - bsdf->extra = extra; - bsdf->extra->color = color; - bsdf->extra->cspec0 = cspec0; - bsdf->extra->clearcoat = 0.0f; - return bsdf; - } -}; - -class MicrofacetMultiGGXFresnelClosure : public MicrofacetMultiFresnelClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); - } -}; - -ClosureParam *closure_bsdf_microfacet_multi_ggx_fresnel_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_fresnel_prepare, - MicrofacetMultiGGXFresnelClosure); - -class MicrofacetMultiGGXAnisoFresnelClosure : public MicrofacetMultiFresnelClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); - } -}; - -ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_y), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare, - MicrofacetMultiGGXAnisoFresnelClosure); - -class MicrofacetMultiGGXGlassFresnelClosure : public MicrofacetMultiFresnelClosure { - public: - MicrofacetMultiGGXGlassFresnelClosure() : MicrofacetMultiFresnelClosure() - { - } - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if (!bsdf) { - return; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; -
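/* The multi-scatter GGX glass closure exposes only a single roughness (params.alpha_x)
 * and no tangent to OSL (see its parameter table below), so it is set up as isotropic:
 * the tangent is cleared and alpha_y mirrors alpha_x before the kernel setup call. */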
sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); - } -}; - -ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_fresnel_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; - return params; -} -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare, - MicrofacetMultiGGXGlassFresnelClosure); - -/* Transparent */ - -class TransparentClosure : public CBSDFClosure { - public: - ShaderClosure params; - float3 unused; - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - bsdf_transparent_setup(sd, weight, path_flag); - } -}; - -ClosureParam *closure_bsdf_transparent_params() -{ - static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(TransparentClosure, label, "label"), - CLOSURE_FINISH_PARAM(TransparentClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_bsdf_transparent_prepare, TransparentClosure) - -/* Volume */ - -class VolumeAbsorptionClosure : public CBSDFClosure { - public: - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - volume_extinction_setup(sd, weight); - } -}; - -ClosureParam *closure_absorption_params() -{ - static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(VolumeAbsorptionClosure, label, "label"), - CLOSURE_FINISH_PARAM(VolumeAbsorptionClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_absorption_prepare, VolumeAbsorptionClosure) - -class VolumeHenyeyGreensteinClosure : public CBSDFClosure { - public: - HenyeyGreensteinVolume params; - - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) - { - volume_extinction_setup(sd, weight); - - HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc_osl( - sd, sizeof(HenyeyGreensteinVolume), weight, &params); - if (!volume) { - return; - } - - sd->flag |= volume_henyey_greenstein_setup(volume); - } -}; - -ClosureParam *closure_henyey_greenstein_params() -{ - static ClosureParam params[] = { - CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, params.g), - CLOSURE_STRING_KEYPARAM(VolumeHenyeyGreensteinClosure, label, "label"), - CLOSURE_FINISH_PARAM(VolumeHenyeyGreensteinClosure)}; - return params; -} - -CCLOSURE_PREPARE(closure_henyey_greenstein_prepare, VolumeHenyeyGreensteinClosure) - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h deleted file mode 100644 index 7869d793737..00000000000 --- a/intern/cycles/kernel/osl/osl_closures.h +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __OSL_CLOSURES_H__ -#define __OSL_CLOSURES_H__ - -#include "kernel/kernel_types.h" -#include "util/util_types.h" - -#include -#include -#include - -CCL_NAMESPACE_BEGIN - -OSL::ClosureParam *closure_emission_params(); -OSL::ClosureParam *closure_background_params(); -OSL::ClosureParam *closure_holdout_params(); -OSL::ClosureParam *closure_bsdf_diffuse_ramp_params(); -OSL::ClosureParam *closure_bsdf_phong_ramp_params(); -OSL::ClosureParam *closure_bsdf_transparent_params(); -OSL::ClosureParam *closure_bssrdf_params(); -OSL::ClosureParam *closure_absorption_params(); -OSL::ClosureParam *closure_henyey_greenstein_params(); -OSL::ClosureParam *closure_bsdf_microfacet_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params(); -OSL::ClosureParam *closure_bsdf_microfacet_ggx_fresnel_params(); -OSL::ClosureParam *closure_bsdf_microfacet_ggx_aniso_fresnel_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_fresnel_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(); -OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(); -OSL::ClosureParam *closure_bsdf_principled_clearcoat_params(); - -void closure_emission_prepare(OSL::RendererServices *, int id, void *data); -void closure_background_prepare(OSL::RendererServices *, int id, void *data); -void closure_holdout_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_diffuse_ramp_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_phong_ramp_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_transparent_prepare(OSL::RendererServices *, int id, void *data); -void closure_bssrdf_prepare(OSL::RendererServices *, int id, void *data); -void closure_absorption_prepare(OSL::RendererServices *, int id, void *data); -void closure_henyey_greenstein_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_multi_ggx_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_multi_ggx_glass_prepare(OSL::RendererServices *, int id, void 
*data); -void closure_bsdf_microfacet_multi_ggx_aniso_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_ggx_fresnel_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_ggx_aniso_fresnel_prepare(OSL::RendererServices *, - int id, - void *data); -void closure_bsdf_microfacet_multi_ggx_fresnel_prepare(OSL::RendererServices *, - int id, - void *data); -void closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare(OSL::RendererServices *, - int id, - void *data); -void closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare(OSL::RendererServices *, - int id, - void *data); -void closure_bsdf_principled_clearcoat_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_principled_hair_prepare(OSL::RendererServices *, int id, void *data); - -#define CCLOSURE_PREPARE(name, classname) \ - void name(RendererServices *, int id, void *data) \ - { \ - memset(data, 0, sizeof(classname)); \ - new (data) classname(); \ - } - -#define CCLOSURE_PREPARE_STATIC(name, classname) static CCLOSURE_PREPARE(name, classname) - -#define CLOSURE_FLOAT3_PARAM(st, fld) \ - { \ - TypeDesc::TypeVector, (int)reckless_offsetof(st, fld), NULL, sizeof(OSL::Vec3) \ - } - -#define BSDF_CLOSURE_FLOAT_PARAM(st, fld) CLOSURE_FLOAT_PARAM(st, fld), -#define BSDF_CLOSURE_FLOAT3_PARAM(st, fld) CLOSURE_FLOAT3_PARAM(st, fld), - -#define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z) -#define TO_COLOR3(v) OSL::Color3(v.x, v.y, v.z) -#define TO_FLOAT3(v) make_float3(v[0], v[1], v[2]) - -/* Closure */ - -class CClosurePrimitive { - public: - virtual void setup(ShaderData *sd, uint32_t path_flag, float3 weight) = 0; - - OSL::ustring label; -}; - -/* BSDF */ - -class CBSDFClosure : public CClosurePrimitive { - public: - bool skip(const ShaderData *sd, uint32_t path_flag, int scattering); -}; - -#define BSDF_CLOSURE_CLASS_BEGIN(Upper, lower, structname, TYPE) \ -\ - class Upper##Closure : public CBSDFClosure { \ - public: \ - structname params; \ - float3 unused; \ -\ - void setup(ShaderData *sd, uint32_t path_flag, float3 weight) \ - { \ - if (!skip(sd, path_flag, TYPE)) { \ - structname *bsdf = (structname *)bsdf_alloc_osl(sd, sizeof(structname), weight, &params); \ - sd->flag |= (bsdf) ? bsdf_##lower##_setup(bsdf) : 0; \ - } \ - } \ - }; \ -\ - static ClosureParam *bsdf_##lower##_params() \ - { \ - static ClosureParam params[] = { - -/* parameters */ - -#define BSDF_CLOSURE_CLASS_END(Upper, lower) \ - CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), CLOSURE_FINISH_PARAM(Upper##Closure) \ - } \ - ; \ - return params; \ - } \ -\ - CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure) - -CCL_NAMESPACE_END - -#endif /* __OSL_CLOSURES_H__ */ diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h deleted file mode 100644 index f1789f0d7eb..00000000000 --- a/intern/cycles/kernel/osl/osl_globals.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OSL_GLOBALS_H__ -#define __OSL_GLOBALS_H__ - -#ifdef WITH_OSL - -# include - -# include -# include - -# include "util/util_map.h" -# include "util/util_param.h" -# include "util/util_thread.h" -# include "util/util_unique_ptr.h" -# include "util/util_vector.h" - -# ifndef WIN32 -using std::isfinite; -# endif - -CCL_NAMESPACE_BEGIN - -class OSLRenderServices; -class ColorSpaceProcessor; - -/* OSL Globals - * - * Data needed by OSL render services, that is global to a rendering session. - * This includes all OSL shaders, name to attribute mapping and texture handles. - */ - -struct OSLGlobals { - OSLGlobals() - { - ss = NULL; - ts = NULL; - services = NULL; - use = false; - } - - bool use; - - /* shading system */ - OSL::ShadingSystem *ss; - OSL::TextureSystem *ts; - OSLRenderServices *services; - - /* shader states */ - vector surface_state; - vector volume_state; - vector displacement_state; - vector bump_state; - OSL::ShaderGroupRef background_state; - - /* attributes */ - struct Attribute { - TypeDesc type; - AttributeDescriptor desc; - ParamValue value; - }; - - typedef unordered_map AttributeMap; - typedef unordered_map ObjectNameMap; - - vector attribute_map; - ObjectNameMap object_name_map; - vector object_names; -}; - -/* trace() call result */ -struct OSLTraceData { - Ray ray; - Intersection isect; - ShaderData sd; - bool setup; - bool init; - bool hit; -}; - -/* thread key for thread specific data lookup */ -struct OSLThreadData { - OSL::ShaderGlobals globals; - OSL::PerThreadInfo *osl_thread_info; - OSLTraceData tracedata; - OSL::ShadingContext *context; - OIIO::TextureSystem::Perthread *oiio_thread_info; -}; - -CCL_NAMESPACE_END - -#endif - -#endif /* __OSL_GLOBALS_H__ */ diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp deleted file mode 100644 index 56b04fd280e..00000000000 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ /dev/null @@ -1,1724 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* TODO(sergey): There is a bit of headers dependency hell going on - * here, so for now we just put here. In the future it might be better - * to have dedicated file for such tweaks. 
- */ -#if (defined(__GNUC__) && !defined(__clang__)) && defined(NDEBUG) -# pragma GCC diagnostic ignored "-Wmaybe-uninitialized" -# pragma GCC diagnostic ignored "-Wuninitialized" -#endif - -#include - -#include "scene/colorspace.h" -#include "scene/mesh.h" -#include "scene/object.h" -#include "scene/scene.h" - -#include "kernel/osl/osl_closures.h" -#include "kernel/osl/osl_globals.h" -#include "kernel/osl/osl_services.h" -#include "kernel/osl/osl_shader.h" - -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_string.h" - -// clang-format off -#include "kernel/device/cpu/compat.h" -#include "kernel/device/cpu/globals.h" -#include "kernel/device/cpu/image.h" - -#include "kernel/util/util_differential.h" - -#include "kernel/integrator/integrator_state.h" -#include "kernel/integrator/integrator_state_flow.h" - -#include "kernel/geom/geom.h" - -#include "kernel/bvh/bvh.h" - -#include "kernel/camera/camera.h" -#include "kernel/camera/camera_projection.h" - -#include "kernel/integrator/integrator_path_state.h" -#include "kernel/integrator/integrator_shader_eval.h" - -#include "kernel/util/util_color.h" -// clang-format on - -CCL_NAMESPACE_BEGIN - -/* RenderServices implementation */ - -static void copy_matrix(OSL::Matrix44 &m, const Transform &tfm) -{ - ProjectionTransform t = projection_transpose(ProjectionTransform(tfm)); - memcpy((void *)&m, &t, sizeof(m)); -} - -static void copy_matrix(OSL::Matrix44 &m, const ProjectionTransform &tfm) -{ - ProjectionTransform t = projection_transpose(tfm); - memcpy((void *)&m, &t, sizeof(m)); -} - -/* static ustrings */ -ustring OSLRenderServices::u_distance("distance"); -ustring OSLRenderServices::u_index("index"); -ustring OSLRenderServices::u_world("world"); -ustring OSLRenderServices::u_camera("camera"); -ustring OSLRenderServices::u_screen("screen"); -ustring OSLRenderServices::u_raster("raster"); -ustring OSLRenderServices::u_ndc("NDC"); -ustring OSLRenderServices::u_object_location("object:location"); -ustring OSLRenderServices::u_object_color("object:color"); -ustring OSLRenderServices::u_object_index("object:index"); -ustring OSLRenderServices::u_geom_dupli_generated("geom:dupli_generated"); -ustring OSLRenderServices::u_geom_dupli_uv("geom:dupli_uv"); -ustring OSLRenderServices::u_material_index("material:index"); -ustring OSLRenderServices::u_object_random("object:random"); -ustring OSLRenderServices::u_particle_index("particle:index"); -ustring OSLRenderServices::u_particle_random("particle:random"); -ustring OSLRenderServices::u_particle_age("particle:age"); -ustring OSLRenderServices::u_particle_lifetime("particle:lifetime"); -ustring OSLRenderServices::u_particle_location("particle:location"); -ustring OSLRenderServices::u_particle_rotation("particle:rotation"); -ustring OSLRenderServices::u_particle_size("particle:size"); -ustring OSLRenderServices::u_particle_velocity("particle:velocity"); -ustring OSLRenderServices::u_particle_angular_velocity("particle:angular_velocity"); -ustring OSLRenderServices::u_geom_numpolyvertices("geom:numpolyvertices"); -ustring OSLRenderServices::u_geom_trianglevertices("geom:trianglevertices"); -ustring OSLRenderServices::u_geom_polyvertices("geom:polyvertices"); -ustring OSLRenderServices::u_geom_name("geom:name"); -ustring OSLRenderServices::u_geom_undisplaced("geom:undisplaced"); -ustring OSLRenderServices::u_is_smooth("geom:is_smooth"); -ustring OSLRenderServices::u_is_curve("geom:is_curve"); -ustring OSLRenderServices::u_curve_thickness("geom:curve_thickness"); -ustring 
OSLRenderServices::u_curve_length("geom:curve_length"); -ustring OSLRenderServices::u_curve_tangent_normal("geom:curve_tangent_normal"); -ustring OSLRenderServices::u_curve_random("geom:curve_random"); -ustring OSLRenderServices::u_normal_map_normal("geom:normal_map_normal"); -ustring OSLRenderServices::u_path_ray_length("path:ray_length"); -ustring OSLRenderServices::u_path_ray_depth("path:ray_depth"); -ustring OSLRenderServices::u_path_diffuse_depth("path:diffuse_depth"); -ustring OSLRenderServices::u_path_glossy_depth("path:glossy_depth"); -ustring OSLRenderServices::u_path_transparent_depth("path:transparent_depth"); -ustring OSLRenderServices::u_path_transmission_depth("path:transmission_depth"); -ustring OSLRenderServices::u_trace("trace"); -ustring OSLRenderServices::u_hit("hit"); -ustring OSLRenderServices::u_hitdist("hitdist"); -ustring OSLRenderServices::u_N("N"); -ustring OSLRenderServices::u_Ng("Ng"); -ustring OSLRenderServices::u_P("P"); -ustring OSLRenderServices::u_I("I"); -ustring OSLRenderServices::u_u("u"); -ustring OSLRenderServices::u_v("v"); -ustring OSLRenderServices::u_empty; - -OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system) - : texture_system(texture_system) -{ -} - -OSLRenderServices::~OSLRenderServices() -{ - if (texture_system) { - VLOG(2) << "OSL texture system stats:\n" << texture_system->getstats(); - } -} - -bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - OSL::TransformationPtr xform, - float time) -{ - /* this is only used for shader and object space, we don't really have - * a concept of shader space, so we just use object space for both. */ - if (xform) { - const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobalsCPU *kg = sd->osl_globals; - int object = sd->object; - - if (object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ - Transform tfm; - - if (time == sd->time) - tfm = object_get_transform(kg, sd); - else - tfm = object_fetch_transform_motion_test(kg, object, time, NULL); -#else - const Transform tfm = object_get_transform(kg, sd); -#endif - copy_matrix(result, tfm); - - return true; - } - else if (sd->type == PRIMITIVE_LAMP) { - const Transform tfm = lamp_fetch_transform(kg, sd->lamp, false); - copy_matrix(result, tfm); - - return true; - } - } - - return false; -} - -bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - OSL::TransformationPtr xform, - float time) -{ - /* this is only used for shader and object space, we don't really have - * a concept of shader space, so we just use object space for both. 
*/ - if (xform) { - const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobalsCPU *kg = sd->osl_globals; - int object = sd->object; - - if (object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ - Transform itfm; - - if (time == sd->time) - itfm = object_get_inverse_transform(kg, sd); - else - object_fetch_transform_motion_test(kg, object, time, &itfm); -#else - const Transform itfm = object_get_inverse_transform(kg, sd); -#endif - copy_matrix(result, itfm); - - return true; - } - else if (sd->type == PRIMITIVE_LAMP) { - const Transform itfm = lamp_fetch_transform(kg, sd->lamp, true); - copy_matrix(result, itfm); - - return true; - } - } - - return false; -} - -bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - ustring from, - float time) -{ - ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobalsCPU *kg = sd->osl_globals; - - if (from == u_ndc) { - copy_matrix(result, kernel_data.cam.ndctoworld); - return true; - } - else if (from == u_raster) { - copy_matrix(result, kernel_data.cam.rastertoworld); - return true; - } - else if (from == u_screen) { - copy_matrix(result, kernel_data.cam.screentoworld); - return true; - } - else if (from == u_camera) { - copy_matrix(result, kernel_data.cam.cameratoworld); - return true; - } - else if (from == u_world) { - result.makeIdentity(); - return true; - } - - return false; -} - -bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - ustring to, - float time) -{ - ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobalsCPU *kg = sd->osl_globals; - - if (to == u_ndc) { - copy_matrix(result, kernel_data.cam.worldtondc); - return true; - } - else if (to == u_raster) { - copy_matrix(result, kernel_data.cam.worldtoraster); - return true; - } - else if (to == u_screen) { - copy_matrix(result, kernel_data.cam.worldtoscreen); - return true; - } - else if (to == u_camera) { - copy_matrix(result, kernel_data.cam.worldtocamera); - return true; - } - else if (to == u_world) { - result.makeIdentity(); - return true; - } - - return false; -} - -bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - OSL::TransformationPtr xform) -{ - /* this is only used for shader and object space, we don't really have - * a concept of shader space, so we just use object space for both. */ - if (xform) { - const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobalsCPU *kg = sd->osl_globals; - int object = sd->object; - - if (object != OBJECT_NONE) { - const Transform tfm = object_get_transform(kg, sd); - copy_matrix(result, tfm); - - return true; - } - else if (sd->type == PRIMITIVE_LAMP) { - const Transform tfm = lamp_fetch_transform(kg, sd->lamp, false); - copy_matrix(result, tfm); - - return true; - } - } - - return false; -} - -bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - OSL::TransformationPtr xform) -{ - /* this is only used for shader and object space, we don't really have - * a concept of shader space, so we just use object space for both. 
*/ - if (xform) { - const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobalsCPU *kg = sd->osl_globals; - int object = sd->object; - - if (object != OBJECT_NONE) { - const Transform tfm = object_get_inverse_transform(kg, sd); - copy_matrix(result, tfm); - - return true; - } - else if (sd->type == PRIMITIVE_LAMP) { - const Transform itfm = lamp_fetch_transform(kg, sd->lamp, true); - copy_matrix(result, itfm); - - return true; - } - } - - return false; -} - -bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) -{ - ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobalsCPU *kg = sd->osl_globals; - - if (from == u_ndc) { - copy_matrix(result, kernel_data.cam.ndctoworld); - return true; - } - else if (from == u_raster) { - copy_matrix(result, kernel_data.cam.rastertoworld); - return true; - } - else if (from == u_screen) { - copy_matrix(result, kernel_data.cam.screentoworld); - return true; - } - else if (from == u_camera) { - copy_matrix(result, kernel_data.cam.cameratoworld); - return true; - } - - return false; -} - -bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - ustring to) -{ - ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobalsCPU *kg = sd->osl_globals; - - if (to == u_ndc) { - copy_matrix(result, kernel_data.cam.worldtondc); - return true; - } - else if (to == u_raster) { - copy_matrix(result, kernel_data.cam.worldtoraster); - return true; - } - else if (to == u_screen) { - copy_matrix(result, kernel_data.cam.worldtoscreen); - return true; - } - else if (to == u_camera) { - copy_matrix(result, kernel_data.cam.worldtocamera); - return true; - } - - return false; -} - -bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, - bool derivatives, - ustring object, - TypeDesc type, - ustring name, - int index, - void *val) -{ - return false; -} - -static bool set_attribute_float2(float2 f[3], TypeDesc type, bool derivatives, void *val) -{ - if (type == TypeFloatArray4) { - float *fval = (float *)val; - fval[0] = f[0].x; - fval[1] = f[0].y; - fval[2] = 0.0f; - fval[3] = 1.0f; - - if (derivatives) { - fval[4] = f[1].x; - fval[5] = f[1].y; - fval[6] = 0.0f; - fval[7] = 0.0f; - - fval[8] = f[2].x; - fval[9] = f[2].y; - fval[10] = 0.0f; - fval[11] = 0.0f; - } - return true; - } - else if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || - type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { - float *fval = (float *)val; - - fval[0] = f[0].x; - fval[1] = f[0].y; - fval[2] = 0.0f; - - if (derivatives) { - fval[3] = f[1].x; - fval[4] = f[1].y; - fval[5] = 0.0f; - - fval[6] = f[2].x; - fval[7] = f[2].y; - fval[8] = 0.0f; - } - - return true; - } - else if (type == TypeDesc::TypeFloat) { - float *fval = (float *)val; - fval[0] = average(f[0]); - - if (derivatives) { - fval[1] = average(f[1]); - fval[2] = average(f[2]); - } - - return true; - } - - return false; -} - -static bool set_attribute_float2(float2 f, TypeDesc type, bool derivatives, void *val) -{ - float2 fv[3]; - - fv[0] = f; - fv[1] = make_float2(0.0f, 0.0f); - fv[2] = make_float2(0.0f, 0.0f); - - return set_attribute_float2(fv, type, derivatives, val); -} - -static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, void *val) -{ - if (type == TypeFloatArray4) { - float *fval = (float *)val; - fval[0] = f[0].x; - fval[1] = f[0].y; - fval[2] = f[0].z; - fval[3] = 1.0f; - - if (derivatives) { - fval[4] = f[1].x; - fval[5] = f[1].y; - fval[6] = 
f[1].z; - fval[7] = 0.0f; - - fval[8] = f[2].x; - fval[9] = f[2].y; - fval[10] = f[2].z; - fval[11] = 0.0f; - } - return true; - } - else if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || - type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { - float *fval = (float *)val; - - fval[0] = f[0].x; - fval[1] = f[0].y; - fval[2] = f[0].z; - - if (derivatives) { - fval[3] = f[1].x; - fval[4] = f[1].y; - fval[5] = f[1].z; - - fval[6] = f[2].x; - fval[7] = f[2].y; - fval[8] = f[2].z; - } - - return true; - } - else if (type == TypeDesc::TypeFloat) { - float *fval = (float *)val; - fval[0] = average(f[0]); - - if (derivatives) { - fval[1] = average(f[1]); - fval[2] = average(f[2]); - } - - return true; - } - - return false; -} - -static bool set_attribute_float3(float3 f, TypeDesc type, bool derivatives, void *val) -{ - float3 fv[3]; - - fv[0] = f; - fv[1] = make_float3(0.0f, 0.0f, 0.0f); - fv[2] = make_float3(0.0f, 0.0f, 0.0f); - - return set_attribute_float3(fv, type, derivatives, val); -} - -/* Attributes with the TypeRGBA type descriptor should be retrieved and stored - * in a float array of size 4 (e.g. node_vertex_color.osl), this array have - * a type descriptor TypeFloatArray4. If the storage is not a TypeFloatArray4, - * we either store the first three components in a vector, store the average of - * the components in a float, or fail the retrieval and do nothing. We allow - * this for the correct operation of the Attribute node. - */ - -static bool set_attribute_float4(float4 f[3], TypeDesc type, bool derivatives, void *val) -{ - float *fval = (float *)val; - if (type == TypeFloatArray4) { - fval[0] = f[0].x; - fval[1] = f[0].y; - fval[2] = f[0].z; - fval[3] = f[0].w; - - if (derivatives) { - fval[4] = f[1].x; - fval[5] = f[1].y; - fval[6] = f[1].z; - fval[7] = f[1].w; - - fval[8] = f[2].x; - fval[9] = f[2].y; - fval[10] = f[2].z; - fval[11] = f[2].w; - } - return true; - } - else if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || - type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { - fval[0] = f[0].x; - fval[1] = f[0].y; - fval[2] = f[0].z; - - if (derivatives) { - fval[3] = f[1].x; - fval[4] = f[1].y; - fval[5] = f[1].z; - - fval[6] = f[2].x; - fval[7] = f[2].y; - fval[8] = f[2].z; - } - return true; - } - else if (type == TypeDesc::TypeFloat) { - fval[0] = average(float4_to_float3(f[0])); - - if (derivatives) { - fval[1] = average(float4_to_float3(f[1])); - fval[2] = average(float4_to_float3(f[2])); - } - return true; - } - return false; -} - -static bool set_attribute_float4(float4 f, TypeDesc type, bool derivatives, void *val) -{ - float4 fv[3]; - - fv[0] = f; - fv[1] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - fv[2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - return set_attribute_float4(fv, type, derivatives, val); -} - -static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, void *val) -{ - if (type == TypeFloatArray4) { - float *fval = (float *)val; - fval[0] = f[0]; - fval[1] = f[0]; - fval[2] = f[0]; - fval[3] = 1.0f; - - if (derivatives) { - fval[4] = f[1]; - fval[5] = f[1]; - fval[6] = f[1]; - fval[7] = 0.0f; - - fval[8] = f[2]; - fval[9] = f[2]; - fval[10] = f[2]; - fval[11] = 0.0f; - } - return true; - } - else if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || - type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { - float *fval = (float *)val; - fval[0] = f[0]; - fval[1] = f[0]; - fval[2] = f[0]; - - if (derivatives) { - fval[3] = f[1]; - fval[4] = f[1]; - fval[5] = f[1]; - - 
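/* Second derivative slot: replicated across the three vector components in the
 * same way as the value and the first derivative slot above. */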
fval[6] = f[2]; - fval[7] = f[2]; - fval[8] = f[2]; - } - - return true; - } - else if (type == TypeDesc::TypeFloat) { - float *fval = (float *)val; - fval[0] = f[0]; - - if (derivatives) { - fval[1] = f[1]; - fval[2] = f[2]; - } - - return true; - } - - return false; -} - -static bool set_attribute_float(float f, TypeDesc type, bool derivatives, void *val) -{ - float fv[3]; - - fv[0] = f; - fv[1] = 0.0f; - fv[2] = 0.0f; - - return set_attribute_float(fv, type, derivatives, val); -} - -static bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val) -{ - if (type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) { - int *ival = (int *)val; - ival[0] = i; - - if (derivatives) { - ival[1] = 0; - ival[2] = 0; - } - - return true; - } - - return false; -} - -static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, void *val) -{ - if (type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR && - type.arraylen == 0) { - ustring *sval = (ustring *)val; - sval[0] = str; - - if (derivatives) { - sval[1] = OSLRenderServices::u_empty; - sval[2] = OSLRenderServices::u_empty; - } - - return true; - } - - return false; -} - -static bool set_attribute_float3_3(float3 P[3], TypeDesc type, bool derivatives, void *val) -{ - if (type.vecsemantics == TypeDesc::POINT && type.arraylen >= 3) { - float *fval = (float *)val; - - fval[0] = P[0].x; - fval[1] = P[0].y; - fval[2] = P[0].z; - - fval[3] = P[1].x; - fval[4] = P[1].y; - fval[5] = P[1].z; - - fval[6] = P[2].x; - fval[7] = P[2].y; - fval[8] = P[2].z; - - if (type.arraylen > 3) - memset(fval + 3 * 3, 0, sizeof(float) * 3 * (type.arraylen - 3)); - if (derivatives) - memset(fval + type.arraylen * 3, 0, sizeof(float) * 2 * 3 * type.arraylen); - - return true; - } - - return false; -} - -static bool set_attribute_matrix(const Transform &tfm, TypeDesc type, void *val) -{ - if (type == TypeDesc::TypeMatrix) { - copy_matrix(*(OSL::Matrix44 *)val, tfm); - return true; - } - - return false; -} - -static bool get_primitive_attribute(const KernelGlobalsCPU *kg, - const ShaderData *sd, - const OSLGlobals::Attribute &attr, - const TypeDesc &type, - bool derivatives, - void *val) -{ - if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || - attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { - float3 fval[3]; - if (primitive_is_volume_attribute(sd, attr.desc)) { - fval[0] = primitive_volume_attribute_float3(kg, sd, attr.desc); - } - else { - memset(fval, 0, sizeof(fval)); - fval[0] = primitive_surface_attribute_float3( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); - } - return set_attribute_float3(fval, type, derivatives, val); - } - else if (attr.type == TypeFloat2) { - if (primitive_is_volume_attribute(sd, attr.desc)) { - assert(!"Float2 attribute not support for volumes"); - return false; - } - else { - float2 fval[3]; - fval[0] = primitive_surface_attribute_float2( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); - return set_attribute_float2(fval, type, derivatives, val); - } - } - else if (attr.type == TypeDesc::TypeFloat) { - float fval[3]; - if (primitive_is_volume_attribute(sd, attr.desc)) { - memset(fval, 0, sizeof(fval)); - fval[0] = primitive_volume_attribute_float(kg, sd, attr.desc); - } - else { - fval[0] = primitive_surface_attribute_float( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? 
&fval[2] : NULL); - } - return set_attribute_float(fval, type, derivatives, val); - } - else if (attr.type == TypeDesc::TypeFloat4 || attr.type == TypeRGBA) { - float4 fval[3]; - if (primitive_is_volume_attribute(sd, attr.desc)) { - memset(fval, 0, sizeof(fval)); - fval[0] = primitive_volume_attribute_float4(kg, sd, attr.desc); - } - else { - fval[0] = primitive_surface_attribute_float4( - kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); - } - return set_attribute_float4(fval, type, derivatives, val); - } - else { - return false; - } -} - -static bool get_mesh_attribute(const KernelGlobalsCPU *kg, - const ShaderData *sd, - const OSLGlobals::Attribute &attr, - const TypeDesc &type, - bool derivatives, - void *val) -{ - if (attr.type == TypeDesc::TypeMatrix) { - Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc); - return set_attribute_matrix(tfm, type, val); - } - else { - return false; - } -} - -static bool get_object_attribute(const OSLGlobals::Attribute &attr, - TypeDesc type, - bool derivatives, - void *val) -{ - if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || - attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { - return set_attribute_float3(*(float3 *)attr.value.data(), type, derivatives, val); - } - else if (attr.type == TypeFloat2) { - return set_attribute_float2(*(float2 *)attr.value.data(), type, derivatives, val); - } - else if (attr.type == TypeDesc::TypeFloat) { - return set_attribute_float(*(float *)attr.value.data(), type, derivatives, val); - } - else if (attr.type == TypeRGBA || attr.type == TypeDesc::TypeFloat4) { - return set_attribute_float4(*(float4 *)attr.value.data(), type, derivatives, val); - } - else if (attr.type == type) { - size_t datasize = attr.value.datasize(); - - memcpy(val, attr.value.data(), datasize); - if (derivatives) { - memset((char *)val + datasize, 0, datasize * 2); - } - - return true; - } - else { - return false; - } -} - -bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg, - ShaderData *sd, - ustring name, - TypeDesc type, - bool derivatives, - void *val) -{ - /* todo: turn this into hash table? 
*/ - - /* Object Attributes */ - if (name == u_object_location) { - float3 f = object_location(kg, sd); - return set_attribute_float3(f, type, derivatives, val); - } - else if (name == u_object_color) { - float3 f = object_color(kg, sd->object); - return set_attribute_float3(f, type, derivatives, val); - } - else if (name == u_object_index) { - float f = object_pass_id(kg, sd->object); - return set_attribute_float(f, type, derivatives, val); - } - else if (name == u_geom_dupli_generated) { - float3 f = object_dupli_generated(kg, sd->object); - return set_attribute_float3(f, type, derivatives, val); - } - else if (name == u_geom_dupli_uv) { - float3 f = object_dupli_uv(kg, sd->object); - return set_attribute_float3(f, type, derivatives, val); - } - else if (name == u_material_index) { - float f = shader_pass_id(kg, sd); - return set_attribute_float(f, type, derivatives, val); - } - else if (name == u_object_random) { - float f = object_random_number(kg, sd->object); - return set_attribute_float(f, type, derivatives, val); - } - - /* Particle Attributes */ - else if (name == u_particle_index) { - int particle_id = object_particle_id(kg, sd->object); - float f = particle_index(kg, particle_id); - return set_attribute_float(f, type, derivatives, val); - } - else if (name == u_particle_random) { - int particle_id = object_particle_id(kg, sd->object); - float f = hash_uint2_to_float(particle_index(kg, particle_id), 0); - return set_attribute_float(f, type, derivatives, val); - } - - else if (name == u_particle_age) { - int particle_id = object_particle_id(kg, sd->object); - float f = particle_age(kg, particle_id); - return set_attribute_float(f, type, derivatives, val); - } - else if (name == u_particle_lifetime) { - int particle_id = object_particle_id(kg, sd->object); - float f = particle_lifetime(kg, particle_id); - return set_attribute_float(f, type, derivatives, val); - } - else if (name == u_particle_location) { - int particle_id = object_particle_id(kg, sd->object); - float3 f = particle_location(kg, particle_id); - return set_attribute_float3(f, type, derivatives, val); - } -#if 0 /* unsupported */ - else if (name == u_particle_rotation) { - int particle_id = object_particle_id(kg, sd->object); - float4 f = particle_rotation(kg, particle_id); - return set_attribute_float4(f, type, derivatives, val); - } -#endif - else if (name == u_particle_size) { - int particle_id = object_particle_id(kg, sd->object); - float f = particle_size(kg, particle_id); - return set_attribute_float(f, type, derivatives, val); - } - else if (name == u_particle_velocity) { - int particle_id = object_particle_id(kg, sd->object); - float3 f = particle_velocity(kg, particle_id); - return set_attribute_float3(f, type, derivatives, val); - } - else if (name == u_particle_angular_velocity) { - int particle_id = object_particle_id(kg, sd->object); - float3 f = particle_angular_velocity(kg, particle_id); - return set_attribute_float3(f, type, derivatives, val); - } - - /* Geometry Attributes */ - else if (name == u_geom_numpolyvertices) { - return set_attribute_int(3, type, derivatives, val); - } - else if ((name == u_geom_trianglevertices || name == u_geom_polyvertices) && - sd->type & PRIMITIVE_ALL_TRIANGLE) { - float3 P[3]; - - if (sd->type & PRIMITIVE_TRIANGLE) - triangle_vertices(kg, sd->prim, P); - else - motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P); - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_position_transform(kg, sd, &P[0]); - object_position_transform(kg, sd, &P[1]); 
- object_position_transform(kg, sd, &P[2]); - } - - return set_attribute_float3_3(P, type, derivatives, val); - } - else if (name == u_geom_name) { - ustring object_name = kg->osl->object_names[sd->object]; - return set_attribute_string(object_name, type, derivatives, val); - } - else if (name == u_is_smooth) { - float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0); - return set_attribute_float(f, type, derivatives, val); - } - /* Hair Attributes */ - else if (name == u_is_curve) { - float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0; - return set_attribute_float(f, type, derivatives, val); - } - else if (name == u_curve_thickness) { - float f = curve_thickness(kg, sd); - return set_attribute_float(f, type, derivatives, val); - } - else if (name == u_curve_tangent_normal) { - float3 f = curve_tangent_normal(kg, sd); - return set_attribute_float3(f, type, derivatives, val); - } - else if (name == u_normal_map_normal) { - if (sd->type & PRIMITIVE_ALL_TRIANGLE) { - float3 f = triangle_smooth_normal_unnormalized(kg, sd, sd->Ng, sd->prim, sd->u, sd->v); - return set_attribute_float3(f, type, derivatives, val); - } - else { - return false; - } - } - else { - return false; - } -} - -bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg, - ShaderData *sd, - ustring name, - TypeDesc type, - bool derivatives, - void *val) -{ - if (name == u_path_ray_length) { - /* Ray Length */ - float f = sd->ray_length; - return set_attribute_float(f, type, derivatives, val); - } - else if (name == u_path_ray_depth) { - /* Ray Depth */ - const IntegratorStateCPU *state = sd->osl_path_state; - const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; - int f = (state) ? state->path.bounce : (shadow_state) ? shadow_state->shadow_path.bounce : 0; - return set_attribute_int(f, type, derivatives, val); - } - else if (name == u_path_diffuse_depth) { - /* Diffuse Ray Depth */ - const IntegratorStateCPU *state = sd->osl_path_state; - const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; - int f = (state) ? state->path.diffuse_bounce : - (shadow_state) ? shadow_state->shadow_path.diffuse_bounce : - 0; - return set_attribute_int(f, type, derivatives, val); - } - else if (name == u_path_glossy_depth) { - /* Glossy Ray Depth */ - const IntegratorStateCPU *state = sd->osl_path_state; - const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; - int f = (state) ? state->path.glossy_bounce : - (shadow_state) ? shadow_state->shadow_path.glossy_bounce : - 0; - return set_attribute_int(f, type, derivatives, val); - } - else if (name == u_path_transmission_depth) { - /* Transmission Ray Depth */ - const IntegratorStateCPU *state = sd->osl_path_state; - const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; - int f = (state) ? state->path.transmission_bounce : - (shadow_state) ? shadow_state->shadow_path.transmission_bounce : - 0; - return set_attribute_int(f, type, derivatives, val); - } - else if (name == u_path_transparent_depth) { - /* Transparent Ray Depth */ - const IntegratorStateCPU *state = sd->osl_path_state; - const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; - int f = (state) ? state->path.transparent_bounce : - (shadow_state) ? shadow_state->shadow_path.transparent_bounce : - 0; - return set_attribute_int(f, type, derivatives, val); - } - else if (name == u_ndc) { - /* NDC coordinates with special exception for orthographic projection. 
*/ - OSLThreadData *tdata = kg->osl_tdata; - OSL::ShaderGlobals *globals = &tdata->globals; - float3 ndc[3]; - - if ((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && - kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { - ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P); - - if (derivatives) { - ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)) - - ndc[0]; - ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)) - - ndc[0]; - } - } - else { - ndc[0] = camera_world_to_ndc(kg, sd, sd->P); - - if (derivatives) { - ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0]; - ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0]; - } - } - - return set_attribute_float3(ndc, type, derivatives, val); - } - else - return false; -} - -bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, - bool derivatives, - ustring object_name, - TypeDesc type, - ustring name, - void *val) -{ - if (sg == NULL || sg->renderstate == NULL) - return false; - - ShaderData *sd = (ShaderData *)(sg->renderstate); - return get_attribute(sd, derivatives, object_name, type, name, val); -} - -bool OSLRenderServices::get_attribute( - ShaderData *sd, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val) -{ - const KernelGlobalsCPU *kg = sd->osl_globals; - int prim_type = 0; - int object; - - /* lookup of attribute on another object */ - if (object_name != u_empty) { - OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name); - - if (it == kg->osl->object_name_map.end()) - return false; - - object = it->second; - } - else { - object = sd->object; - prim_type = attribute_primitive_type(kg, sd); - - if (object == OBJECT_NONE) - return get_background_attribute(kg, sd, name, type, derivatives, val); - } - - /* find attribute on object */ - object = object * ATTR_PRIM_TYPES + prim_type; - OSLGlobals::AttributeMap &attribute_map = kg->osl->attribute_map[object]; - OSLGlobals::AttributeMap::iterator it = attribute_map.find(name); - - if (it != attribute_map.end()) { - const OSLGlobals::Attribute &attr = it->second; - - if (attr.desc.element != ATTR_ELEMENT_OBJECT) { - /* triangle and vertex attributes */ - if (get_primitive_attribute(kg, sd, attr, type, derivatives, val)) - return true; - else - return get_mesh_attribute(kg, sd, attr, type, derivatives, val); - } - else { - /* object attribute */ - return get_object_attribute(attr, type, derivatives, val); - } - } - else { - /* not found in attribute, check standard object info */ - bool is_std_object_attribute = get_object_standard_attribute( - kg, sd, name, type, derivatives, val); - - if (is_std_object_attribute) - return true; - - return get_background_attribute(kg, sd, name, type, derivatives, val); - } - - return false; -} - -bool OSLRenderServices::get_userdata( - bool derivatives, ustring name, TypeDesc type, OSL::ShaderGlobals *sg, void *val) -{ - return false; /* disabled by lockgeom */ -} - -#if OSL_LIBRARY_VERSION_CODE >= 11100 -TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename, - OSL::ShadingContext *) -#else - -TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename) -#endif -{ - OSLTextureHandleMap::iterator it = textures.find(filename); - - /* For non-OIIO textures, just return a pointer to our own OSLTextureHandle. 
*/ - if (it != textures.end()) { - if (it->second->type != OSLTextureHandle::OIIO) { - return (TextureSystem::TextureHandle *)it->second.get(); - } - } - - /* Get handle from OpenImageIO. */ - OSL::TextureSystem *ts = texture_system; - TextureSystem::TextureHandle *handle = ts->get_texture_handle(filename); - if (handle == NULL) { - return NULL; - } - - /* Insert new OSLTextureHandle if needed. */ - if (it == textures.end()) { - textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::OIIO)); - it = textures.find(filename); - } - - /* Assign OIIO texture handle and return. */ - it->second->oiio_handle = handle; - return (TextureSystem::TextureHandle *)it->second.get(); -} - -bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle) -{ - OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; - - if (handle->oiio_handle) { - OSL::TextureSystem *ts = texture_system; - return ts->good(handle->oiio_handle); - } - else { - return true; - } -} - -bool OSLRenderServices::texture(ustring filename, - TextureHandle *texture_handle, - TexturePerthread *texture_thread_info, - TextureOpt &options, - OSL::ShaderGlobals *sg, - float s, - float t, - float dsdx, - float dtdx, - float dsdy, - float dtdy, - int nchannels, - float *result, - float *dresultds, - float *dresultdt, - ustring *errormessage) -{ - OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; - OSLTextureHandle::Type texture_type = (handle) ? handle->type : OSLTextureHandle::OIIO; - ShaderData *sd = (ShaderData *)(sg->renderstate); - KernelGlobals kernel_globals = sd->osl_globals; - bool status = false; - - switch (texture_type) { - case OSLTextureHandle::BEVEL: { - /* Bevel shader hack. */ - if (nchannels >= 3) { - const IntegratorStateCPU *state = sd->osl_path_state; - if (state) { - int num_samples = (int)s; - float radius = t; - float3 N = svm_bevel(kernel_globals, state, sd, radius, num_samples); - result[0] = N.x; - result[1] = N.y; - result[2] = N.z; - status = true; - } - } - break; - } - case OSLTextureHandle::AO: { - /* AO shader hack. */ - const IntegratorStateCPU *state = sd->osl_path_state; - if (state) { - int num_samples = (int)s; - float radius = t; - float3 N = make_float3(dsdx, dtdx, dsdy); - int flags = 0; - if ((int)dtdy) { - flags |= NODE_AO_INSIDE; - } - if ((int)options.sblur) { - flags |= NODE_AO_ONLY_LOCAL; - } - if ((int)options.tblur) { - flags |= NODE_AO_GLOBAL_RADIUS; - } - result[0] = svm_ao(kernel_globals, state, sd, N, radius, num_samples, flags); - status = true; - } - break; - } - case OSLTextureHandle::SVM: { - /* Packed texture. */ - float4 rgba = kernel_tex_image_interp(kernel_globals, handle->svm_slot, s, 1.0f - t); - - result[0] = rgba[0]; - if (nchannels > 1) - result[1] = rgba[1]; - if (nchannels > 2) - result[2] = rgba[2]; - if (nchannels > 3) - result[3] = rgba[3]; - status = true; - break; - } - case OSLTextureHandle::IES: { - /* IES light. */ - result[0] = kernel_ies_interp(kernel_globals, handle->svm_slot, s, t); - status = true; - break; - } - case OSLTextureHandle::OIIO: { - /* OpenImageIO texture cache. 
*/ - OSL::TextureSystem *ts = texture_system; - - if (handle && handle->oiio_handle) { - if (texture_thread_info == NULL) { - OSLThreadData *tdata = kernel_globals->osl_tdata; - texture_thread_info = tdata->oiio_thread_info; - } - - status = ts->texture(handle->oiio_handle, - texture_thread_info, - options, - s, - t, - dsdx, - dtdx, - dsdy, - dtdy, - nchannels, - result, - dresultds, - dresultdt); - } - else { - status = ts->texture(filename, - options, - s, - t, - dsdx, - dtdx, - dsdy, - dtdy, - nchannels, - result, - dresultds, - dresultdt); - } - - if (!status) { - /* This might be slow, but prevents error messages leak and - * other nasty stuff happening. */ - ts->geterror(); - } - else if (handle && handle->processor) { - ColorSpaceManager::to_scene_linear(handle->processor, result, nchannels); - } - break; - } - } - - if (!status) { - if (nchannels == 3 || nchannels == 4) { - result[0] = 1.0f; - result[1] = 0.0f; - result[2] = 1.0f; - - if (nchannels == 4) - result[3] = 1.0f; - } - } - - return status; -} - -bool OSLRenderServices::texture3d(ustring filename, - TextureHandle *texture_handle, - TexturePerthread *texture_thread_info, - TextureOpt &options, - OSL::ShaderGlobals *sg, - const OSL::Vec3 &P, - const OSL::Vec3 &dPdx, - const OSL::Vec3 &dPdy, - const OSL::Vec3 &dPdz, - int nchannels, - float *result, - float *dresultds, - float *dresultdt, - float *dresultdr, - ustring *errormessage) -{ - OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; - OSLTextureHandle::Type texture_type = (handle) ? handle->type : OSLTextureHandle::OIIO; - bool status = false; - - switch (texture_type) { - case OSLTextureHandle::SVM: { - /* Packed texture. */ - ShaderData *sd = (ShaderData *)(sg->renderstate); - KernelGlobals kernel_globals = sd->osl_globals; - int slot = handle->svm_slot; - float3 P_float3 = make_float3(P.x, P.y, P.z); - float4 rgba = kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE); - - result[0] = rgba[0]; - if (nchannels > 1) - result[1] = rgba[1]; - if (nchannels > 2) - result[2] = rgba[2]; - if (nchannels > 3) - result[3] = rgba[3]; - status = true; - break; - } - case OSLTextureHandle::OIIO: { - /* OpenImageIO texture cache. */ - OSL::TextureSystem *ts = texture_system; - - if (handle && handle->oiio_handle) { - if (texture_thread_info == NULL) { - ShaderData *sd = (ShaderData *)(sg->renderstate); - KernelGlobals kernel_globals = sd->osl_globals; - OSLThreadData *tdata = kernel_globals->osl_tdata; - texture_thread_info = tdata->oiio_thread_info; - } - - status = ts->texture3d(handle->oiio_handle, - texture_thread_info, - options, - P, - dPdx, - dPdy, - dPdz, - nchannels, - result, - dresultds, - dresultdt, - dresultdr); - } - else { - status = ts->texture3d(filename, - options, - P, - dPdx, - dPdy, - dPdz, - nchannels, - result, - dresultds, - dresultdt, - dresultdr); - } - - if (!status) { - /* This might be slow, but prevents error messages leak and - * other nasty stuff happening. 
*/ - ts->geterror(); - } - else if (handle && handle->processor) { - ColorSpaceManager::to_scene_linear(handle->processor, result, nchannels); - } - break; - } - case OSLTextureHandle::IES: - case OSLTextureHandle::AO: - case OSLTextureHandle::BEVEL: { - status = false; - break; - } - } - - if (!status) { - if (nchannels == 3 || nchannels == 4) { - result[0] = 1.0f; - result[1] = 0.0f; - result[2] = 1.0f; - - if (nchannels == 4) - result[3] = 1.0f; - } - } - - return status; -} - -bool OSLRenderServices::environment(ustring filename, - TextureHandle *texture_handle, - TexturePerthread *thread_info, - TextureOpt &options, - OSL::ShaderGlobals *sg, - const OSL::Vec3 &R, - const OSL::Vec3 &dRdx, - const OSL::Vec3 &dRdy, - int nchannels, - float *result, - float *dresultds, - float *dresultdt, - ustring *errormessage) -{ - OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; - OSL::TextureSystem *ts = texture_system; - bool status = false; - - if (handle && handle->oiio_handle) { - if (thread_info == NULL) { - ShaderData *sd = (ShaderData *)(sg->renderstate); - KernelGlobals kernel_globals = sd->osl_globals; - OSLThreadData *tdata = kernel_globals->osl_tdata; - thread_info = tdata->oiio_thread_info; - } - - status = ts->environment(handle->oiio_handle, - thread_info, - options, - R, - dRdx, - dRdy, - nchannels, - result, - dresultds, - dresultdt); - } - else { - status = ts->environment( - filename, options, R, dRdx, dRdy, nchannels, result, dresultds, dresultdt); - } - - if (!status) { - if (nchannels == 3 || nchannels == 4) { - result[0] = 1.0f; - result[1] = 0.0f; - result[2] = 1.0f; - - if (nchannels == 4) - result[3] = 1.0f; - } - } - else if (handle && handle->processor) { - ColorSpaceManager::to_scene_linear(handle->processor, result, nchannels); - } - - return status; -} - -#if OSL_LIBRARY_VERSION_CODE >= 11100 -bool OSLRenderServices::get_texture_info(ustring filename, - TextureHandle *texture_handle, - TexturePerthread *, - OSL::ShadingContext *, - int subimage, - ustring dataname, - TypeDesc datatype, - void *data, - ustring *) -#else -bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, - ustring filename, - TextureHandle *texture_handle, - int subimage, - ustring dataname, - TypeDesc datatype, - void *data) -#endif -{ - OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; - - /* No texture info for other texture types. */ - if (handle && handle->type != OSLTextureHandle::OIIO) { - return false; - } - - /* Get texture info from OpenImageIO. 
*/ - OSL::TextureSystem *ts = texture_system; - return ts->get_texture_info(filename, subimage, dataname, datatype, data); -} - -int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg, - ustring filename, - const OSL::Vec3 ¢er, - float radius, - int max_points, - bool sort, - size_t *out_indices, - float *out_distances, - int derivs_offset) -{ - return 0; -} - -int OSLRenderServices::pointcloud_get(OSL::ShaderGlobals *sg, - ustring filename, - size_t *indices, - int count, - ustring attr_name, - TypeDesc attr_type, - void *out_data) -{ - return 0; -} - -bool OSLRenderServices::pointcloud_write(OSL::ShaderGlobals *sg, - ustring filename, - const OSL::Vec3 &pos, - int nattribs, - const ustring *names, - const TypeDesc *types, - const void **data) -{ - return false; -} - -bool OSLRenderServices::trace(TraceOpt &options, - OSL::ShaderGlobals *sg, - const OSL::Vec3 &P, - const OSL::Vec3 &dPdx, - const OSL::Vec3 &dPdy, - const OSL::Vec3 &R, - const OSL::Vec3 &dRdx, - const OSL::Vec3 &dRdy) -{ - /* todo: options.shader support, maybe options.traceset */ - ShaderData *sd = (ShaderData *)(sg->renderstate); - - /* setup ray */ - Ray ray; - - ray.P = TO_FLOAT3(P); - ray.D = TO_FLOAT3(R); - ray.t = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist; - ray.time = sd->time; - - if (options.mindist == 0.0f) { - /* avoid self-intersections */ - if (ray.P == sd->P) { - bool transmit = (dot(sd->Ng, ray.D) < 0.0f); - ray.P = ray_offset(sd->P, (transmit) ? -sd->Ng : sd->Ng); - } - } - else { - /* offset for minimum distance */ - ray.P += options.mindist * ray.D; - } - - /* ray differentials */ - differential3 dP; - dP.dx = TO_FLOAT3(dPdx); - dP.dy = TO_FLOAT3(dPdy); - ray.dP = differential_make_compact(dP); - differential3 dD; - dD.dx = TO_FLOAT3(dRdx); - dD.dy = TO_FLOAT3(dRdy); - ray.dD = differential_make_compact(dD); - - /* allocate trace data */ - OSLTraceData *tracedata = (OSLTraceData *)sg->tracedata; - tracedata->ray = ray; - tracedata->setup = false; - tracedata->init = true; - tracedata->hit = false; - tracedata->sd.osl_globals = sd->osl_globals; - - const KernelGlobalsCPU *kg = sd->osl_globals; - - /* Can't raytrace from shaders like displacement, before BVH exists. */ - if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { - return false; - } - - /* Raytrace, leaving out shadow opaque to avoid early exit. 
*/ - uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE; - tracedata->hit = scene_intersect(kg, &ray, visibility, &tracedata->isect); - return tracedata->hit; -} - -bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, - ustring source, - ustring name, - TypeDesc type, - void *val, - bool derivatives) -{ - OSLTraceData *tracedata = (OSLTraceData *)sg->tracedata; - - if (source == u_trace && tracedata->init) { - if (name == u_hit) { - return set_attribute_int(tracedata->hit, type, derivatives, val); - } - else if (tracedata->hit) { - if (name == u_hitdist) { - float f[3] = {tracedata->isect.t, 0.0f, 0.0f}; - return set_attribute_float(f, type, derivatives, val); - } - else { - ShaderData *sd = &tracedata->sd; - const KernelGlobalsCPU *kg = sd->osl_globals; - - if (!tracedata->setup) { - /* lazy shader data setup */ - shader_setup_from_ray(kg, sd, &tracedata->ray, &tracedata->isect); - tracedata->setup = true; - } - - if (name == u_N) { - return set_attribute_float3(sd->N, type, derivatives, val); - } - else if (name == u_Ng) { - return set_attribute_float3(sd->Ng, type, derivatives, val); - } - else if (name == u_P) { - float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy}; - return set_attribute_float3(f, type, derivatives, val); - } - else if (name == u_I) { - float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy}; - return set_attribute_float3(f, type, derivatives, val); - } - else if (name == u_u) { - float f[3] = {sd->u, sd->du.dx, sd->du.dy}; - return set_attribute_float(f, type, derivatives, val); - } - else if (name == u_v) { - float f[3] = {sd->v, sd->dv.dx, sd->dv.dy}; - return set_attribute_float(f, type, derivatives, val); - } - - return get_attribute(sd, derivatives, u_empty, type, name, val); - } - } - } - - return false; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h deleted file mode 100644 index d9f57c642ad..00000000000 --- a/intern/cycles/kernel/osl/osl_services.h +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OSL_SERVICES_H__ -#define __OSL_SERVICES_H__ - -/* OSL Render Services - * - * Implementation of OSL render services, to retriever matrices, attributes, - * textures and point clouds. In principle this should only be accessing - * kernel data, but currently we also reach back into the Scene to retrieve - * attributes. - */ - -#include -#include -#include - -#ifdef WITH_PTEX -class PtexCache; -#endif - -CCL_NAMESPACE_BEGIN - -class Object; -class Scene; -class Shader; -struct ShaderData; -struct float3; -struct KernelGlobalsCPU; - -/* OSL Texture Handle - * - * OSL texture lookups are string based. If those strings are known at compile - * time, the OSL compiler can cache a texture handle to use instead of a string. - * - * By default it uses TextureSystem::TextureHandle. 
But since we want to support - * different kinds of textures and color space conversions, this is our own handle - * with additional data. - * - * These are stored in a concurrent hash map, because OSL can compile multiple - * shaders in parallel. */ - -struct OSLTextureHandle : public OIIO::RefCnt { - enum Type { OIIO, SVM, IES, BEVEL, AO }; - - OSLTextureHandle(Type type = OIIO, int svm_slot = -1) - : type(type), svm_slot(svm_slot), oiio_handle(NULL), processor(NULL) - { - } - - Type type; - int svm_slot; - OSL::TextureSystem::TextureHandle *oiio_handle; - ColorSpaceProcessor *processor; -}; - -typedef OIIO::intrusive_ptr OSLTextureHandleRef; -typedef OIIO::unordered_map_concurrent - OSLTextureHandleMap; - -/* OSL Render Services - * - * Interface for OSL to access attributes, textures and other scene data. */ - -class OSLRenderServices : public OSL::RendererServices { - public: - OSLRenderServices(OSL::TextureSystem *texture_system); - ~OSLRenderServices(); - - bool get_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - OSL::TransformationPtr xform, - float time) override; - bool get_inverse_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - OSL::TransformationPtr xform, - float time) override; - - bool get_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - ustring from, - float time) override; - bool get_inverse_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - ustring to, - float time) override; - - bool get_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - OSL::TransformationPtr xform) override; - bool get_inverse_matrix(OSL::ShaderGlobals *sg, - OSL::Matrix44 &result, - OSL::TransformationPtr xform) override; - - bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override; - bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override; - - bool get_array_attribute(OSL::ShaderGlobals *sg, - bool derivatives, - ustring object, - TypeDesc type, - ustring name, - int index, - void *val) override; - bool get_attribute(OSL::ShaderGlobals *sg, - bool derivatives, - ustring object, - TypeDesc type, - ustring name, - void *val) override; - bool get_attribute(ShaderData *sd, - bool derivatives, - ustring object_name, - TypeDesc type, - ustring name, - void *val); - - bool get_userdata( - bool derivatives, ustring name, TypeDesc type, OSL::ShaderGlobals *sg, void *val) override; - - int pointcloud_search(OSL::ShaderGlobals *sg, - ustring filename, - const OSL::Vec3 ¢er, - float radius, - int max_points, - bool sort, - size_t *out_indices, - float *out_distances, - int derivs_offset) override; - - int pointcloud_get(OSL::ShaderGlobals *sg, - ustring filename, - size_t *indices, - int count, - ustring attr_name, - TypeDesc attr_type, - void *out_data) override; - - bool pointcloud_write(OSL::ShaderGlobals *sg, - ustring filename, - const OSL::Vec3 &pos, - int nattribs, - const ustring *names, - const TypeDesc *types, - const void **data) override; - - bool trace(TraceOpt &options, - OSL::ShaderGlobals *sg, - const OSL::Vec3 &P, - const OSL::Vec3 &dPdx, - const OSL::Vec3 &dPdy, - const OSL::Vec3 &R, - const OSL::Vec3 &dRdx, - const OSL::Vec3 &dRdy) override; - - bool getmessage(OSL::ShaderGlobals *sg, - ustring source, - ustring name, - TypeDesc type, - void *val, - bool derivatives) override; - -#if OSL_LIBRARY_VERSION_CODE >= 11100 - TextureSystem::TextureHandle *get_texture_handle(ustring filename, - OSL::ShadingContext *context) override; -#else - TextureSystem::TextureHandle 
*get_texture_handle(ustring filename) override; -#endif - - bool good(TextureSystem::TextureHandle *texture_handle) override; - - bool texture(ustring filename, - TextureSystem::TextureHandle *texture_handle, - TexturePerthread *texture_thread_info, - TextureOpt &options, - OSL::ShaderGlobals *sg, - float s, - float t, - float dsdx, - float dtdx, - float dsdy, - float dtdy, - int nchannels, - float *result, - float *dresultds, - float *dresultdt, - ustring *errormessage) override; - - bool texture3d(ustring filename, - TextureHandle *texture_handle, - TexturePerthread *texture_thread_info, - TextureOpt &options, - OSL::ShaderGlobals *sg, - const OSL::Vec3 &P, - const OSL::Vec3 &dPdx, - const OSL::Vec3 &dPdy, - const OSL::Vec3 &dPdz, - int nchannels, - float *result, - float *dresultds, - float *dresultdt, - float *dresultdr, - ustring *errormessage) override; - - bool environment(ustring filename, - TextureHandle *texture_handle, - TexturePerthread *texture_thread_info, - TextureOpt &options, - OSL::ShaderGlobals *sg, - const OSL::Vec3 &R, - const OSL::Vec3 &dRdx, - const OSL::Vec3 &dRdy, - int nchannels, - float *result, - float *dresultds, - float *dresultdt, - ustring *errormessage) override; - -#if OSL_LIBRARY_VERSION_CODE >= 11100 - bool get_texture_info(ustring filename, - TextureHandle *texture_handle, - TexturePerthread *texture_thread_info, - OSL::ShadingContext *shading_context, - int subimage, - ustring dataname, - TypeDesc datatype, - void *data, - ustring *errormessage) override; -#else - bool get_texture_info(OSL::ShaderGlobals *sg, - ustring filename, - TextureHandle *texture_handle, - int subimage, - ustring dataname, - TypeDesc datatype, - void *data) override; -#endif - - static bool get_background_attribute(const KernelGlobalsCPU *kg, - ShaderData *sd, - ustring name, - TypeDesc type, - bool derivatives, - void *val); - static bool get_object_standard_attribute(const KernelGlobalsCPU *kg, - ShaderData *sd, - ustring name, - TypeDesc type, - bool derivatives, - void *val); - - static ustring u_distance; - static ustring u_index; - static ustring u_world; - static ustring u_camera; - static ustring u_screen; - static ustring u_raster; - static ustring u_ndc; - static ustring u_object_location; - static ustring u_object_color; - static ustring u_object_index; - static ustring u_geom_dupli_generated; - static ustring u_geom_dupli_uv; - static ustring u_material_index; - static ustring u_object_random; - static ustring u_particle_index; - static ustring u_particle_random; - static ustring u_particle_age; - static ustring u_particle_lifetime; - static ustring u_particle_location; - static ustring u_particle_rotation; - static ustring u_particle_size; - static ustring u_particle_velocity; - static ustring u_particle_angular_velocity; - static ustring u_geom_numpolyvertices; - static ustring u_geom_trianglevertices; - static ustring u_geom_polyvertices; - static ustring u_geom_name; - static ustring u_geom_undisplaced; - static ustring u_is_smooth; - static ustring u_is_curve; - static ustring u_curve_thickness; - static ustring u_curve_length; - static ustring u_curve_tangent_normal; - static ustring u_curve_random; - static ustring u_normal_map_normal; - static ustring u_path_ray_length; - static ustring u_path_ray_depth; - static ustring u_path_diffuse_depth; - static ustring u_path_glossy_depth; - static ustring u_path_transparent_depth; - static ustring u_path_transmission_depth; - static ustring u_trace; - static ustring u_hit; - static ustring u_hitdist; - static ustring 
u_N; - static ustring u_Ng; - static ustring u_P; - static ustring u_I; - static ustring u_u; - static ustring u_v; - static ustring u_empty; - static ustring u_at_bevel; - static ustring u_at_ao; - - /* Texture system and texture handle map are part of the services instead of - * globals to be shared between different render sessions. This saves memory, - * and is required because texture handles are cached as part of the shared - * shading system. */ - OSL::TextureSystem *texture_system; - OSLTextureHandleMap textures; -}; - -CCL_NAMESPACE_END - -#endif /* __OSL_SERVICES_H__ */ diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp deleted file mode 100644 index 6426a09b33d..00000000000 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ /dev/null @@ -1,428 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -// clang-format off -#include "kernel/device/cpu/compat.h" -#include "kernel/device/cpu/globals.h" - -#include "kernel/kernel_types.h" - -#include "kernel/geom/geom_object.h" - -#include "kernel/integrator/integrator_state.h" - -#include "kernel/osl/osl_closures.h" -#include "kernel/osl/osl_globals.h" -#include "kernel/osl/osl_services.h" -#include "kernel/osl/osl_shader.h" -// clang-format on - -#include "scene/attribute.h" - -CCL_NAMESPACE_BEGIN - -/* Threads */ - -void OSLShader::thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals) -{ - /* no osl used? */ - if (!osl_globals->use) { - kg->osl = NULL; - return; - } - - /* Per thread kernel data init. 
*/ - kg->osl = osl_globals; - - OSL::ShadingSystem *ss = kg->osl->ss; - OSLThreadData *tdata = new OSLThreadData(); - - memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals)); - tdata->globals.tracedata = &tdata->tracedata; - tdata->globals.flipHandedness = false; - tdata->osl_thread_info = ss->create_thread_info(); - tdata->context = ss->get_context(tdata->osl_thread_info); - - tdata->oiio_thread_info = osl_globals->ts->get_perthread_info(); - - kg->osl_ss = (OSLShadingSystem *)ss; - kg->osl_tdata = tdata; -} - -void OSLShader::thread_free(KernelGlobalsCPU *kg) -{ - if (!kg->osl) - return; - - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSLThreadData *tdata = kg->osl_tdata; - ss->release_context(tdata->context); - - ss->destroy_thread_info(tdata->osl_thread_info); - - delete tdata; - - kg->osl = NULL; - kg->osl_ss = NULL; - kg->osl_tdata = NULL; -} - -/* Globals */ - -static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg, - ShaderData *sd, - const void *state, - uint32_t path_flag, - OSLThreadData *tdata) -{ - OSL::ShaderGlobals *globals = &tdata->globals; - - /* copy from shader data to shader globals */ - globals->P = TO_VEC3(sd->P); - globals->dPdx = TO_VEC3(sd->dP.dx); - globals->dPdy = TO_VEC3(sd->dP.dy); - globals->I = TO_VEC3(sd->I); - globals->dIdx = TO_VEC3(sd->dI.dx); - globals->dIdy = TO_VEC3(sd->dI.dy); - globals->N = TO_VEC3(sd->N); - globals->Ng = TO_VEC3(sd->Ng); - globals->u = sd->u; - globals->dudx = sd->du.dx; - globals->dudy = sd->du.dy; - globals->v = sd->v; - globals->dvdx = sd->dv.dx; - globals->dvdy = sd->dv.dy; - globals->dPdu = TO_VEC3(sd->dPdu); - globals->dPdv = TO_VEC3(sd->dPdv); - globals->surfacearea = 1.0f; - globals->time = sd->time; - - /* booleans */ - globals->raytype = path_flag; - globals->backfacing = (sd->flag & SD_BACKFACING); - - /* shader data to be used in services callbacks */ - globals->renderstate = sd; - - /* hacky, we leave it to services to fetch actual object matrix */ - globals->shader2common = sd; - globals->object2common = sd; - - /* must be set to NULL before execute */ - globals->Ci = NULL; - - /* clear trace data */ - tdata->tracedata.init = false; - - /* Used by render-services. */ - sd->osl_globals = kg; - if (path_flag & PATH_RAY_SHADOW) { - sd->osl_shadow_path_state = (const IntegratorShadowStateCPU *)state; - } - else { - sd->osl_path_state = (const IntegratorStateCPU *)state; - } -} - -/* Surface */ - -static void flatten_surface_closure_tree(ShaderData *sd, - uint32_t path_flag, - const OSL::ClosureColor *closure, - float3 weight = make_float3(1.0f, 1.0f, 1.0f)) -{ - /* OSL gives us a closure tree, we flatten it into arrays per - * closure type, for evaluation, sampling, etc later on. 
*/ - - switch (closure->id) { - case OSL::ClosureColor::MUL: { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight); - break; - } - case OSL::ClosureColor::ADD: { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - flatten_surface_closure_tree(sd, path_flag, add->closureA, weight); - flatten_surface_closure_tree(sd, path_flag, add->closureB, weight); - break; - } - default: { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - - if (prim) { -#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - weight = weight * TO_FLOAT3(comp->w); -#endif - prim->setup(sd, path_flag, weight); - } - break; - } - } -} - -void OSLShader::eval_surface(const KernelGlobalsCPU *kg, - const void *state, - ShaderData *sd, - uint32_t path_flag) -{ - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; - shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); - - /* execute shader for this point */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - int shader = sd->shader & SHADER_MASK; - - /* automatic bump shader */ - if (kg->osl->bump_state[shader]) { - /* save state */ - float3 P = sd->P; - float3 dPdx = sd->dP.dx; - float3 dPdy = sd->dP.dy; - - /* set state as if undisplaced */ - if (sd->flag & SD_HAS_DISPLACEMENT) { - float data[9]; - bool found = kg->osl->services->get_attribute(sd, - true, - OSLRenderServices::u_empty, - TypeDesc::TypeVector, - OSLRenderServices::u_geom_undisplaced, - data); - (void)found; - assert(found); - - memcpy(&sd->P, data, sizeof(float) * 3); - memcpy(&sd->dP.dx, data + 3, sizeof(float) * 3); - memcpy(&sd->dP.dy, data + 6, sizeof(float) * 3); - - object_position_transform(kg, sd, &sd->P); - object_dir_transform(kg, sd, &sd->dP.dx); - object_dir_transform(kg, sd, &sd->dP.dy); - - globals->P = TO_VEC3(sd->P); - globals->dPdx = TO_VEC3(sd->dP.dx); - globals->dPdy = TO_VEC3(sd->dP.dy); - } - - /* execute bump shader */ - ss->execute(octx, *(kg->osl->bump_state[shader]), *globals); - - /* reset state */ - sd->P = P; - sd->dP.dx = dPdx; - sd->dP.dy = dPdy; - - globals->P = TO_VEC3(P); - globals->dPdx = TO_VEC3(dPdx); - globals->dPdy = TO_VEC3(dPdy); - } - - /* surface shader */ - if (kg->osl->surface_state[shader]) { - ss->execute(octx, *(kg->osl->surface_state[shader]), *globals); - } - - /* flatten closure tree */ - if (globals->Ci) - flatten_surface_closure_tree(sd, path_flag, globals->Ci); -} - -/* Background */ - -static void flatten_background_closure_tree(ShaderData *sd, - const OSL::ClosureColor *closure, - float3 weight = make_float3(1.0f, 1.0f, 1.0f)) -{ - /* OSL gives us a closure tree, if we are shading for background there - * is only one supported closure type at the moment, which has no evaluation - * functions, so we just sum the weights */ - - switch (closure->id) { - case OSL::ClosureColor::MUL: { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_background_closure_tree(sd, mul->closure, weight * TO_FLOAT3(mul->weight)); - break; - } - case OSL::ClosureColor::ADD: { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - - flatten_background_closure_tree(sd, add->closureA, weight); - flatten_background_closure_tree(sd, add->closureB, weight); - break; - } - default: { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - 
CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - - if (prim) { -#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - weight = weight * TO_FLOAT3(comp->w); -#endif - prim->setup(sd, 0, weight); - } - break; - } - } -} - -void OSLShader::eval_background(const KernelGlobalsCPU *kg, - const void *state, - ShaderData *sd, - uint32_t path_flag) -{ - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; - shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); - - /* execute shader for this point */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - - if (kg->osl->background_state) { - ss->execute(octx, *(kg->osl->background_state), *globals); - } - - /* return background color immediately */ - if (globals->Ci) - flatten_background_closure_tree(sd, globals->Ci); -} - -/* Volume */ - -static void flatten_volume_closure_tree(ShaderData *sd, - const OSL::ClosureColor *closure, - float3 weight = make_float3(1.0f, 1.0f, 1.0f)) -{ - /* OSL gives us a closure tree, we flatten it into arrays per - * closure type, for evaluation, sampling, etc later on. */ - - switch (closure->id) { - case OSL::ClosureColor::MUL: { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight); - break; - } - case OSL::ClosureColor::ADD: { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - flatten_volume_closure_tree(sd, add->closureA, weight); - flatten_volume_closure_tree(sd, add->closureB, weight); - break; - } - default: { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - - if (prim) { -#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - weight = weight * TO_FLOAT3(comp->w); -#endif - prim->setup(sd, 0, weight); - } - } - } -} - -void OSLShader::eval_volume(const KernelGlobalsCPU *kg, - const void *state, - ShaderData *sd, - uint32_t path_flag) -{ - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; - shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); - - /* execute shader */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - int shader = sd->shader & SHADER_MASK; - - if (kg->osl->volume_state[shader]) { - ss->execute(octx, *(kg->osl->volume_state[shader]), *globals); - } - - /* flatten closure tree */ - if (globals->Ci) - flatten_volume_closure_tree(sd, globals->Ci); -} - -/* Displacement */ - -void OSLShader::eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd) -{ - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; - - shaderdata_to_shaderglobals(kg, sd, state, 0, tdata); - - /* execute shader */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - int shader = sd->shader & SHADER_MASK; - - if (kg->osl->displacement_state[shader]) { - ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals); - } - - /* get back position */ - sd->P = TO_FLOAT3(globals->P); -} - -/* Attributes */ - -int OSLShader::find_attribute(const KernelGlobalsCPU *kg, - const ShaderData *sd, - uint id, - AttributeDescriptor *desc) -{ - /* for OSL, a hash map is used to lookup the attribute by name. 
*/ - int object = sd->object * ATTR_PRIM_TYPES; - - OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object]; - ustring stdname(std::string("geom:") + - std::string(Attribute::standard_name((AttributeStandard)id))); - OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname); - - if (it != attr_map.end()) { - const OSLGlobals::Attribute &osl_attr = it->second; - *desc = osl_attr.desc; - - if (sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) { - desc->offset = ATTR_STD_NOT_FOUND; - return ATTR_STD_NOT_FOUND; - } - - /* return result */ - if (osl_attr.desc.element == ATTR_ELEMENT_NONE) { - desc->offset = ATTR_STD_NOT_FOUND; - } - return desc->offset; - } - else { - desc->offset = ATTR_STD_NOT_FOUND; - return (int)ATTR_STD_NOT_FOUND; - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h deleted file mode 100644 index 037a18a1f19..00000000000 --- a/intern/cycles/kernel/osl/osl_shader.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OSL_SHADER_H__ -#define __OSL_SHADER_H__ - -#ifdef WITH_OSL - -/* OSL Shader Engine - * - * Holds all variables to execute and use OSL shaders from the kernel. These - * are initialized externally by OSLShaderManager before rendering starts. - * - * Before/after a thread starts rendering, thread_init/thread_free must be - * called, which will store any per thread OSL state in thread local storage. - * This means no thread state must be passed along in the kernel itself. 
- */ - -# include "kernel/kernel_types.h" - -CCL_NAMESPACE_BEGIN - -class Scene; - -struct ShaderClosure; -struct ShaderData; -struct IntegratorStateCPU; -struct differential3; -struct KernelGlobalsCPU; - -struct OSLGlobals; -struct OSLShadingSystem; - -class OSLShader { - public: - /* init */ - static void register_closures(OSLShadingSystem *ss); - - /* per thread data */ - static void thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals); - static void thread_free(KernelGlobalsCPU *kg); - - /* eval */ - static void eval_surface(const KernelGlobalsCPU *kg, - const void *state, - ShaderData *sd, - uint32_t path_flag); - static void eval_background(const KernelGlobalsCPU *kg, - const void *state, - ShaderData *sd, - uint32_t path_flag); - static void eval_volume(const KernelGlobalsCPU *kg, - const void *state, - ShaderData *sd, - uint32_t path_flag); - static void eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd); - - /* attributes */ - static int find_attribute(const KernelGlobalsCPU *kg, - const ShaderData *sd, - uint id, - AttributeDescriptor *desc); -}; - -CCL_NAMESPACE_END - -#endif - -#endif /* __OSL_SHADER_H__ */ diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp new file mode 100644 index 00000000000..ca0a5a068b3 --- /dev/null +++ b/intern/cycles/kernel/osl/services.cpp @@ -0,0 +1,1724 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* TODO(sergey): There is a bit of headers dependency hell going on + * here, so for now we just put here. In the future it might be better + * to have dedicated file for such tweaks. 
+ */ +#if (defined(__GNUC__) && !defined(__clang__)) && defined(NDEBUG) +# pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +# pragma GCC diagnostic ignored "-Wuninitialized" +#endif + +#include + +#include "scene/colorspace.h" +#include "scene/mesh.h" +#include "scene/object.h" +#include "scene/scene.h" + +#include "kernel/osl/closures.h" +#include "kernel/osl/globals.h" +#include "kernel/osl/services.h" +#include "kernel/osl/shader.h" + +#include "util/foreach.h" +#include "util/log.h" +#include "util/string.h" + +// clang-format off +#include "kernel/device/cpu/compat.h" +#include "kernel/device/cpu/globals.h" +#include "kernel/device/cpu/image.h" + +#include "kernel/util/differential.h" + +#include "kernel/integrator/state.h" +#include "kernel/integrator/state_flow.h" + +#include "kernel/geom/geom.h" + +#include "kernel/bvh/bvh.h" + +#include "kernel/camera/camera.h" +#include "kernel/camera/projection.h" + +#include "kernel/integrator/path_state.h" +#include "kernel/integrator/shader_eval.h" + +#include "kernel/util/color.h" +// clang-format on + +CCL_NAMESPACE_BEGIN + +/* RenderServices implementation */ + +static void copy_matrix(OSL::Matrix44 &m, const Transform &tfm) +{ + ProjectionTransform t = projection_transpose(ProjectionTransform(tfm)); + memcpy((void *)&m, &t, sizeof(m)); +} + +static void copy_matrix(OSL::Matrix44 &m, const ProjectionTransform &tfm) +{ + ProjectionTransform t = projection_transpose(tfm); + memcpy((void *)&m, &t, sizeof(m)); +} + +/* static ustrings */ +ustring OSLRenderServices::u_distance("distance"); +ustring OSLRenderServices::u_index("index"); +ustring OSLRenderServices::u_world("world"); +ustring OSLRenderServices::u_camera("camera"); +ustring OSLRenderServices::u_screen("screen"); +ustring OSLRenderServices::u_raster("raster"); +ustring OSLRenderServices::u_ndc("NDC"); +ustring OSLRenderServices::u_object_location("object:location"); +ustring OSLRenderServices::u_object_color("object:color"); +ustring OSLRenderServices::u_object_index("object:index"); +ustring OSLRenderServices::u_geom_dupli_generated("geom:dupli_generated"); +ustring OSLRenderServices::u_geom_dupli_uv("geom:dupli_uv"); +ustring OSLRenderServices::u_material_index("material:index"); +ustring OSLRenderServices::u_object_random("object:random"); +ustring OSLRenderServices::u_particle_index("particle:index"); +ustring OSLRenderServices::u_particle_random("particle:random"); +ustring OSLRenderServices::u_particle_age("particle:age"); +ustring OSLRenderServices::u_particle_lifetime("particle:lifetime"); +ustring OSLRenderServices::u_particle_location("particle:location"); +ustring OSLRenderServices::u_particle_rotation("particle:rotation"); +ustring OSLRenderServices::u_particle_size("particle:size"); +ustring OSLRenderServices::u_particle_velocity("particle:velocity"); +ustring OSLRenderServices::u_particle_angular_velocity("particle:angular_velocity"); +ustring OSLRenderServices::u_geom_numpolyvertices("geom:numpolyvertices"); +ustring OSLRenderServices::u_geom_trianglevertices("geom:trianglevertices"); +ustring OSLRenderServices::u_geom_polyvertices("geom:polyvertices"); +ustring OSLRenderServices::u_geom_name("geom:name"); +ustring OSLRenderServices::u_geom_undisplaced("geom:undisplaced"); +ustring OSLRenderServices::u_is_smooth("geom:is_smooth"); +ustring OSLRenderServices::u_is_curve("geom:is_curve"); +ustring OSLRenderServices::u_curve_thickness("geom:curve_thickness"); +ustring OSLRenderServices::u_curve_length("geom:curve_length"); +ustring 
OSLRenderServices::u_curve_tangent_normal("geom:curve_tangent_normal"); +ustring OSLRenderServices::u_curve_random("geom:curve_random"); +ustring OSLRenderServices::u_normal_map_normal("geom:normal_map_normal"); +ustring OSLRenderServices::u_path_ray_length("path:ray_length"); +ustring OSLRenderServices::u_path_ray_depth("path:ray_depth"); +ustring OSLRenderServices::u_path_diffuse_depth("path:diffuse_depth"); +ustring OSLRenderServices::u_path_glossy_depth("path:glossy_depth"); +ustring OSLRenderServices::u_path_transparent_depth("path:transparent_depth"); +ustring OSLRenderServices::u_path_transmission_depth("path:transmission_depth"); +ustring OSLRenderServices::u_trace("trace"); +ustring OSLRenderServices::u_hit("hit"); +ustring OSLRenderServices::u_hitdist("hitdist"); +ustring OSLRenderServices::u_N("N"); +ustring OSLRenderServices::u_Ng("Ng"); +ustring OSLRenderServices::u_P("P"); +ustring OSLRenderServices::u_I("I"); +ustring OSLRenderServices::u_u("u"); +ustring OSLRenderServices::u_v("v"); +ustring OSLRenderServices::u_empty; + +OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system) + : texture_system(texture_system) +{ +} + +OSLRenderServices::~OSLRenderServices() +{ + if (texture_system) { + VLOG(2) << "OSL texture system stats:\n" << texture_system->getstats(); + } +} + +bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform, + float time) +{ + /* this is only used for shader and object space, we don't really have + * a concept of shader space, so we just use object space for both. */ + if (xform) { + const ShaderData *sd = (const ShaderData *)xform; + const KernelGlobalsCPU *kg = sd->osl_globals; + int object = sd->object; + + if (object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm; + + if (time == sd->time) + tfm = object_get_transform(kg, sd); + else + tfm = object_fetch_transform_motion_test(kg, object, time, NULL); +#else + const Transform tfm = object_get_transform(kg, sd); +#endif + copy_matrix(result, tfm); + + return true; + } + else if (sd->type == PRIMITIVE_LAMP) { + const Transform tfm = lamp_fetch_transform(kg, sd->lamp, false); + copy_matrix(result, tfm); + + return true; + } + } + + return false; +} + +bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform, + float time) +{ + /* this is only used for shader and object space, we don't really have + * a concept of shader space, so we just use object space for both. 
*/ + if (xform) { + const ShaderData *sd = (const ShaderData *)xform; + const KernelGlobalsCPU *kg = sd->osl_globals; + int object = sd->object; + + if (object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform itfm; + + if (time == sd->time) + itfm = object_get_inverse_transform(kg, sd); + else + object_fetch_transform_motion_test(kg, object, time, &itfm); +#else + const Transform itfm = object_get_inverse_transform(kg, sd); +#endif + copy_matrix(result, itfm); + + return true; + } + else if (sd->type == PRIMITIVE_LAMP) { + const Transform itfm = lamp_fetch_transform(kg, sd->lamp, true); + copy_matrix(result, itfm); + + return true; + } + } + + return false; +} + +bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + ustring from, + float time) +{ + ShaderData *sd = (ShaderData *)(sg->renderstate); + const KernelGlobalsCPU *kg = sd->osl_globals; + + if (from == u_ndc) { + copy_matrix(result, kernel_data.cam.ndctoworld); + return true; + } + else if (from == u_raster) { + copy_matrix(result, kernel_data.cam.rastertoworld); + return true; + } + else if (from == u_screen) { + copy_matrix(result, kernel_data.cam.screentoworld); + return true; + } + else if (from == u_camera) { + copy_matrix(result, kernel_data.cam.cameratoworld); + return true; + } + else if (from == u_world) { + result.makeIdentity(); + return true; + } + + return false; +} + +bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + ustring to, + float time) +{ + ShaderData *sd = (ShaderData *)(sg->renderstate); + const KernelGlobalsCPU *kg = sd->osl_globals; + + if (to == u_ndc) { + copy_matrix(result, kernel_data.cam.worldtondc); + return true; + } + else if (to == u_raster) { + copy_matrix(result, kernel_data.cam.worldtoraster); + return true; + } + else if (to == u_screen) { + copy_matrix(result, kernel_data.cam.worldtoscreen); + return true; + } + else if (to == u_camera) { + copy_matrix(result, kernel_data.cam.worldtocamera); + return true; + } + else if (to == u_world) { + result.makeIdentity(); + return true; + } + + return false; +} + +bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform) +{ + /* this is only used for shader and object space, we don't really have + * a concept of shader space, so we just use object space for both. */ + if (xform) { + const ShaderData *sd = (const ShaderData *)xform; + const KernelGlobalsCPU *kg = sd->osl_globals; + int object = sd->object; + + if (object != OBJECT_NONE) { + const Transform tfm = object_get_transform(kg, sd); + copy_matrix(result, tfm); + + return true; + } + else if (sd->type == PRIMITIVE_LAMP) { + const Transform tfm = lamp_fetch_transform(kg, sd->lamp, false); + copy_matrix(result, tfm); + + return true; + } + } + + return false; +} + +bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform) +{ + /* this is only used for shader and object space, we don't really have + * a concept of shader space, so we just use object space for both. 
*/ + if (xform) { + const ShaderData *sd = (const ShaderData *)xform; + const KernelGlobalsCPU *kg = sd->osl_globals; + int object = sd->object; + + if (object != OBJECT_NONE) { + const Transform tfm = object_get_inverse_transform(kg, sd); + copy_matrix(result, tfm); + + return true; + } + else if (sd->type == PRIMITIVE_LAMP) { + const Transform itfm = lamp_fetch_transform(kg, sd->lamp, true); + copy_matrix(result, itfm); + + return true; + } + } + + return false; +} + +bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) +{ + ShaderData *sd = (ShaderData *)(sg->renderstate); + const KernelGlobalsCPU *kg = sd->osl_globals; + + if (from == u_ndc) { + copy_matrix(result, kernel_data.cam.ndctoworld); + return true; + } + else if (from == u_raster) { + copy_matrix(result, kernel_data.cam.rastertoworld); + return true; + } + else if (from == u_screen) { + copy_matrix(result, kernel_data.cam.screentoworld); + return true; + } + else if (from == u_camera) { + copy_matrix(result, kernel_data.cam.cameratoworld); + return true; + } + + return false; +} + +bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + ustring to) +{ + ShaderData *sd = (ShaderData *)(sg->renderstate); + const KernelGlobalsCPU *kg = sd->osl_globals; + + if (to == u_ndc) { + copy_matrix(result, kernel_data.cam.worldtondc); + return true; + } + else if (to == u_raster) { + copy_matrix(result, kernel_data.cam.worldtoraster); + return true; + } + else if (to == u_screen) { + copy_matrix(result, kernel_data.cam.worldtoscreen); + return true; + } + else if (to == u_camera) { + copy_matrix(result, kernel_data.cam.worldtocamera); + return true; + } + + return false; +} + +bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, + bool derivatives, + ustring object, + TypeDesc type, + ustring name, + int index, + void *val) +{ + return false; +} + +static bool set_attribute_float2(float2 f[3], TypeDesc type, bool derivatives, void *val) +{ + if (type == TypeFloatArray4) { + float *fval = (float *)val; + fval[0] = f[0].x; + fval[1] = f[0].y; + fval[2] = 0.0f; + fval[3] = 1.0f; + + if (derivatives) { + fval[4] = f[1].x; + fval[5] = f[1].y; + fval[6] = 0.0f; + fval[7] = 0.0f; + + fval[8] = f[2].x; + fval[9] = f[2].y; + fval[10] = 0.0f; + fval[11] = 0.0f; + } + return true; + } + else if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || + type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { + float *fval = (float *)val; + + fval[0] = f[0].x; + fval[1] = f[0].y; + fval[2] = 0.0f; + + if (derivatives) { + fval[3] = f[1].x; + fval[4] = f[1].y; + fval[5] = 0.0f; + + fval[6] = f[2].x; + fval[7] = f[2].y; + fval[8] = 0.0f; + } + + return true; + } + else if (type == TypeDesc::TypeFloat) { + float *fval = (float *)val; + fval[0] = average(f[0]); + + if (derivatives) { + fval[1] = average(f[1]); + fval[2] = average(f[2]); + } + + return true; + } + + return false; +} + +static bool set_attribute_float2(float2 f, TypeDesc type, bool derivatives, void *val) +{ + float2 fv[3]; + + fv[0] = f; + fv[1] = make_float2(0.0f, 0.0f); + fv[2] = make_float2(0.0f, 0.0f); + + return set_attribute_float2(fv, type, derivatives, val); +} + +static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, void *val) +{ + if (type == TypeFloatArray4) { + float *fval = (float *)val; + fval[0] = f[0].x; + fval[1] = f[0].y; + fval[2] = f[0].z; + fval[3] = 1.0f; + + if (derivatives) { + fval[4] = f[1].x; + fval[5] = f[1].y; + fval[6] = 
f[1].z; + fval[7] = 0.0f; + + fval[8] = f[2].x; + fval[9] = f[2].y; + fval[10] = f[2].z; + fval[11] = 0.0f; + } + return true; + } + else if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || + type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { + float *fval = (float *)val; + + fval[0] = f[0].x; + fval[1] = f[0].y; + fval[2] = f[0].z; + + if (derivatives) { + fval[3] = f[1].x; + fval[4] = f[1].y; + fval[5] = f[1].z; + + fval[6] = f[2].x; + fval[7] = f[2].y; + fval[8] = f[2].z; + } + + return true; + } + else if (type == TypeDesc::TypeFloat) { + float *fval = (float *)val; + fval[0] = average(f[0]); + + if (derivatives) { + fval[1] = average(f[1]); + fval[2] = average(f[2]); + } + + return true; + } + + return false; +} + +static bool set_attribute_float3(float3 f, TypeDesc type, bool derivatives, void *val) +{ + float3 fv[3]; + + fv[0] = f; + fv[1] = make_float3(0.0f, 0.0f, 0.0f); + fv[2] = make_float3(0.0f, 0.0f, 0.0f); + + return set_attribute_float3(fv, type, derivatives, val); +} + +/* Attributes with the TypeRGBA type descriptor should be retrieved and stored + * in a float array of size 4 (e.g. node_vertex_color.osl), this array have + * a type descriptor TypeFloatArray4. If the storage is not a TypeFloatArray4, + * we either store the first three components in a vector, store the average of + * the components in a float, or fail the retrieval and do nothing. We allow + * this for the correct operation of the Attribute node. + */ + +static bool set_attribute_float4(float4 f[3], TypeDesc type, bool derivatives, void *val) +{ + float *fval = (float *)val; + if (type == TypeFloatArray4) { + fval[0] = f[0].x; + fval[1] = f[0].y; + fval[2] = f[0].z; + fval[3] = f[0].w; + + if (derivatives) { + fval[4] = f[1].x; + fval[5] = f[1].y; + fval[6] = f[1].z; + fval[7] = f[1].w; + + fval[8] = f[2].x; + fval[9] = f[2].y; + fval[10] = f[2].z; + fval[11] = f[2].w; + } + return true; + } + else if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || + type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { + fval[0] = f[0].x; + fval[1] = f[0].y; + fval[2] = f[0].z; + + if (derivatives) { + fval[3] = f[1].x; + fval[4] = f[1].y; + fval[5] = f[1].z; + + fval[6] = f[2].x; + fval[7] = f[2].y; + fval[8] = f[2].z; + } + return true; + } + else if (type == TypeDesc::TypeFloat) { + fval[0] = average(float4_to_float3(f[0])); + + if (derivatives) { + fval[1] = average(float4_to_float3(f[1])); + fval[2] = average(float4_to_float3(f[2])); + } + return true; + } + return false; +} + +static bool set_attribute_float4(float4 f, TypeDesc type, bool derivatives, void *val) +{ + float4 fv[3]; + + fv[0] = f; + fv[1] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + fv[2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + return set_attribute_float4(fv, type, derivatives, val); +} + +static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, void *val) +{ + if (type == TypeFloatArray4) { + float *fval = (float *)val; + fval[0] = f[0]; + fval[1] = f[0]; + fval[2] = f[0]; + fval[3] = 1.0f; + + if (derivatives) { + fval[4] = f[1]; + fval[5] = f[1]; + fval[6] = f[1]; + fval[7] = 0.0f; + + fval[8] = f[2]; + fval[9] = f[2]; + fval[10] = f[2]; + fval[11] = 0.0f; + } + return true; + } + else if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || + type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { + float *fval = (float *)val; + fval[0] = f[0]; + fval[1] = f[0]; + fval[2] = f[0]; + + if (derivatives) { + fval[3] = f[1]; + fval[4] = f[1]; + fval[5] = f[1]; + + 
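+ /* As with the other set_attribute_* helpers, f[0] holds the attribute value
+  * and f[1]/f[2] its x/y derivatives; a scalar attribute is splatted across
+  * all components when a point/vector/normal/color is requested. */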
fval[6] = f[2]; + fval[7] = f[2]; + fval[8] = f[2]; + } + + return true; + } + else if (type == TypeDesc::TypeFloat) { + float *fval = (float *)val; + fval[0] = f[0]; + + if (derivatives) { + fval[1] = f[1]; + fval[2] = f[2]; + } + + return true; + } + + return false; +} + +static bool set_attribute_float(float f, TypeDesc type, bool derivatives, void *val) +{ + float fv[3]; + + fv[0] = f; + fv[1] = 0.0f; + fv[2] = 0.0f; + + return set_attribute_float(fv, type, derivatives, val); +} + +static bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val) +{ + if (type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) { + int *ival = (int *)val; + ival[0] = i; + + if (derivatives) { + ival[1] = 0; + ival[2] = 0; + } + + return true; + } + + return false; +} + +static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, void *val) +{ + if (type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR && + type.arraylen == 0) { + ustring *sval = (ustring *)val; + sval[0] = str; + + if (derivatives) { + sval[1] = OSLRenderServices::u_empty; + sval[2] = OSLRenderServices::u_empty; + } + + return true; + } + + return false; +} + +static bool set_attribute_float3_3(float3 P[3], TypeDesc type, bool derivatives, void *val) +{ + if (type.vecsemantics == TypeDesc::POINT && type.arraylen >= 3) { + float *fval = (float *)val; + + fval[0] = P[0].x; + fval[1] = P[0].y; + fval[2] = P[0].z; + + fval[3] = P[1].x; + fval[4] = P[1].y; + fval[5] = P[1].z; + + fval[6] = P[2].x; + fval[7] = P[2].y; + fval[8] = P[2].z; + + if (type.arraylen > 3) + memset(fval + 3 * 3, 0, sizeof(float) * 3 * (type.arraylen - 3)); + if (derivatives) + memset(fval + type.arraylen * 3, 0, sizeof(float) * 2 * 3 * type.arraylen); + + return true; + } + + return false; +} + +static bool set_attribute_matrix(const Transform &tfm, TypeDesc type, void *val) +{ + if (type == TypeDesc::TypeMatrix) { + copy_matrix(*(OSL::Matrix44 *)val, tfm); + return true; + } + + return false; +} + +static bool get_primitive_attribute(const KernelGlobalsCPU *kg, + const ShaderData *sd, + const OSLGlobals::Attribute &attr, + const TypeDesc &type, + bool derivatives, + void *val) +{ + if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || + attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { + float3 fval[3]; + if (primitive_is_volume_attribute(sd, attr.desc)) { + fval[0] = primitive_volume_attribute_float3(kg, sd, attr.desc); + } + else { + memset(fval, 0, sizeof(fval)); + fval[0] = primitive_surface_attribute_float3( + kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + } + return set_attribute_float3(fval, type, derivatives, val); + } + else if (attr.type == TypeFloat2) { + if (primitive_is_volume_attribute(sd, attr.desc)) { + assert(!"Float2 attribute not support for volumes"); + return false; + } + else { + float2 fval[3]; + fval[0] = primitive_surface_attribute_float2( + kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + return set_attribute_float2(fval, type, derivatives, val); + } + } + else if (attr.type == TypeDesc::TypeFloat) { + float fval[3]; + if (primitive_is_volume_attribute(sd, attr.desc)) { + memset(fval, 0, sizeof(fval)); + fval[0] = primitive_volume_attribute_float(kg, sd, attr.desc); + } + else { + fval[0] = primitive_surface_attribute_float( + kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? 
&fval[2] : NULL); + } + return set_attribute_float(fval, type, derivatives, val); + } + else if (attr.type == TypeDesc::TypeFloat4 || attr.type == TypeRGBA) { + float4 fval[3]; + if (primitive_is_volume_attribute(sd, attr.desc)) { + memset(fval, 0, sizeof(fval)); + fval[0] = primitive_volume_attribute_float4(kg, sd, attr.desc); + } + else { + fval[0] = primitive_surface_attribute_float4( + kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + } + return set_attribute_float4(fval, type, derivatives, val); + } + else { + return false; + } +} + +static bool get_mesh_attribute(const KernelGlobalsCPU *kg, + const ShaderData *sd, + const OSLGlobals::Attribute &attr, + const TypeDesc &type, + bool derivatives, + void *val) +{ + if (attr.type == TypeDesc::TypeMatrix) { + Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc); + return set_attribute_matrix(tfm, type, val); + } + else { + return false; + } +} + +static bool get_object_attribute(const OSLGlobals::Attribute &attr, + TypeDesc type, + bool derivatives, + void *val) +{ + if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || + attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { + return set_attribute_float3(*(float3 *)attr.value.data(), type, derivatives, val); + } + else if (attr.type == TypeFloat2) { + return set_attribute_float2(*(float2 *)attr.value.data(), type, derivatives, val); + } + else if (attr.type == TypeDesc::TypeFloat) { + return set_attribute_float(*(float *)attr.value.data(), type, derivatives, val); + } + else if (attr.type == TypeRGBA || attr.type == TypeDesc::TypeFloat4) { + return set_attribute_float4(*(float4 *)attr.value.data(), type, derivatives, val); + } + else if (attr.type == type) { + size_t datasize = attr.value.datasize(); + + memcpy(val, attr.value.data(), datasize); + if (derivatives) { + memset((char *)val + datasize, 0, datasize * 2); + } + + return true; + } + else { + return false; + } +} + +bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg, + ShaderData *sd, + ustring name, + TypeDesc type, + bool derivatives, + void *val) +{ + /* todo: turn this into hash table? 
*/ + + /* Object Attributes */ + if (name == u_object_location) { + float3 f = object_location(kg, sd); + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_object_color) { + float3 f = object_color(kg, sd->object); + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_object_index) { + float f = object_pass_id(kg, sd->object); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_geom_dupli_generated) { + float3 f = object_dupli_generated(kg, sd->object); + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_geom_dupli_uv) { + float3 f = object_dupli_uv(kg, sd->object); + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_material_index) { + float f = shader_pass_id(kg, sd); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_object_random) { + float f = object_random_number(kg, sd->object); + return set_attribute_float(f, type, derivatives, val); + } + + /* Particle Attributes */ + else if (name == u_particle_index) { + int particle_id = object_particle_id(kg, sd->object); + float f = particle_index(kg, particle_id); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_particle_random) { + int particle_id = object_particle_id(kg, sd->object); + float f = hash_uint2_to_float(particle_index(kg, particle_id), 0); + return set_attribute_float(f, type, derivatives, val); + } + + else if (name == u_particle_age) { + int particle_id = object_particle_id(kg, sd->object); + float f = particle_age(kg, particle_id); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_particle_lifetime) { + int particle_id = object_particle_id(kg, sd->object); + float f = particle_lifetime(kg, particle_id); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_particle_location) { + int particle_id = object_particle_id(kg, sd->object); + float3 f = particle_location(kg, particle_id); + return set_attribute_float3(f, type, derivatives, val); + } +#if 0 /* unsupported */ + else if (name == u_particle_rotation) { + int particle_id = object_particle_id(kg, sd->object); + float4 f = particle_rotation(kg, particle_id); + return set_attribute_float4(f, type, derivatives, val); + } +#endif + else if (name == u_particle_size) { + int particle_id = object_particle_id(kg, sd->object); + float f = particle_size(kg, particle_id); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_particle_velocity) { + int particle_id = object_particle_id(kg, sd->object); + float3 f = particle_velocity(kg, particle_id); + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_particle_angular_velocity) { + int particle_id = object_particle_id(kg, sd->object); + float3 f = particle_angular_velocity(kg, particle_id); + return set_attribute_float3(f, type, derivatives, val); + } + + /* Geometry Attributes */ + else if (name == u_geom_numpolyvertices) { + return set_attribute_int(3, type, derivatives, val); + } + else if ((name == u_geom_trianglevertices || name == u_geom_polyvertices) && + sd->type & PRIMITIVE_ALL_TRIANGLE) { + float3 P[3]; + + if (sd->type & PRIMITIVE_TRIANGLE) + triangle_vertices(kg, sd->prim, P); + else + motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P); + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_position_transform(kg, sd, &P[0]); + object_position_transform(kg, sd, &P[1]); 
+ object_position_transform(kg, sd, &P[2]); + } + + return set_attribute_float3_3(P, type, derivatives, val); + } + else if (name == u_geom_name) { + ustring object_name = kg->osl->object_names[sd->object]; + return set_attribute_string(object_name, type, derivatives, val); + } + else if (name == u_is_smooth) { + float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0); + return set_attribute_float(f, type, derivatives, val); + } + /* Hair Attributes */ + else if (name == u_is_curve) { + float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0; + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_curve_thickness) { + float f = curve_thickness(kg, sd); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_curve_tangent_normal) { + float3 f = curve_tangent_normal(kg, sd); + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_normal_map_normal) { + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + float3 f = triangle_smooth_normal_unnormalized(kg, sd, sd->Ng, sd->prim, sd->u, sd->v); + return set_attribute_float3(f, type, derivatives, val); + } + else { + return false; + } + } + else { + return false; + } +} + +bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg, + ShaderData *sd, + ustring name, + TypeDesc type, + bool derivatives, + void *val) +{ + if (name == u_path_ray_length) { + /* Ray Length */ + float f = sd->ray_length; + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_path_ray_depth) { + /* Ray Depth */ + const IntegratorStateCPU *state = sd->osl_path_state; + const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; + int f = (state) ? state->path.bounce : (shadow_state) ? shadow_state->shadow_path.bounce : 0; + return set_attribute_int(f, type, derivatives, val); + } + else if (name == u_path_diffuse_depth) { + /* Diffuse Ray Depth */ + const IntegratorStateCPU *state = sd->osl_path_state; + const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; + int f = (state) ? state->path.diffuse_bounce : + (shadow_state) ? shadow_state->shadow_path.diffuse_bounce : + 0; + return set_attribute_int(f, type, derivatives, val); + } + else if (name == u_path_glossy_depth) { + /* Glossy Ray Depth */ + const IntegratorStateCPU *state = sd->osl_path_state; + const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; + int f = (state) ? state->path.glossy_bounce : + (shadow_state) ? shadow_state->shadow_path.glossy_bounce : + 0; + return set_attribute_int(f, type, derivatives, val); + } + else if (name == u_path_transmission_depth) { + /* Transmission Ray Depth */ + const IntegratorStateCPU *state = sd->osl_path_state; + const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; + int f = (state) ? state->path.transmission_bounce : + (shadow_state) ? shadow_state->shadow_path.transmission_bounce : + 0; + return set_attribute_int(f, type, derivatives, val); + } + else if (name == u_path_transparent_depth) { + /* Transparent Ray Depth */ + const IntegratorStateCPU *state = sd->osl_path_state; + const IntegratorShadowStateCPU *shadow_state = sd->osl_shadow_path_state; + int f = (state) ? state->path.transparent_bounce : + (shadow_state) ? shadow_state->shadow_path.transparent_bounce : + 0; + return set_attribute_int(f, type, derivatives, val); + } + else if (name == u_ndc) { + /* NDC coordinates with special exception for orthographic projection. 
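+  * For camera rays with no associated object (sd->object == OBJECT_NONE) and
+  * an orthographic camera, the ray origin sd->ray_P is projected instead of
+  * sd->P, and derivatives are estimated by finite differences of the
+  * projected positions.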
*/ + OSLThreadData *tdata = kg->osl_tdata; + OSL::ShaderGlobals *globals = &tdata->globals; + float3 ndc[3]; + + if ((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && + kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { + ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P); + + if (derivatives) { + ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)) - + ndc[0]; + ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)) - + ndc[0]; + } + } + else { + ndc[0] = camera_world_to_ndc(kg, sd, sd->P); + + if (derivatives) { + ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0]; + ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0]; + } + } + + return set_attribute_float3(ndc, type, derivatives, val); + } + else + return false; +} + +bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, + bool derivatives, + ustring object_name, + TypeDesc type, + ustring name, + void *val) +{ + if (sg == NULL || sg->renderstate == NULL) + return false; + + ShaderData *sd = (ShaderData *)(sg->renderstate); + return get_attribute(sd, derivatives, object_name, type, name, val); +} + +bool OSLRenderServices::get_attribute( + ShaderData *sd, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val) +{ + const KernelGlobalsCPU *kg = sd->osl_globals; + int prim_type = 0; + int object; + + /* lookup of attribute on another object */ + if (object_name != u_empty) { + OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name); + + if (it == kg->osl->object_name_map.end()) + return false; + + object = it->second; + } + else { + object = sd->object; + prim_type = attribute_primitive_type(kg, sd); + + if (object == OBJECT_NONE) + return get_background_attribute(kg, sd, name, type, derivatives, val); + } + + /* find attribute on object */ + object = object * ATTR_PRIM_TYPES + prim_type; + OSLGlobals::AttributeMap &attribute_map = kg->osl->attribute_map[object]; + OSLGlobals::AttributeMap::iterator it = attribute_map.find(name); + + if (it != attribute_map.end()) { + const OSLGlobals::Attribute &attr = it->second; + + if (attr.desc.element != ATTR_ELEMENT_OBJECT) { + /* triangle and vertex attributes */ + if (get_primitive_attribute(kg, sd, attr, type, derivatives, val)) + return true; + else + return get_mesh_attribute(kg, sd, attr, type, derivatives, val); + } + else { + /* object attribute */ + return get_object_attribute(attr, type, derivatives, val); + } + } + else { + /* not found in attribute, check standard object info */ + bool is_std_object_attribute = get_object_standard_attribute( + kg, sd, name, type, derivatives, val); + + if (is_std_object_attribute) + return true; + + return get_background_attribute(kg, sd, name, type, derivatives, val); + } + + return false; +} + +bool OSLRenderServices::get_userdata( + bool derivatives, ustring name, TypeDesc type, OSL::ShaderGlobals *sg, void *val) +{ + return false; /* disabled by lockgeom */ +} + +#if OSL_LIBRARY_VERSION_CODE >= 11100 +TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename, + OSL::ShadingContext *) +#else + +TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename) +#endif +{ + OSLTextureHandleMap::iterator it = textures.find(filename); + + /* For non-OIIO textures, just return a pointer to our own OSLTextureHandle. 
*/ + if (it != textures.end()) { + if (it->second->type != OSLTextureHandle::OIIO) { + return (TextureSystem::TextureHandle *)it->second.get(); + } + } + + /* Get handle from OpenImageIO. */ + OSL::TextureSystem *ts = texture_system; + TextureSystem::TextureHandle *handle = ts->get_texture_handle(filename); + if (handle == NULL) { + return NULL; + } + + /* Insert new OSLTextureHandle if needed. */ + if (it == textures.end()) { + textures.insert(filename, new OSLTextureHandle(OSLTextureHandle::OIIO)); + it = textures.find(filename); + } + + /* Assign OIIO texture handle and return. */ + it->second->oiio_handle = handle; + return (TextureSystem::TextureHandle *)it->second.get(); +} + +bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle) +{ + OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; + + if (handle->oiio_handle) { + OSL::TextureSystem *ts = texture_system; + return ts->good(handle->oiio_handle); + } + else { + return true; + } +} + +bool OSLRenderServices::texture(ustring filename, + TextureHandle *texture_handle, + TexturePerthread *texture_thread_info, + TextureOpt &options, + OSL::ShaderGlobals *sg, + float s, + float t, + float dsdx, + float dtdx, + float dsdy, + float dtdy, + int nchannels, + float *result, + float *dresultds, + float *dresultdt, + ustring *errormessage) +{ + OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; + OSLTextureHandle::Type texture_type = (handle) ? handle->type : OSLTextureHandle::OIIO; + ShaderData *sd = (ShaderData *)(sg->renderstate); + KernelGlobals kernel_globals = sd->osl_globals; + bool status = false; + + switch (texture_type) { + case OSLTextureHandle::BEVEL: { + /* Bevel shader hack. */ + if (nchannels >= 3) { + const IntegratorStateCPU *state = sd->osl_path_state; + if (state) { + int num_samples = (int)s; + float radius = t; + float3 N = svm_bevel(kernel_globals, state, sd, radius, num_samples); + result[0] = N.x; + result[1] = N.y; + result[2] = N.z; + status = true; + } + } + break; + } + case OSLTextureHandle::AO: { + /* AO shader hack. */ + const IntegratorStateCPU *state = sd->osl_path_state; + if (state) { + int num_samples = (int)s; + float radius = t; + float3 N = make_float3(dsdx, dtdx, dsdy); + int flags = 0; + if ((int)dtdy) { + flags |= NODE_AO_INSIDE; + } + if ((int)options.sblur) { + flags |= NODE_AO_ONLY_LOCAL; + } + if ((int)options.tblur) { + flags |= NODE_AO_GLOBAL_RADIUS; + } + result[0] = svm_ao(kernel_globals, state, sd, N, radius, num_samples, flags); + status = true; + } + break; + } + case OSLTextureHandle::SVM: { + /* Packed texture. */ + float4 rgba = kernel_tex_image_interp(kernel_globals, handle->svm_slot, s, 1.0f - t); + + result[0] = rgba[0]; + if (nchannels > 1) + result[1] = rgba[1]; + if (nchannels > 2) + result[2] = rgba[2]; + if (nchannels > 3) + result[3] = rgba[3]; + status = true; + break; + } + case OSLTextureHandle::IES: { + /* IES light. */ + result[0] = kernel_ies_interp(kernel_globals, handle->svm_slot, s, t); + status = true; + break; + } + case OSLTextureHandle::OIIO: { + /* OpenImageIO texture cache. 
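+  * A cached TextureSystem handle is used when available, otherwise the lookup
+  * falls back to the filename. On failure ts->geterror() drains the error so
+  * it does not leak, and the magenta missing-texture fallback below is
+  * written to the result instead.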
*/ + OSL::TextureSystem *ts = texture_system; + + if (handle && handle->oiio_handle) { + if (texture_thread_info == NULL) { + OSLThreadData *tdata = kernel_globals->osl_tdata; + texture_thread_info = tdata->oiio_thread_info; + } + + status = ts->texture(handle->oiio_handle, + texture_thread_info, + options, + s, + t, + dsdx, + dtdx, + dsdy, + dtdy, + nchannels, + result, + dresultds, + dresultdt); + } + else { + status = ts->texture(filename, + options, + s, + t, + dsdx, + dtdx, + dsdy, + dtdy, + nchannels, + result, + dresultds, + dresultdt); + } + + if (!status) { + /* This might be slow, but prevents error messages leak and + * other nasty stuff happening. */ + ts->geterror(); + } + else if (handle && handle->processor) { + ColorSpaceManager::to_scene_linear(handle->processor, result, nchannels); + } + break; + } + } + + if (!status) { + if (nchannels == 3 || nchannels == 4) { + result[0] = 1.0f; + result[1] = 0.0f; + result[2] = 1.0f; + + if (nchannels == 4) + result[3] = 1.0f; + } + } + + return status; +} + +bool OSLRenderServices::texture3d(ustring filename, + TextureHandle *texture_handle, + TexturePerthread *texture_thread_info, + TextureOpt &options, + OSL::ShaderGlobals *sg, + const OSL::Vec3 &P, + const OSL::Vec3 &dPdx, + const OSL::Vec3 &dPdy, + const OSL::Vec3 &dPdz, + int nchannels, + float *result, + float *dresultds, + float *dresultdt, + float *dresultdr, + ustring *errormessage) +{ + OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; + OSLTextureHandle::Type texture_type = (handle) ? handle->type : OSLTextureHandle::OIIO; + bool status = false; + + switch (texture_type) { + case OSLTextureHandle::SVM: { + /* Packed texture. */ + ShaderData *sd = (ShaderData *)(sg->renderstate); + KernelGlobals kernel_globals = sd->osl_globals; + int slot = handle->svm_slot; + float3 P_float3 = make_float3(P.x, P.y, P.z); + float4 rgba = kernel_tex_image_interp_3d(kernel_globals, slot, P_float3, INTERPOLATION_NONE); + + result[0] = rgba[0]; + if (nchannels > 1) + result[1] = rgba[1]; + if (nchannels > 2) + result[2] = rgba[2]; + if (nchannels > 3) + result[3] = rgba[3]; + status = true; + break; + } + case OSLTextureHandle::OIIO: { + /* OpenImageIO texture cache. */ + OSL::TextureSystem *ts = texture_system; + + if (handle && handle->oiio_handle) { + if (texture_thread_info == NULL) { + ShaderData *sd = (ShaderData *)(sg->renderstate); + KernelGlobals kernel_globals = sd->osl_globals; + OSLThreadData *tdata = kernel_globals->osl_tdata; + texture_thread_info = tdata->oiio_thread_info; + } + + status = ts->texture3d(handle->oiio_handle, + texture_thread_info, + options, + P, + dPdx, + dPdy, + dPdz, + nchannels, + result, + dresultds, + dresultdt, + dresultdr); + } + else { + status = ts->texture3d(filename, + options, + P, + dPdx, + dPdy, + dPdz, + nchannels, + result, + dresultds, + dresultdt, + dresultdr); + } + + if (!status) { + /* This might be slow, but prevents error messages leak and + * other nasty stuff happening. 
*/ + ts->geterror(); + } + else if (handle && handle->processor) { + ColorSpaceManager::to_scene_linear(handle->processor, result, nchannels); + } + break; + } + case OSLTextureHandle::IES: + case OSLTextureHandle::AO: + case OSLTextureHandle::BEVEL: { + status = false; + break; + } + } + + if (!status) { + if (nchannels == 3 || nchannels == 4) { + result[0] = 1.0f; + result[1] = 0.0f; + result[2] = 1.0f; + + if (nchannels == 4) + result[3] = 1.0f; + } + } + + return status; +} + +bool OSLRenderServices::environment(ustring filename, + TextureHandle *texture_handle, + TexturePerthread *thread_info, + TextureOpt &options, + OSL::ShaderGlobals *sg, + const OSL::Vec3 &R, + const OSL::Vec3 &dRdx, + const OSL::Vec3 &dRdy, + int nchannels, + float *result, + float *dresultds, + float *dresultdt, + ustring *errormessage) +{ + OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; + OSL::TextureSystem *ts = texture_system; + bool status = false; + + if (handle && handle->oiio_handle) { + if (thread_info == NULL) { + ShaderData *sd = (ShaderData *)(sg->renderstate); + KernelGlobals kernel_globals = sd->osl_globals; + OSLThreadData *tdata = kernel_globals->osl_tdata; + thread_info = tdata->oiio_thread_info; + } + + status = ts->environment(handle->oiio_handle, + thread_info, + options, + R, + dRdx, + dRdy, + nchannels, + result, + dresultds, + dresultdt); + } + else { + status = ts->environment( + filename, options, R, dRdx, dRdy, nchannels, result, dresultds, dresultdt); + } + + if (!status) { + if (nchannels == 3 || nchannels == 4) { + result[0] = 1.0f; + result[1] = 0.0f; + result[2] = 1.0f; + + if (nchannels == 4) + result[3] = 1.0f; + } + } + else if (handle && handle->processor) { + ColorSpaceManager::to_scene_linear(handle->processor, result, nchannels); + } + + return status; +} + +#if OSL_LIBRARY_VERSION_CODE >= 11100 +bool OSLRenderServices::get_texture_info(ustring filename, + TextureHandle *texture_handle, + TexturePerthread *, + OSL::ShadingContext *, + int subimage, + ustring dataname, + TypeDesc datatype, + void *data, + ustring *) +#else +bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, + ustring filename, + TextureHandle *texture_handle, + int subimage, + ustring dataname, + TypeDesc datatype, + void *data) +#endif +{ + OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; + + /* No texture info for other texture types. */ + if (handle && handle->type != OSLTextureHandle::OIIO) { + return false; + } + + /* Get texture info from OpenImageIO. 
*/ + OSL::TextureSystem *ts = texture_system; + return ts->get_texture_info(filename, subimage, dataname, datatype, data); +} + +int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg, + ustring filename, + const OSL::Vec3 ¢er, + float radius, + int max_points, + bool sort, + size_t *out_indices, + float *out_distances, + int derivs_offset) +{ + return 0; +} + +int OSLRenderServices::pointcloud_get(OSL::ShaderGlobals *sg, + ustring filename, + size_t *indices, + int count, + ustring attr_name, + TypeDesc attr_type, + void *out_data) +{ + return 0; +} + +bool OSLRenderServices::pointcloud_write(OSL::ShaderGlobals *sg, + ustring filename, + const OSL::Vec3 &pos, + int nattribs, + const ustring *names, + const TypeDesc *types, + const void **data) +{ + return false; +} + +bool OSLRenderServices::trace(TraceOpt &options, + OSL::ShaderGlobals *sg, + const OSL::Vec3 &P, + const OSL::Vec3 &dPdx, + const OSL::Vec3 &dPdy, + const OSL::Vec3 &R, + const OSL::Vec3 &dRdx, + const OSL::Vec3 &dRdy) +{ + /* todo: options.shader support, maybe options.traceset */ + ShaderData *sd = (ShaderData *)(sg->renderstate); + + /* setup ray */ + Ray ray; + + ray.P = TO_FLOAT3(P); + ray.D = TO_FLOAT3(R); + ray.t = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist; + ray.time = sd->time; + + if (options.mindist == 0.0f) { + /* avoid self-intersections */ + if (ray.P == sd->P) { + bool transmit = (dot(sd->Ng, ray.D) < 0.0f); + ray.P = ray_offset(sd->P, (transmit) ? -sd->Ng : sd->Ng); + } + } + else { + /* offset for minimum distance */ + ray.P += options.mindist * ray.D; + } + + /* ray differentials */ + differential3 dP; + dP.dx = TO_FLOAT3(dPdx); + dP.dy = TO_FLOAT3(dPdy); + ray.dP = differential_make_compact(dP); + differential3 dD; + dD.dx = TO_FLOAT3(dRdx); + dD.dy = TO_FLOAT3(dRdy); + ray.dD = differential_make_compact(dD); + + /* allocate trace data */ + OSLTraceData *tracedata = (OSLTraceData *)sg->tracedata; + tracedata->ray = ray; + tracedata->setup = false; + tracedata->init = true; + tracedata->hit = false; + tracedata->sd.osl_globals = sd->osl_globals; + + const KernelGlobalsCPU *kg = sd->osl_globals; + + /* Can't raytrace from shaders like displacement, before BVH exists. */ + if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { + return false; + } + + /* Raytrace, leaving out shadow opaque to avoid early exit. 
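+  * With shadow-opaque visibility the traversal may stop at the first opaque
+  * hit instead of the closest one, which would break the closest-hit
+  * semantics that getmessage("trace", ...) relies on.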
*/ + uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE; + tracedata->hit = scene_intersect(kg, &ray, visibility, &tracedata->isect); + return tracedata->hit; +} + +bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, + ustring source, + ustring name, + TypeDesc type, + void *val, + bool derivatives) +{ + OSLTraceData *tracedata = (OSLTraceData *)sg->tracedata; + + if (source == u_trace && tracedata->init) { + if (name == u_hit) { + return set_attribute_int(tracedata->hit, type, derivatives, val); + } + else if (tracedata->hit) { + if (name == u_hitdist) { + float f[3] = {tracedata->isect.t, 0.0f, 0.0f}; + return set_attribute_float(f, type, derivatives, val); + } + else { + ShaderData *sd = &tracedata->sd; + const KernelGlobalsCPU *kg = sd->osl_globals; + + if (!tracedata->setup) { + /* lazy shader data setup */ + shader_setup_from_ray(kg, sd, &tracedata->ray, &tracedata->isect); + tracedata->setup = true; + } + + if (name == u_N) { + return set_attribute_float3(sd->N, type, derivatives, val); + } + else if (name == u_Ng) { + return set_attribute_float3(sd->Ng, type, derivatives, val); + } + else if (name == u_P) { + float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy}; + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_I) { + float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy}; + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_u) { + float f[3] = {sd->u, sd->du.dx, sd->du.dy}; + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_v) { + float f[3] = {sd->v, sd->dv.dx, sd->dv.dy}; + return set_attribute_float(f, type, derivatives, val); + } + + return get_attribute(sd, derivatives, u_empty, type, name, val); + } + } + } + + return false; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/services.h b/intern/cycles/kernel/osl/services.h new file mode 100644 index 00000000000..d9f57c642ad --- /dev/null +++ b/intern/cycles/kernel/osl/services.h @@ -0,0 +1,330 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OSL_SERVICES_H__ +#define __OSL_SERVICES_H__ + +/* OSL Render Services + * + * Implementation of OSL render services, to retriever matrices, attributes, + * textures and point clouds. In principle this should only be accessing + * kernel data, but currently we also reach back into the Scene to retrieve + * attributes. + */ + +#include +#include +#include + +#ifdef WITH_PTEX +class PtexCache; +#endif + +CCL_NAMESPACE_BEGIN + +class Object; +class Scene; +class Shader; +struct ShaderData; +struct float3; +struct KernelGlobalsCPU; + +/* OSL Texture Handle + * + * OSL texture lookups are string based. If those strings are known at compile + * time, the OSL compiler can cache a texture handle to use instead of a string. + * + * By default it uses TextureSystem::TextureHandle. But since we want to support + * different kinds of textures and color space conversions, this is our own handle + * with additional data. 
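+ *
+ * As an illustrative sketch (the `services` pointer and `slot` index are
+ * assumed here, not part of this patch), the host side could expose a packed
+ * SVM texture slot as:
+ *
+ *   services->textures.insert(ustring("my_image.png"),
+ *                             new OSLTextureHandle(OSLTextureHandle::SVM, slot));
+ *
+ * after which get_texture_handle() resolves that name to the handle without
+ * going through OpenImageIO.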
+ * + * These are stored in a concurrent hash map, because OSL can compile multiple + * shaders in parallel. */ + +struct OSLTextureHandle : public OIIO::RefCnt { + enum Type { OIIO, SVM, IES, BEVEL, AO }; + + OSLTextureHandle(Type type = OIIO, int svm_slot = -1) + : type(type), svm_slot(svm_slot), oiio_handle(NULL), processor(NULL) + { + } + + Type type; + int svm_slot; + OSL::TextureSystem::TextureHandle *oiio_handle; + ColorSpaceProcessor *processor; +}; + +typedef OIIO::intrusive_ptr OSLTextureHandleRef; +typedef OIIO::unordered_map_concurrent + OSLTextureHandleMap; + +/* OSL Render Services + * + * Interface for OSL to access attributes, textures and other scene data. */ + +class OSLRenderServices : public OSL::RendererServices { + public: + OSLRenderServices(OSL::TextureSystem *texture_system); + ~OSLRenderServices(); + + bool get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform, + float time) override; + bool get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform, + float time) override; + + bool get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + ustring from, + float time) override; + bool get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + ustring to, + float time) override; + + bool get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform) override; + bool get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform) override; + + bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override; + bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override; + + bool get_array_attribute(OSL::ShaderGlobals *sg, + bool derivatives, + ustring object, + TypeDesc type, + ustring name, + int index, + void *val) override; + bool get_attribute(OSL::ShaderGlobals *sg, + bool derivatives, + ustring object, + TypeDesc type, + ustring name, + void *val) override; + bool get_attribute(ShaderData *sd, + bool derivatives, + ustring object_name, + TypeDesc type, + ustring name, + void *val); + + bool get_userdata( + bool derivatives, ustring name, TypeDesc type, OSL::ShaderGlobals *sg, void *val) override; + + int pointcloud_search(OSL::ShaderGlobals *sg, + ustring filename, + const OSL::Vec3 ¢er, + float radius, + int max_points, + bool sort, + size_t *out_indices, + float *out_distances, + int derivs_offset) override; + + int pointcloud_get(OSL::ShaderGlobals *sg, + ustring filename, + size_t *indices, + int count, + ustring attr_name, + TypeDesc attr_type, + void *out_data) override; + + bool pointcloud_write(OSL::ShaderGlobals *sg, + ustring filename, + const OSL::Vec3 &pos, + int nattribs, + const ustring *names, + const TypeDesc *types, + const void **data) override; + + bool trace(TraceOpt &options, + OSL::ShaderGlobals *sg, + const OSL::Vec3 &P, + const OSL::Vec3 &dPdx, + const OSL::Vec3 &dPdy, + const OSL::Vec3 &R, + const OSL::Vec3 &dRdx, + const OSL::Vec3 &dRdy) override; + + bool getmessage(OSL::ShaderGlobals *sg, + ustring source, + ustring name, + TypeDesc type, + void *val, + bool derivatives) override; + +#if OSL_LIBRARY_VERSION_CODE >= 11100 + TextureSystem::TextureHandle *get_texture_handle(ustring filename, + OSL::ShadingContext *context) override; +#else + TextureSystem::TextureHandle *get_texture_handle(ustring filename) override; +#endif + + bool good(TextureSystem::TextureHandle *texture_handle) override; + + bool texture(ustring filename, 
+ TextureSystem::TextureHandle *texture_handle, + TexturePerthread *texture_thread_info, + TextureOpt &options, + OSL::ShaderGlobals *sg, + float s, + float t, + float dsdx, + float dtdx, + float dsdy, + float dtdy, + int nchannels, + float *result, + float *dresultds, + float *dresultdt, + ustring *errormessage) override; + + bool texture3d(ustring filename, + TextureHandle *texture_handle, + TexturePerthread *texture_thread_info, + TextureOpt &options, + OSL::ShaderGlobals *sg, + const OSL::Vec3 &P, + const OSL::Vec3 &dPdx, + const OSL::Vec3 &dPdy, + const OSL::Vec3 &dPdz, + int nchannels, + float *result, + float *dresultds, + float *dresultdt, + float *dresultdr, + ustring *errormessage) override; + + bool environment(ustring filename, + TextureHandle *texture_handle, + TexturePerthread *texture_thread_info, + TextureOpt &options, + OSL::ShaderGlobals *sg, + const OSL::Vec3 &R, + const OSL::Vec3 &dRdx, + const OSL::Vec3 &dRdy, + int nchannels, + float *result, + float *dresultds, + float *dresultdt, + ustring *errormessage) override; + +#if OSL_LIBRARY_VERSION_CODE >= 11100 + bool get_texture_info(ustring filename, + TextureHandle *texture_handle, + TexturePerthread *texture_thread_info, + OSL::ShadingContext *shading_context, + int subimage, + ustring dataname, + TypeDesc datatype, + void *data, + ustring *errormessage) override; +#else + bool get_texture_info(OSL::ShaderGlobals *sg, + ustring filename, + TextureHandle *texture_handle, + int subimage, + ustring dataname, + TypeDesc datatype, + void *data) override; +#endif + + static bool get_background_attribute(const KernelGlobalsCPU *kg, + ShaderData *sd, + ustring name, + TypeDesc type, + bool derivatives, + void *val); + static bool get_object_standard_attribute(const KernelGlobalsCPU *kg, + ShaderData *sd, + ustring name, + TypeDesc type, + bool derivatives, + void *val); + + static ustring u_distance; + static ustring u_index; + static ustring u_world; + static ustring u_camera; + static ustring u_screen; + static ustring u_raster; + static ustring u_ndc; + static ustring u_object_location; + static ustring u_object_color; + static ustring u_object_index; + static ustring u_geom_dupli_generated; + static ustring u_geom_dupli_uv; + static ustring u_material_index; + static ustring u_object_random; + static ustring u_particle_index; + static ustring u_particle_random; + static ustring u_particle_age; + static ustring u_particle_lifetime; + static ustring u_particle_location; + static ustring u_particle_rotation; + static ustring u_particle_size; + static ustring u_particle_velocity; + static ustring u_particle_angular_velocity; + static ustring u_geom_numpolyvertices; + static ustring u_geom_trianglevertices; + static ustring u_geom_polyvertices; + static ustring u_geom_name; + static ustring u_geom_undisplaced; + static ustring u_is_smooth; + static ustring u_is_curve; + static ustring u_curve_thickness; + static ustring u_curve_length; + static ustring u_curve_tangent_normal; + static ustring u_curve_random; + static ustring u_normal_map_normal; + static ustring u_path_ray_length; + static ustring u_path_ray_depth; + static ustring u_path_diffuse_depth; + static ustring u_path_glossy_depth; + static ustring u_path_transparent_depth; + static ustring u_path_transmission_depth; + static ustring u_trace; + static ustring u_hit; + static ustring u_hitdist; + static ustring u_N; + static ustring u_Ng; + static ustring u_P; + static ustring u_I; + static ustring u_u; + static ustring u_v; + static ustring u_empty; + static ustring 
u_at_bevel; + static ustring u_at_ao; + + /* Texture system and texture handle map are part of the services instead of + * globals to be shared between different render sessions. This saves memory, + * and is required because texture handles are cached as part of the shared + * shading system. */ + OSL::TextureSystem *texture_system; + OSLTextureHandleMap textures; +}; + +CCL_NAMESPACE_END + +#endif /* __OSL_SERVICES_H__ */ diff --git a/intern/cycles/kernel/osl/shader.cpp b/intern/cycles/kernel/osl/shader.cpp new file mode 100644 index 00000000000..33633c69e29 --- /dev/null +++ b/intern/cycles/kernel/osl/shader.cpp @@ -0,0 +1,428 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +// clang-format off +#include "kernel/device/cpu/compat.h" +#include "kernel/device/cpu/globals.h" + +#include "kernel/types.h" + +#include "kernel/geom/object.h" + +#include "kernel/integrator/state.h" + +#include "kernel/osl/closures.h" +#include "kernel/osl/globals.h" +#include "kernel/osl/services.h" +#include "kernel/osl/shader.h" +// clang-format on + +#include "scene/attribute.h" + +CCL_NAMESPACE_BEGIN + +/* Threads */ + +void OSLShader::thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals) +{ + /* no osl used? */ + if (!osl_globals->use) { + kg->osl = NULL; + return; + } + + /* Per thread kernel data init. 
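+  * A CPU render thread is expected to wrap its rendering work in a matching
+  * pair of calls (see the note in shader.h), roughly:
+  *
+  *   OSLShader::thread_init(kg, osl_globals);
+  *   ... render using kg ...
+  *   OSLShader::thread_free(kg);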
*/ + kg->osl = osl_globals; + + OSL::ShadingSystem *ss = kg->osl->ss; + OSLThreadData *tdata = new OSLThreadData(); + + memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals)); + tdata->globals.tracedata = &tdata->tracedata; + tdata->globals.flipHandedness = false; + tdata->osl_thread_info = ss->create_thread_info(); + tdata->context = ss->get_context(tdata->osl_thread_info); + + tdata->oiio_thread_info = osl_globals->ts->get_perthread_info(); + + kg->osl_ss = (OSLShadingSystem *)ss; + kg->osl_tdata = tdata; +} + +void OSLShader::thread_free(KernelGlobalsCPU *kg) +{ + if (!kg->osl) + return; + + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSLThreadData *tdata = kg->osl_tdata; + ss->release_context(tdata->context); + + ss->destroy_thread_info(tdata->osl_thread_info); + + delete tdata; + + kg->osl = NULL; + kg->osl_ss = NULL; + kg->osl_tdata = NULL; +} + +/* Globals */ + +static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg, + ShaderData *sd, + const void *state, + uint32_t path_flag, + OSLThreadData *tdata) +{ + OSL::ShaderGlobals *globals = &tdata->globals; + + /* copy from shader data to shader globals */ + globals->P = TO_VEC3(sd->P); + globals->dPdx = TO_VEC3(sd->dP.dx); + globals->dPdy = TO_VEC3(sd->dP.dy); + globals->I = TO_VEC3(sd->I); + globals->dIdx = TO_VEC3(sd->dI.dx); + globals->dIdy = TO_VEC3(sd->dI.dy); + globals->N = TO_VEC3(sd->N); + globals->Ng = TO_VEC3(sd->Ng); + globals->u = sd->u; + globals->dudx = sd->du.dx; + globals->dudy = sd->du.dy; + globals->v = sd->v; + globals->dvdx = sd->dv.dx; + globals->dvdy = sd->dv.dy; + globals->dPdu = TO_VEC3(sd->dPdu); + globals->dPdv = TO_VEC3(sd->dPdv); + globals->surfacearea = 1.0f; + globals->time = sd->time; + + /* booleans */ + globals->raytype = path_flag; + globals->backfacing = (sd->flag & SD_BACKFACING); + + /* shader data to be used in services callbacks */ + globals->renderstate = sd; + + /* hacky, we leave it to services to fetch actual object matrix */ + globals->shader2common = sd; + globals->object2common = sd; + + /* must be set to NULL before execute */ + globals->Ci = NULL; + + /* clear trace data */ + tdata->tracedata.init = false; + + /* Used by render-services. */ + sd->osl_globals = kg; + if (path_flag & PATH_RAY_SHADOW) { + sd->osl_shadow_path_state = (const IntegratorShadowStateCPU *)state; + } + else { + sd->osl_path_state = (const IntegratorStateCPU *)state; + } +} + +/* Surface */ + +static void flatten_surface_closure_tree(ShaderData *sd, + uint32_t path_flag, + const OSL::ClosureColor *closure, + float3 weight = make_float3(1.0f, 1.0f, 1.0f)) +{ + /* OSL gives us a closure tree, we flatten it into arrays per + * closure type, for evaluation, sampling, etc later on. 
*/ + + switch (closure->id) { + case OSL::ClosureColor::MUL: { + OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; + flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight); + break; + } + case OSL::ClosureColor::ADD: { + OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; + flatten_surface_closure_tree(sd, path_flag, add->closureA, weight); + flatten_surface_closure_tree(sd, path_flag, add->closureB, weight); + break; + } + default: { + OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; + CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); + + if (prim) { +#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS + weight = weight * TO_FLOAT3(comp->w); +#endif + prim->setup(sd, path_flag, weight); + } + break; + } + } +} + +void OSLShader::eval_surface(const KernelGlobalsCPU *kg, + const void *state, + ShaderData *sd, + uint32_t path_flag) +{ + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; + shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); + + /* execute shader for this point */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; + int shader = sd->shader & SHADER_MASK; + + /* automatic bump shader */ + if (kg->osl->bump_state[shader]) { + /* save state */ + float3 P = sd->P; + float3 dPdx = sd->dP.dx; + float3 dPdy = sd->dP.dy; + + /* set state as if undisplaced */ + if (sd->flag & SD_HAS_DISPLACEMENT) { + float data[9]; + bool found = kg->osl->services->get_attribute(sd, + true, + OSLRenderServices::u_empty, + TypeDesc::TypeVector, + OSLRenderServices::u_geom_undisplaced, + data); + (void)found; + assert(found); + + memcpy(&sd->P, data, sizeof(float) * 3); + memcpy(&sd->dP.dx, data + 3, sizeof(float) * 3); + memcpy(&sd->dP.dy, data + 6, sizeof(float) * 3); + + object_position_transform(kg, sd, &sd->P); + object_dir_transform(kg, sd, &sd->dP.dx); + object_dir_transform(kg, sd, &sd->dP.dy); + + globals->P = TO_VEC3(sd->P); + globals->dPdx = TO_VEC3(sd->dP.dx); + globals->dPdy = TO_VEC3(sd->dP.dy); + } + + /* execute bump shader */ + ss->execute(octx, *(kg->osl->bump_state[shader]), *globals); + + /* reset state */ + sd->P = P; + sd->dP.dx = dPdx; + sd->dP.dy = dPdy; + + globals->P = TO_VEC3(P); + globals->dPdx = TO_VEC3(dPdx); + globals->dPdy = TO_VEC3(dPdy); + } + + /* surface shader */ + if (kg->osl->surface_state[shader]) { + ss->execute(octx, *(kg->osl->surface_state[shader]), *globals); + } + + /* flatten closure tree */ + if (globals->Ci) + flatten_surface_closure_tree(sd, path_flag, globals->Ci); +} + +/* Background */ + +static void flatten_background_closure_tree(ShaderData *sd, + const OSL::ClosureColor *closure, + float3 weight = make_float3(1.0f, 1.0f, 1.0f)) +{ + /* OSL gives us a closure tree, if we are shading for background there + * is only one supported closure type at the moment, which has no evaluation + * functions, so we just sum the weights */ + + switch (closure->id) { + case OSL::ClosureColor::MUL: { + OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; + flatten_background_closure_tree(sd, mul->closure, weight * TO_FLOAT3(mul->weight)); + break; + } + case OSL::ClosureColor::ADD: { + OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; + + flatten_background_closure_tree(sd, add->closureA, weight); + flatten_background_closure_tree(sd, add->closureB, weight); + break; + } + default: { + OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; + 
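+ /* The weight reaching a component is the product of the MUL weights above it
+  * in the closure tree; e.g. Ci = 0.25 * (A + B) sets up both A and B with a
+  * weight of 0.25. */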
CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); + + if (prim) { +#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS + weight = weight * TO_FLOAT3(comp->w); +#endif + prim->setup(sd, 0, weight); + } + break; + } + } +} + +void OSLShader::eval_background(const KernelGlobalsCPU *kg, + const void *state, + ShaderData *sd, + uint32_t path_flag) +{ + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; + shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); + + /* execute shader for this point */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; + + if (kg->osl->background_state) { + ss->execute(octx, *(kg->osl->background_state), *globals); + } + + /* return background color immediately */ + if (globals->Ci) + flatten_background_closure_tree(sd, globals->Ci); +} + +/* Volume */ + +static void flatten_volume_closure_tree(ShaderData *sd, + const OSL::ClosureColor *closure, + float3 weight = make_float3(1.0f, 1.0f, 1.0f)) +{ + /* OSL gives us a closure tree, we flatten it into arrays per + * closure type, for evaluation, sampling, etc later on. */ + + switch (closure->id) { + case OSL::ClosureColor::MUL: { + OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; + flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight); + break; + } + case OSL::ClosureColor::ADD: { + OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; + flatten_volume_closure_tree(sd, add->closureA, weight); + flatten_volume_closure_tree(sd, add->closureB, weight); + break; + } + default: { + OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; + CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); + + if (prim) { +#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS + weight = weight * TO_FLOAT3(comp->w); +#endif + prim->setup(sd, 0, weight); + } + } + } +} + +void OSLShader::eval_volume(const KernelGlobalsCPU *kg, + const void *state, + ShaderData *sd, + uint32_t path_flag) +{ + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; + shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); + + /* execute shader */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; + int shader = sd->shader & SHADER_MASK; + + if (kg->osl->volume_state[shader]) { + ss->execute(octx, *(kg->osl->volume_state[shader]), *globals); + } + + /* flatten closure tree */ + if (globals->Ci) + flatten_volume_closure_tree(sd, globals->Ci); +} + +/* Displacement */ + +void OSLShader::eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd) +{ + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; + + shaderdata_to_shaderglobals(kg, sd, state, 0, tdata); + + /* execute shader */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; + int shader = sd->shader & SHADER_MASK; + + if (kg->osl->displacement_state[shader]) { + ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals); + } + + /* get back position */ + sd->P = TO_FLOAT3(globals->P); +} + +/* Attributes */ + +int OSLShader::find_attribute(const KernelGlobalsCPU *kg, + const ShaderData *sd, + uint id, + AttributeDescriptor *desc) +{ + /* for OSL, a hash map is used to lookup the attribute by name. 
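+  * Standard attributes are keyed by their "geom:" prefixed standard name, so
+  * e.g. an ATTR_STD_UV lookup uses the key "geom:uv" (assuming
+  * Attribute::standard_name() maps it to "uv").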
*/ + int object = sd->object * ATTR_PRIM_TYPES; + + OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object]; + ustring stdname(std::string("geom:") + + std::string(Attribute::standard_name((AttributeStandard)id))); + OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname); + + if (it != attr_map.end()) { + const OSLGlobals::Attribute &osl_attr = it->second; + *desc = osl_attr.desc; + + if (sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) { + desc->offset = ATTR_STD_NOT_FOUND; + return ATTR_STD_NOT_FOUND; + } + + /* return result */ + if (osl_attr.desc.element == ATTR_ELEMENT_NONE) { + desc->offset = ATTR_STD_NOT_FOUND; + } + return desc->offset; + } + else { + desc->offset = ATTR_STD_NOT_FOUND; + return (int)ATTR_STD_NOT_FOUND; + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/shader.h b/intern/cycles/kernel/osl/shader.h new file mode 100644 index 00000000000..7d68d4eae7f --- /dev/null +++ b/intern/cycles/kernel/osl/shader.h @@ -0,0 +1,82 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OSL_SHADER_H__ +#define __OSL_SHADER_H__ + +#ifdef WITH_OSL + +/* OSL Shader Engine + * + * Holds all variables to execute and use OSL shaders from the kernel. These + * are initialized externally by OSLShaderManager before rendering starts. + * + * Before/after a thread starts rendering, thread_init/thread_free must be + * called, which will store any per thread OSL state in thread local storage. + * This means no thread state must be passed along in the kernel itself. 
+ */ + +# include "kernel/types.h" + +CCL_NAMESPACE_BEGIN + +class Scene; + +struct ShaderClosure; +struct ShaderData; +struct IntegratorStateCPU; +struct differential3; +struct KernelGlobalsCPU; + +struct OSLGlobals; +struct OSLShadingSystem; + +class OSLShader { + public: + /* init */ + static void register_closures(OSLShadingSystem *ss); + + /* per thread data */ + static void thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals); + static void thread_free(KernelGlobalsCPU *kg); + + /* eval */ + static void eval_surface(const KernelGlobalsCPU *kg, + const void *state, + ShaderData *sd, + uint32_t path_flag); + static void eval_background(const KernelGlobalsCPU *kg, + const void *state, + ShaderData *sd, + uint32_t path_flag); + static void eval_volume(const KernelGlobalsCPU *kg, + const void *state, + ShaderData *sd, + uint32_t path_flag); + static void eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd); + + /* attributes */ + static int find_attribute(const KernelGlobalsCPU *kg, + const ShaderData *sd, + uint id, + AttributeDescriptor *desc); +}; + +CCL_NAMESPACE_END + +#endif + +#endif /* __OSL_SHADER_H__ */ diff --git a/intern/cycles/kernel/sample/jitter.h b/intern/cycles/kernel/sample/jitter.h new file mode 100644 index 00000000000..b62ec7fda42 --- /dev/null +++ b/intern/cycles/kernel/sample/jitter.h @@ -0,0 +1,169 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +CCL_NAMESPACE_BEGIN + +ccl_device_inline uint32_t laine_karras_permutation(uint32_t x, uint32_t seed) +{ + x += seed; + x ^= (x * 0x6c50b47cu); + x ^= x * 0xb82f1e52u; + x ^= x * 0xc7afe638u; + x ^= x * 0x8d22f6e6u; + + return x; +} + +ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed) +{ + x = reverse_integer_bits(x); + x = laine_karras_permutation(x, seed); + x = reverse_integer_bits(x); + + return x; +} + +ccl_device_inline uint cmj_hash(uint i, uint p) +{ + i ^= p; + i ^= i >> 17; + i ^= i >> 10; + i *= 0xb36534e5; + i ^= i >> 12; + i ^= i >> 21; + i *= 0x93fc4795; + i ^= 0xdf6e307f; + i ^= i >> 17; + i *= 1 | p >> 18; + + return i; +} + +ccl_device_inline uint cmj_hash_simple(uint i, uint p) +{ + i = (i ^ 61) ^ p; + i += i << 3; + i ^= i >> 4; + i *= 0x27d4eb2d; + return i; +} + +ccl_device_inline float cmj_randfloat(uint i, uint p) +{ + return cmj_hash(i, p) * (1.0f / 4294967808.0f); +} + +ccl_device_inline float cmj_randfloat_simple(uint i, uint p) +{ + return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF); +} + +ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension) +{ + /* Perform Owen shuffle of the sample number to reorder the samples. */ +#ifdef _SIMPLE_HASH_ + const uint rv = cmj_hash_simple(dimension, rng_hash); +#else /* Use a _REGULAR_HASH_. */ + const uint rv = cmj_hash(dimension, rng_hash); +#endif +#ifdef _XOR_SHUFFLE_ +# warning "Using XOR shuffle." + const uint s = sample ^ rv; +#else /* Use _OWEN_SHUFFLE_ for reordering. 
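+  * nested_uniform_scramble() bit-reverses the sample index, applies the
+  * hash-based Laine-Karras permutation and bit-reverses again, i.e. a base-2
+  * Owen scramble of the index.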
*/ + const uint s = nested_uniform_scramble(sample, rv); +#endif + + /* Based on the sample number a sample pattern is selected and offset by the dimension. */ + const uint sample_set = s / NUM_PMJ_SAMPLES; + const uint d = (dimension + sample_set); + const uint dim = d % NUM_PMJ_PATTERNS; + + /* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D + * the x part is used for even dims and the y for odd. */ + int index = 2 * ((dim >> 1) * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)) + (dim & 1); + + float fx = kernel_tex_fetch(__sample_pattern_lut, index); + +#ifndef _NO_CRANLEY_PATTERSON_ROTATION_ + /* Use Cranley-Patterson rotation to displace the sample pattern. */ +# ifdef _SIMPLE_HASH_ + float dx = cmj_randfloat_simple(d, rng_hash); +# else + float dx = cmj_randfloat(d, rng_hash); +# endif + /* Jitter sample locations and map back into [0 1]. */ + fx = fx + dx; + fx = fx - floorf(fx); +#else +# warning "Not using Cranley-Patterson Rotation." +#endif + + return fx; +} + +ccl_device void pmj_sample_2D(KernelGlobals kg, + uint sample, + uint rng_hash, + uint dimension, + ccl_private float *x, + ccl_private float *y) +{ + /* Perform a shuffle on the sample number to reorder the samples. */ +#ifdef _SIMPLE_HASH_ + const uint rv = cmj_hash_simple(dimension, rng_hash); +#else /* Use a _REGULAR_HASH_. */ + const uint rv = cmj_hash(dimension, rng_hash); +#endif +#ifdef _XOR_SHUFFLE_ +# warning "Using XOR shuffle." + const uint s = sample ^ rv; +#else /* Use _OWEN_SHUFFLE_ for reordering. */ + const uint s = nested_uniform_scramble(sample, rv); +#endif + + /* Based on the sample number a sample pattern is selected and offset by the dimension. */ + const uint sample_set = s / NUM_PMJ_SAMPLES; + const uint d = (dimension + sample_set); + uint dim = d % NUM_PMJ_PATTERNS; + int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)); + + float fx = kernel_tex_fetch(__sample_pattern_lut, index); + float fy = kernel_tex_fetch(__sample_pattern_lut, index + 1); + +#ifndef _NO_CRANLEY_PATTERSON_ROTATION_ + /* Use Cranley-Patterson rotation to displace the sample pattern. */ +# ifdef _SIMPLE_HASH_ + float dx = cmj_randfloat_simple(d, rng_hash); + float dy = cmj_randfloat_simple(d + 1, rng_hash); +# else + float dx = cmj_randfloat(d, rng_hash); + float dy = cmj_randfloat(d + 1, rng_hash); +# endif + /* Jitter sample locations and map back to the unit square [0 1]x[0 1]. */ + float sx = fx + dx; + float sy = fy + dy; + sx = sx - floorf(sx); + sy = sy - floorf(sy); +#else +# warning "Not using Cranley Patterson Rotation." +#endif + + (*x) = sx; + (*y) = sy; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/lcg.h b/intern/cycles/kernel/sample/lcg.h new file mode 100644 index 00000000000..92cfff639b4 --- /dev/null +++ b/intern/cycles/kernel/sample/lcg.h @@ -0,0 +1,51 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Linear Congruential Generator */ + +ccl_device uint lcg_step_uint(uint *rng) +{ + /* implicit mod 2^32 */ + *rng = (1103515245 * (*rng) + 12345); + return *rng; +} + +ccl_device float lcg_step_float(uint *rng) +{ + /* implicit mod 2^32 */ + *rng = (1103515245 * (*rng) + 12345); + return (float)*rng * (1.0f / (float)0xFFFFFFFF); +} + +ccl_device uint lcg_init(uint seed) +{ + uint rng = seed; + lcg_step_uint(&rng); + return rng; +} + +ccl_device_inline uint lcg_state_init(const uint rng_hash, + const uint rng_offset, + const uint sample, + const uint scramble) +{ + return lcg_init(rng_hash + rng_offset + sample * scramble); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/mapping.h b/intern/cycles/kernel/sample/mapping.h new file mode 100644 index 00000000000..3297aa2a29a --- /dev/null +++ b/intern/cycles/kernel/sample/mapping.h @@ -0,0 +1,177 @@ +/* + * Parts adapted from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2011, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* distribute uniform xy on [0,1] over unit disk [-1,1] */ +ccl_device void to_unit_disk(ccl_private float *x, ccl_private float *y) +{ + float phi = M_2PI_F * (*x); + float r = sqrtf(*y); + + *x = r * cosf(phi); + *y = r * sinf(phi); +} + +/* return an orthogonal tangent and bitangent given a normal and tangent that + * may not be exactly orthogonal */ +ccl_device void make_orthonormals_tangent(const float3 N, + const float3 T, + ccl_private float3 *a, + ccl_private float3 *b) +{ + *b = normalize(cross(N, T)); + *a = cross(*b, N); +} + +/* sample direction with cosine weighted distributed in hemisphere */ +ccl_device_inline void sample_cos_hemisphere( + const float3 N, float randu, float randv, ccl_private float3 *omega_in, ccl_private float *pdf) +{ + to_unit_disk(&randu, &randv); + float costheta = sqrtf(max(1.0f - randu * randu - randv * randv, 0.0f)); + float3 T, B; + make_orthonormals(N, &T, &B); + *omega_in = randu * T + randv * B + costheta * N; + *pdf = costheta * M_1_PI_F; +} + +/* sample direction uniformly distributed in hemisphere */ +ccl_device_inline void sample_uniform_hemisphere( + const float3 N, float randu, float randv, ccl_private float3 *omega_in, ccl_private float *pdf) +{ + float z = randu; + float r = sqrtf(max(0.0f, 1.0f - z * z)); + float phi = M_2PI_F * randv; + float x = r * cosf(phi); + float y = r * sinf(phi); + + float3 T, B; + make_orthonormals(N, &T, &B); + *omega_in = x * T + y * B + z * N; + *pdf = 0.5f * M_1_PI_F; +} + +/* sample direction uniformly distributed in cone */ +ccl_device_inline void sample_uniform_cone(const float3 N, + float angle, + float randu, + float randv, + ccl_private float3 *omega_in, + ccl_private float *pdf) +{ + float zMin = cosf(angle); + float z = zMin - zMin * randu + randu; + float r = safe_sqrtf(1.0f - sqr(z)); + float phi = M_2PI_F * randv; + float x = r * cosf(phi); + float y = r * sinf(phi); + + float3 T, B; + make_orthonormals(N, &T, &B); + *omega_in = x * T + y * B + z * N; + *pdf = M_1_2PI_F / (1.0f - zMin); +} + +ccl_device_inline float pdf_uniform_cone(const float3 N, float3 D, float angle) +{ + float zMin = cosf(angle); + float z = dot(N, D); + if (z > zMin) { + return M_1_2PI_F / (1.0f - zMin); + } + return 0.0f; +} + +/* sample uniform point on the surface of a sphere */ +ccl_device float3 sample_uniform_sphere(float u1, float u2) +{ + float z = 1.0f - 2.0f * u1; + float r = sqrtf(fmaxf(0.0f, 1.0f - z * z)); + float phi = M_2PI_F * u2; + float x = r * cosf(phi); + float y = r * sinf(phi); + + return make_float3(x, y, z); +} + +/* distribute uniform xy on [0,1] over unit disk [-1,1], with concentric mapping + * to better preserve stratification for some RNG sequences */ +ccl_device float2 concentric_sample_disk(float u1, float u2) +{ + float phi, r; + float a = 2.0f * u1 - 1.0f; + float b = 2.0f * u2 - 1.0f; + + if (a == 0.0f && b == 0.0f) { + return zero_float2(); + } + else if (a * a > b * b) { + r = a; + phi = M_PI_4_F * (b / a); + } + else { + r = b; + phi = M_PI_2_F - M_PI_4_F * (a / b); + } + + return make_float2(r * cosf(phi), r * sinf(phi)); +} + +/* sample point in unit polygon with given number of corners and rotation */ +ccl_device float2 regular_polygon_sample(float corners, float rotation, float u, float v) +{ + /* sample corner number and reuse u */ + float corner = floorf(u * corners); + u = u * corners - corner; + + /* uniform sampled triangle weights */ + u = sqrtf(u); + v = v * u; + u = 1.0f - u; + + /* point in triangle */ + float angle = M_PI_F / 
corners; + float2 p = make_float2((u + v) * cosf(angle), (u - v) * sinf(angle)); + + /* rotate */ + rotation += corner * 2.0f * angle; + + float cr = cosf(rotation); + float sr = sinf(rotation); + + return make_float2(cr * p.x - sr * p.y, sr * p.x + cr * p.y); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/mis.h b/intern/cycles/kernel/sample/mis.h new file mode 100644 index 00000000000..0878b3aac36 --- /dev/null +++ b/intern/cycles/kernel/sample/mis.h @@ -0,0 +1,64 @@ +/* + * Parts adapted from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2011, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Multiple importance sampling utilities. */ + +ccl_device float balance_heuristic(float a, float b) +{ + return (a) / (a + b); +} + +ccl_device float balance_heuristic_3(float a, float b, float c) +{ + return (a) / (a + b + c); +} + +ccl_device float power_heuristic(float a, float b) +{ + return (a * a) / (a * a + b * b); +} + +ccl_device float power_heuristic_3(float a, float b, float c) +{ + return (a * a) / (a * a + b * b + c * c); +} + +ccl_device float max_heuristic(float a, float b) +{ + return (a > b) ? 1.0f : 0.0f; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/pattern.h b/intern/cycles/kernel/sample/pattern.h new file mode 100644 index 00000000000..191b24a5f2a --- /dev/null +++ b/intern/cycles/kernel/sample/pattern.h @@ -0,0 +1,185 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "kernel/sample/jitter.h"
+#include "util/hash.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Pseudo random numbers, uncomment this for debugging correlations. Only run
+ * this single threaded on a CPU for repeatable results. */
+//#define __DEBUG_CORRELATION__
+
+/* High Dimensional Sobol.
+ *
+ * Multidimensional Sobol with generator matrices. Dimension 0 and 1 are equal
+ * to classic Van der Corput and Sobol sequences. */
+
+#ifdef __SOBOL__
+
+/* Skip initial numbers that for some dimensions have clear patterns that
+ * don't cover the entire sample space. Ideally we would have a better
+ * progressive pattern that doesn't suffer from this problem, because even
+ * with this offset some dimensions are quite poor.
+ */
+# define SOBOL_SKIP 64
+
+ccl_device uint sobol_dimension(KernelGlobals kg, int index, int dimension)
+{
+  uint result = 0;
+  uint i = index + SOBOL_SKIP;
+  for (int j = 0, x; (x = find_first_set(i)); i >>= x) {
+    j += x;
+    result ^= __float_as_uint(kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1));
+  }
+  return result;
+}
+
+#endif /* __SOBOL__ */
+
+ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
+                                         uint rng_hash,
+                                         int sample,
+                                         int dimension)
+{
+#ifdef __DEBUG_CORRELATION__
+  return (float)drand48();
+#endif
+
+#ifdef __SOBOL__
+  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ)
+#endif
+  {
+    return pmj_sample_1D(kg, sample, rng_hash, dimension);
+  }
+
+#ifdef __SOBOL__
+  /* Sobol sequence value using direction vectors. */
+  uint result = sobol_dimension(kg, sample, dimension);
+  float r = (float)result * (1.0f / (float)0xFFFFFFFF);
+
+  /* Cranley-Patterson rotation using rng seed */
+  float shift;
+
+  /* Hash rng with dimension to solve correlation issues.
+   * See T38710, T50116.
+   */
+  uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+  shift = tmp_rng * (1.0f / (float)0xFFFFFFFF);
+
+  return r + shift - floorf(r + shift);
+#endif
+}
+
+ccl_device_forceinline void path_rng_2D(KernelGlobals kg,
+                                        uint rng_hash,
+                                        int sample,
+                                        int dimension,
+                                        ccl_private float *fx,
+                                        ccl_private float *fy)
+{
+#ifdef __DEBUG_CORRELATION__
+  *fx = (float)drand48();
+  *fy = (float)drand48();
+  return;
+#endif
+
+#ifdef __SOBOL__
+  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ)
+#endif
+  {
+    pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy);
+
+    return;
+  }
+
+#ifdef __SOBOL__
+  /* Sobol. */
+  *fx = path_rng_1D(kg, rng_hash, sample, dimension);
+  *fy = path_rng_1D(kg, rng_hash, sample, dimension + 1);
+#endif
+}
+
+/**
+ * 1D hash recommended from "Hash Functions for GPU Rendering" JCGT Vol. 9, No. 3, 2020
+ * See https://www.shadertoy.com/view/4tXyWN and https://www.shadertoy.com/view/XlGcRh
+ * http://www.jcgt.org/published/0009/03/02/paper.pdf
+ */
+ccl_device_inline uint hash_iqint1(uint n)
+{
+  n = (n << 13U) ^ n;
+  n = n * (n * n * 15731U + 789221U) + 1376312589U;
+
+  return n;
+}
+
+/**
+ * 2D hash recommended from "Hash Functions for GPU Rendering" JCGT Vol. 9, No. 3, 2020
+ * See https://www.shadertoy.com/view/4tXyWN and https://www.shadertoy.com/view/XlGcRh
+ * http://www.jcgt.org/published/0009/03/02/paper.pdf
+ */
+ccl_device_inline uint hash_iqnt2d(const uint x, const uint y)
+{
+  const uint qx = 1103515245U * ((x >> 1U) ^ (y));
+  const uint qy = 1103515245U * ((y >> 1U) ^ (x));
+  const uint n = 1103515245U * ((qx) ^ (qy >> 3U));
+
+  return n;
+}
+
+ccl_device_inline uint path_rng_hash_init(KernelGlobals kg,
+                                          const int sample,
+                                          const int x,
+                                          const int y)
+{
+  const uint rng_hash = hash_iqnt2d(x, y) ^ kernel_data.integrator.seed;
+
+#ifdef __DEBUG_CORRELATION__
+  srand48(rng_hash + sample);
+#else
+  (void)sample;
+#endif
+
+  return rng_hash;
+}
+
+ccl_device_inline bool sample_is_even(int pattern, int sample)
+{
+  if (pattern == SAMPLING_PATTERN_PMJ) {
+    /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al.
+     * We can use this to divide the sample sequence into two classes for easier variance
+     * estimation. */
+#if defined(__GNUC__) && !defined(__KERNEL_GPU__)
+    return __builtin_popcount(sample & 0xaaaaaaaa) & 1;
+#elif defined(__NVCC__)
+    return __popc(sample & 0xaaaaaaaa) & 1;
+#else
+    /* TODO(Stefan): pop-count intrinsic for Windows with fallback for older CPUs. */
+    int i = sample & 0xaaaaaaaa;
+    i = i - ((i >> 1) & 0x55555555);
+    i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
+    i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
+    return i & 1;
+#endif
+  }
+  else {
+    /* TODO(Stefan): Are there reliable ways of dividing CMJ and Sobol into two classes? */
+    return sample & 0x1;
+  }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/sample/sample_jitter.h b/intern/cycles/kernel/sample/sample_jitter.h
deleted file mode 100644
index b62ec7fda42..00000000000
--- a/intern/cycles/kernel/sample/sample_jitter.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#pragma once -CCL_NAMESPACE_BEGIN - -ccl_device_inline uint32_t laine_karras_permutation(uint32_t x, uint32_t seed) -{ - x += seed; - x ^= (x * 0x6c50b47cu); - x ^= x * 0xb82f1e52u; - x ^= x * 0xc7afe638u; - x ^= x * 0x8d22f6e6u; - - return x; -} - -ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed) -{ - x = reverse_integer_bits(x); - x = laine_karras_permutation(x, seed); - x = reverse_integer_bits(x); - - return x; -} - -ccl_device_inline uint cmj_hash(uint i, uint p) -{ - i ^= p; - i ^= i >> 17; - i ^= i >> 10; - i *= 0xb36534e5; - i ^= i >> 12; - i ^= i >> 21; - i *= 0x93fc4795; - i ^= 0xdf6e307f; - i ^= i >> 17; - i *= 1 | p >> 18; - - return i; -} - -ccl_device_inline uint cmj_hash_simple(uint i, uint p) -{ - i = (i ^ 61) ^ p; - i += i << 3; - i ^= i >> 4; - i *= 0x27d4eb2d; - return i; -} - -ccl_device_inline float cmj_randfloat(uint i, uint p) -{ - return cmj_hash(i, p) * (1.0f / 4294967808.0f); -} - -ccl_device_inline float cmj_randfloat_simple(uint i, uint p) -{ - return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF); -} - -ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension) -{ - /* Perform Owen shuffle of the sample number to reorder the samples. */ -#ifdef _SIMPLE_HASH_ - const uint rv = cmj_hash_simple(dimension, rng_hash); -#else /* Use a _REGULAR_HASH_. */ - const uint rv = cmj_hash(dimension, rng_hash); -#endif -#ifdef _XOR_SHUFFLE_ -# warning "Using XOR shuffle." - const uint s = sample ^ rv; -#else /* Use _OWEN_SHUFFLE_ for reordering. */ - const uint s = nested_uniform_scramble(sample, rv); -#endif - - /* Based on the sample number a sample pattern is selected and offset by the dimension. */ - const uint sample_set = s / NUM_PMJ_SAMPLES; - const uint d = (dimension + sample_set); - const uint dim = d % NUM_PMJ_PATTERNS; - - /* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D - * the x part is used for even dims and the y for odd. */ - int index = 2 * ((dim >> 1) * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)) + (dim & 1); - - float fx = kernel_tex_fetch(__sample_pattern_lut, index); - -#ifndef _NO_CRANLEY_PATTERSON_ROTATION_ - /* Use Cranley-Patterson rotation to displace the sample pattern. */ -# ifdef _SIMPLE_HASH_ - float dx = cmj_randfloat_simple(d, rng_hash); -# else - float dx = cmj_randfloat(d, rng_hash); -# endif - /* Jitter sample locations and map back into [0 1]. */ - fx = fx + dx; - fx = fx - floorf(fx); -#else -# warning "Not using Cranley-Patterson Rotation." -#endif - - return fx; -} - -ccl_device void pmj_sample_2D(KernelGlobals kg, - uint sample, - uint rng_hash, - uint dimension, - ccl_private float *x, - ccl_private float *y) -{ - /* Perform a shuffle on the sample number to reorder the samples. */ -#ifdef _SIMPLE_HASH_ - const uint rv = cmj_hash_simple(dimension, rng_hash); -#else /* Use a _REGULAR_HASH_. */ - const uint rv = cmj_hash(dimension, rng_hash); -#endif -#ifdef _XOR_SHUFFLE_ -# warning "Using XOR shuffle." - const uint s = sample ^ rv; -#else /* Use _OWEN_SHUFFLE_ for reordering. */ - const uint s = nested_uniform_scramble(sample, rv); -#endif - - /* Based on the sample number a sample pattern is selected and offset by the dimension. 
*/ - const uint sample_set = s / NUM_PMJ_SAMPLES; - const uint d = (dimension + sample_set); - uint dim = d % NUM_PMJ_PATTERNS; - int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)); - - float fx = kernel_tex_fetch(__sample_pattern_lut, index); - float fy = kernel_tex_fetch(__sample_pattern_lut, index + 1); - -#ifndef _NO_CRANLEY_PATTERSON_ROTATION_ - /* Use Cranley-Patterson rotation to displace the sample pattern. */ -# ifdef _SIMPLE_HASH_ - float dx = cmj_randfloat_simple(d, rng_hash); - float dy = cmj_randfloat_simple(d + 1, rng_hash); -# else - float dx = cmj_randfloat(d, rng_hash); - float dy = cmj_randfloat(d + 1, rng_hash); -# endif - /* Jitter sample locations and map back to the unit square [0 1]x[0 1]. */ - float sx = fx + dx; - float sy = fy + dy; - sx = sx - floorf(sx); - sy = sy - floorf(sy); -#else -# warning "Not using Cranley Patterson Rotation." -#endif - - (*x) = sx; - (*y) = sy; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/sample_mapping.h b/intern/cycles/kernel/sample/sample_mapping.h deleted file mode 100644 index 3297aa2a29a..00000000000 --- a/intern/cycles/kernel/sample/sample_mapping.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Parts adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* distribute uniform xy on [0,1] over unit disk [-1,1] */ -ccl_device void to_unit_disk(ccl_private float *x, ccl_private float *y) -{ - float phi = M_2PI_F * (*x); - float r = sqrtf(*y); - - *x = r * cosf(phi); - *y = r * sinf(phi); -} - -/* return an orthogonal tangent and bitangent given a normal and tangent that - * may not be exactly orthogonal */ -ccl_device void make_orthonormals_tangent(const float3 N, - const float3 T, - ccl_private float3 *a, - ccl_private float3 *b) -{ - *b = normalize(cross(N, T)); - *a = cross(*b, N); -} - -/* sample direction with cosine weighted distributed in hemisphere */ -ccl_device_inline void sample_cos_hemisphere( - const float3 N, float randu, float randv, ccl_private float3 *omega_in, ccl_private float *pdf) -{ - to_unit_disk(&randu, &randv); - float costheta = sqrtf(max(1.0f - randu * randu - randv * randv, 0.0f)); - float3 T, B; - make_orthonormals(N, &T, &B); - *omega_in = randu * T + randv * B + costheta * N; - *pdf = costheta * M_1_PI_F; -} - -/* sample direction uniformly distributed in hemisphere */ -ccl_device_inline void sample_uniform_hemisphere( - const float3 N, float randu, float randv, ccl_private float3 *omega_in, ccl_private float *pdf) -{ - float z = randu; - float r = sqrtf(max(0.0f, 1.0f - z * z)); - float phi = M_2PI_F * randv; - float x = r * cosf(phi); - float y = r * sinf(phi); - - float3 T, B; - make_orthonormals(N, &T, &B); - *omega_in = x * T + y * B + z * N; - *pdf = 0.5f * M_1_PI_F; -} - -/* sample direction uniformly distributed in cone */ -ccl_device_inline void sample_uniform_cone(const float3 N, - float angle, - float randu, - float randv, - ccl_private float3 *omega_in, - ccl_private float *pdf) -{ - float zMin = cosf(angle); - float z = zMin - zMin * randu + randu; - float r = safe_sqrtf(1.0f - sqr(z)); - float phi = M_2PI_F * randv; - float x = r * cosf(phi); - float y = r * sinf(phi); - - float3 T, B; - make_orthonormals(N, &T, &B); - *omega_in = x * T + y * B + z * N; - *pdf = M_1_2PI_F / (1.0f - zMin); -} - -ccl_device_inline float pdf_uniform_cone(const float3 N, float3 D, float angle) -{ - float zMin = cosf(angle); - float z = dot(N, D); - if (z > zMin) { - return M_1_2PI_F / (1.0f - zMin); - } - return 0.0f; -} - -/* sample uniform point on the surface of a sphere */ -ccl_device float3 sample_uniform_sphere(float u1, float u2) -{ - float z = 1.0f - 2.0f * u1; - float r = sqrtf(fmaxf(0.0f, 1.0f - z * z)); - float phi = M_2PI_F * u2; - float x = r * cosf(phi); - float y = r * sinf(phi); - - return make_float3(x, y, z); -} - -/* distribute uniform xy on [0,1] over unit disk [-1,1], with concentric mapping - * to better preserve stratification for some RNG sequences */ -ccl_device float2 concentric_sample_disk(float u1, float u2) -{ - float phi, r; - float a = 2.0f * u1 - 1.0f; - float b = 2.0f * u2 - 1.0f; - - if (a == 0.0f && b == 0.0f) { - return zero_float2(); - } - else if (a * a > b * b) { - r = a; - phi = M_PI_4_F * (b / a); - } - else { - r = b; - phi = M_PI_2_F - M_PI_4_F * (a / b); - } - - return make_float2(r * cosf(phi), r * sinf(phi)); -} - -/* sample point in unit polygon with given number of corners and rotation */ -ccl_device float2 regular_polygon_sample(float corners, float rotation, float u, float v) -{ - /* sample corner number and reuse u */ - float corner = floorf(u * corners); - u = u * corners - corner; - - /* uniform sampled triangle weights */ - u = sqrtf(u); - v = v * u; - u = 1.0f - u; - - /* point in triangle */ - float angle = M_PI_F / 
corners; - float2 p = make_float2((u + v) * cosf(angle), (u - v) * sinf(angle)); - - /* rotate */ - rotation += corner * 2.0f * angle; - - float cr = cosf(rotation); - float sr = sinf(rotation); - - return make_float2(cr * p.x - sr * p.y, sr * p.x + cr * p.y); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/sample/sample_pattern.h b/intern/cycles/kernel/sample/sample_pattern.h deleted file mode 100644 index 95635c2c855..00000000000 --- a/intern/cycles/kernel/sample/sample_pattern.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "kernel/sample/sample_jitter.h" -#include "util/util_hash.h" - -CCL_NAMESPACE_BEGIN - -/* Pseudo random numbers, uncomment this for debugging correlations. Only run - * this single threaded on a CPU for repeatable results. */ -//#define __DEBUG_CORRELATION__ - -/* High Dimensional Sobol. - * - * Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal - * to classic Van der Corput and Sobol sequences. */ - -#ifdef __SOBOL__ - -/* Skip initial numbers that for some dimensions have clear patterns that - * don't cover the entire sample space. Ideally we would have a better - * progressive pattern that doesn't suffer from this problem, because even - * with this offset some dimensions are quite poor. - */ -# define SOBOL_SKIP 64 - -ccl_device uint sobol_dimension(KernelGlobals kg, int index, int dimension) -{ - uint result = 0; - uint i = index + SOBOL_SKIP; - for (int j = 0, x; (x = find_first_set(i)); i >>= x) { - j += x; - result ^= __float_as_uint(kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j - 1)); - } - return result; -} - -#endif /* __SOBOL__ */ - -ccl_device_forceinline float path_rng_1D(KernelGlobals kg, - uint rng_hash, - int sample, - int dimension) -{ -#ifdef __DEBUG_CORRELATION__ - return (float)drand48(); -#endif - -#ifdef __SOBOL__ - if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) -#endif - { - return pmj_sample_1D(kg, sample, rng_hash, dimension); - } - -#ifdef __SOBOL__ - /* Sobol sequence value using direction vectors. */ - uint result = sobol_dimension(kg, sample, dimension); - float r = (float)result * (1.0f / (float)0xFFFFFFFF); - - /* Cranly-Patterson rotation using rng seed */ - float shift; - - /* Hash rng with dimension to solve correlation issues. - * See T38710, T50116. 
- */ - uint tmp_rng = cmj_hash_simple(dimension, rng_hash); - shift = tmp_rng * (1.0f / (float)0xFFFFFFFF); - - return r + shift - floorf(r + shift); -#endif -} - -ccl_device_forceinline void path_rng_2D(KernelGlobals kg, - uint rng_hash, - int sample, - int dimension, - ccl_private float *fx, - ccl_private float *fy) -{ -#ifdef __DEBUG_CORRELATION__ - *fx = (float)drand48(); - *fy = (float)drand48(); - return; -#endif - -#ifdef __SOBOL__ - if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) -#endif - { - pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy); - - return; - } - -#ifdef __SOBOL__ - /* Sobol. */ - *fx = path_rng_1D(kg, rng_hash, sample, dimension); - *fy = path_rng_1D(kg, rng_hash, sample, dimension + 1); -#endif -} - -/** - * 1D hash recommended from "Hash Functions for GPU Rendering" JCGT Vol. 9, No. 3, 2020 - * See https://www.shadertoy.com/view/4tXyWN and https://www.shadertoy.com/view/XlGcRh - * http://www.jcgt.org/published/0009/03/02/paper.pdf - */ -ccl_device_inline uint hash_iqint1(uint n) -{ - n = (n << 13U) ^ n; - n = n * (n * n * 15731U + 789221U) + 1376312589U; - - return n; -} - -/** - * 2D hash recommended from "Hash Functions for GPU Rendering" JCGT Vol. 9, No. 3, 2020 - * See https://www.shadertoy.com/view/4tXyWN and https://www.shadertoy.com/view/XlGcRh - * http://www.jcgt.org/published/0009/03/02/paper.pdf - */ -ccl_device_inline uint hash_iqnt2d(const uint x, const uint y) -{ - const uint qx = 1103515245U * ((x >> 1U) ^ (y)); - const uint qy = 1103515245U * ((y >> 1U) ^ (x)); - const uint n = 1103515245U * ((qx) ^ (qy >> 3U)); - - return n; -} - -ccl_device_inline uint path_rng_hash_init(KernelGlobals kg, - const int sample, - const int x, - const int y) -{ - const uint rng_hash = hash_iqnt2d(x, y) ^ kernel_data.integrator.seed; - -#ifdef __DEBUG_CORRELATION__ - srand48(rng_hash + sample); -#else - (void)sample; -#endif - - return rng_hash; -} - -ccl_device_inline bool sample_is_even(int pattern, int sample) -{ - if (pattern == SAMPLING_PATTERN_PMJ) { - /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al. - * We can use this to get divide sample sequence into two classes for easier variance - * estimation. */ -#if defined(__GNUC__) && !defined(__KERNEL_GPU__) - return __builtin_popcount(sample & 0xaaaaaaaa) & 1; -#elif defined(__NVCC__) - return __popc(sample & 0xaaaaaaaa) & 1; -#else - /* TODO(Stefan): pop-count intrinsic for Windows with fallback for older CPUs. */ - int i = sample & 0xaaaaaaaa; - i = i - ((i >> 1) & 0x55555555); - i = (i & 0x33333333) + ((i >> 2) & 0x33333333); - i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; - return i & 1; -#endif - } - else { - /* TODO(Stefan): Are there reliable ways of dividing CMJ and Sobol into two classes? */ - return sample & 0x1; - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/ao.h b/intern/cycles/kernel/svm/ao.h new file mode 100644 index 00000000000..678f49c8ccd --- /dev/null +++ b/intern/cycles/kernel/svm/ao.h @@ -0,0 +1,141 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/bvh/bvh.h" + +CCL_NAMESPACE_BEGIN + +#ifdef __SHADER_RAYTRACE__ + +# ifdef __KERNEL_OPTIX__ +extern "C" __device__ float __direct_callable__svm_node_ao( +# else +ccl_device float svm_ao( +# endif + KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd, + float3 N, + float max_dist, + int num_samples, + int flags) +{ + if (flags & NODE_AO_GLOBAL_RADIUS) { + max_dist = kernel_data.integrator.ao_bounces_distance; + } + + /* Early out if no sampling needed. */ + if (max_dist <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) { + return 1.0f; + } + + /* Can't raytrace from shaders like displacement, before BVH exists. */ + if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { + return 1.0f; + } + + if (flags & NODE_AO_INSIDE) { + N = -N; + } + + float3 T, B; + make_orthonormals(N, &T, &B); + + /* TODO: support ray-tracing in shadow shader evaluation? */ + RNGState rng_state; + path_state_rng_load(state, &rng_state); + + int unoccluded = 0; + for (int sample = 0; sample < num_samples; sample++) { + float disk_u, disk_v; + path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); + + float2 d = concentric_sample_disk(disk_u, disk_v); + float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d))); + + /* Create ray. */ + Ray ray; + ray.P = ray_offset(sd->P, N); + ray.D = D.x * T + D.y * B + D.z * N; + ray.t = max_dist; + ray.time = sd->time; + ray.dP = differential_zero_compact(); + ray.dD = differential_zero_compact(); + + if (flags & NODE_AO_ONLY_LOCAL) { + if (!scene_intersect_local(kg, &ray, NULL, sd->object, NULL, 0)) { + unoccluded++; + } + } + else { + Intersection isect; + if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) { + unoccluded++; + } + } + } + + return ((float)unoccluded) / num_samples; +} + +template +# if defined(__KERNEL_OPTIX__) +ccl_device_inline +# else +ccl_device_noinline +# endif + void + svm_node_ao(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint flags, dist_offset, normal_offset, out_ao_offset; + svm_unpack_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset); + + uint color_offset, out_color_offset, samples; + svm_unpack_node_uchar3(node.z, &color_offset, &out_color_offset, &samples); + + float ao = 1.0f; + + IF_KERNEL_NODES_FEATURE(RAYTRACE) + { + float dist = stack_load_float_default(stack, dist_offset, node.w); + float3 normal = stack_valid(normal_offset) ? 
stack_load_float3(stack, normal_offset) : sd->N; + +# ifdef __KERNEL_OPTIX__ + ao = optixDirectCall(0, kg, state, sd, normal, dist, samples, flags); +# else + ao = svm_ao(kg, state, sd, normal, dist, samples, flags); +# endif + } + + if (stack_valid(out_ao_offset)) { + stack_store_float(stack, out_ao_offset, ao); + } + + if (stack_valid(out_color_offset)) { + float3 color = stack_load_float3(stack, color_offset); + stack_store_float3(stack, out_color_offset, ao * color); + } +} + +#endif /* __SHADER_RAYTRACE__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/aov.h b/intern/cycles/kernel/svm/aov.h new file mode 100644 index 00000000000..21ee7af7639 --- /dev/null +++ b/intern/cycles/kernel/svm/aov.h @@ -0,0 +1,70 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/film/write_passes.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_inline bool svm_node_aov_check(const uint32_t path_flag, + ccl_global float *render_buffer) +{ + bool is_primary = (path_flag & PATH_RAY_CAMERA) && (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)); + + return ((render_buffer != NULL) && is_primary); +} + +template +ccl_device void svm_node_aov_color(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node, + ccl_global float *render_buffer) +{ + IF_KERNEL_NODES_FEATURE(AOV) + { + const float3 val = stack_load_float3(stack, node.y); + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + ccl_global float *buffer = render_buffer + render_buffer_offset + + (kernel_data.film.pass_aov_color + node.z); + kernel_write_pass_float3(buffer, make_float3(val.x, val.y, val.z)); + } +} + +template +ccl_device void svm_node_aov_value(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node, + ccl_global float *render_buffer) +{ + IF_KERNEL_NODES_FEATURE(AOV) + { + const float val = stack_load_float(stack, node.y); + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + ccl_global float *buffer = render_buffer + render_buffer_offset + + (kernel_data.film.pass_aov_value + node.z); + kernel_write_pass_float(buffer, val); + } +} +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/attribute.h b/intern/cycles/kernel/svm/attribute.h new file mode 100644 index 00000000000..e9de0164c7a --- /dev/null +++ b/intern/cycles/kernel/svm/attribute.h @@ -0,0 +1,346 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Attribute Node */ + +ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals kg, + ccl_private ShaderData *sd, + uint4 node, + ccl_private NodeAttributeOutputType *type, + ccl_private uint *out_offset) +{ + *out_offset = node.z; + *type = (NodeAttributeOutputType)node.w; + + AttributeDescriptor desc; + + if (sd->object != OBJECT_NONE) { + desc = find_attribute(kg, sd, node.y); + if (desc.offset == ATTR_STD_NOT_FOUND) { + desc = attribute_not_found(); + desc.offset = 0; + desc.type = (NodeAttributeType)node.w; + } + } + else { + /* background */ + desc = attribute_not_found(); + desc.offset = 0; + desc.type = (NodeAttributeType)node.w; + } + + return desc; +} + +template +ccl_device_noinline void svm_node_attr(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT; + uint out_offset = 0; + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); + +#ifdef __VOLUME__ + IF_KERNEL_NODES_FEATURE(VOLUME) + { + /* Volumes + * NOTE: moving this into its own node type might help improve performance. */ + if (primitive_is_volume_attribute(sd, desc)) { + const float4 value = volume_attribute_float4(kg, sd, desc); + + if (type == NODE_ATTR_OUTPUT_FLOAT) { + const float f = volume_attribute_value_to_float(value); + stack_store_float(stack, out_offset, f); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + const float3 f = volume_attribute_value_to_float3(value); + stack_store_float3(stack, out_offset, f); + } + else { + const float f = volume_attribute_value_to_alpha(value); + stack_store_float(stack, out_offset, f); + } + return; + } + } +#endif + + if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { + /* No generated attribute, fall back to object coordinates. */ + float3 f = sd->P; + object_inverse_position_transform(kg, sd, &f); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, average(f)); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, f); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + return; + } + + /* Surface. 
*/ + if (desc.type == NODE_ATTR_FLOAT) { + float f = primitive_surface_attribute_float(kg, sd, desc, NULL, NULL); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, f); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, make_float3(f, f, f)); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + } + else if (desc.type == NODE_ATTR_FLOAT2) { + float2 f = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, f.x); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, make_float3(f.x, f.y, 0.0f)); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + } + else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) { + float4 f = primitive_surface_attribute_float4(kg, sd, desc, NULL, NULL); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, average(float4_to_float3(f))); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, float4_to_float3(f)); + } + else { + stack_store_float(stack, out_offset, f.w); + } + } + else { + float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, average(f)); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, f); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + } +} + +ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT; + uint out_offset = 0; + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); + +#ifdef __VOLUME__ + /* Volume */ + if (primitive_is_volume_attribute(sd, desc)) { + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, 0.0f); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, make_float3(0.0f, 0.0f, 0.0f)); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + return; + } +#endif + + if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { + /* No generated attribute, fall back to object coordinates. 
*/ + float3 f = sd->P + sd->dP.dx; + object_inverse_position_transform(kg, sd, &f); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, average(f)); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, f); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + return; + } + + /* Surface */ + if (desc.type == NODE_ATTR_FLOAT) { + float dx; + float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, f + dx); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, make_float3(f + dx, f + dx, f + dx)); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + } + else if (desc.type == NODE_ATTR_FLOAT2) { + float2 dx; + float2 f = primitive_surface_attribute_float2(kg, sd, desc, &dx, NULL); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, f.x + dx.x); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, make_float3(f.x + dx.x, f.y + dx.y, 0.0f)); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + } + else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) { + float4 dx; + float4 f = primitive_surface_attribute_float4(kg, sd, desc, &dx, NULL); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, average(float4_to_float3(f + dx))); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, float4_to_float3(f + dx)); + } + else { + stack_store_float(stack, out_offset, f.w + dx.w); + } + } + else { + float3 dx; + float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, average(f + dx)); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, f + dx); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + } +} + +ccl_device_noinline void svm_node_attr_bump_dy(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT; + uint out_offset = 0; + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); + +#ifdef __VOLUME__ + /* Volume */ + if (primitive_is_volume_attribute(sd, desc)) { + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, 0.0f); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, make_float3(0.0f, 0.0f, 0.0f)); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + return; + } +#endif + + if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { + /* No generated attribute, fall back to object coordinates. 
*/ + float3 f = sd->P + sd->dP.dy; + object_inverse_position_transform(kg, sd, &f); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, average(f)); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, f); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + return; + } + + /* Surface */ + if (desc.type == NODE_ATTR_FLOAT) { + float dy; + float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, f + dy); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, make_float3(f + dy, f + dy, f + dy)); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + } + else if (desc.type == NODE_ATTR_FLOAT2) { + float2 dy; + float2 f = primitive_surface_attribute_float2(kg, sd, desc, NULL, &dy); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, f.x + dy.x); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, make_float3(f.x + dy.x, f.y + dy.y, 0.0f)); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + } + else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) { + float4 dy; + float4 f = primitive_surface_attribute_float4(kg, sd, desc, NULL, &dy); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, average(float4_to_float3(f + dy))); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, float4_to_float3(f + dy)); + } + else { + stack_store_float(stack, out_offset, f.w + dy.w); + } + } + else { + float3 dy; + float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy); + if (type == NODE_ATTR_OUTPUT_FLOAT) { + stack_store_float(stack, out_offset, average(f + dy)); + } + else if (type == NODE_ATTR_OUTPUT_FLOAT3) { + stack_store_float3(stack, out_offset, f + dy); + } + else { + stack_store_float(stack, out_offset, 1.0f); + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/bevel.h b/intern/cycles/kernel/svm/bevel.h new file mode 100644 index 00000000000..37c7caf1372 --- /dev/null +++ b/intern/cycles/kernel/svm/bevel.h @@ -0,0 +1,327 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/bvh/bvh.h" +#include "kernel/sample/mapping.h" +#include "kernel/sample/pattern.h" + +CCL_NAMESPACE_BEGIN + +#ifdef __SHADER_RAYTRACE__ + +/* Planar Cubic BSSRDF falloff, reused for bevel. + * + * This is basically (Rm - x)^3, with some factors to normalize it. For sampling + * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as + * far as I can tell has no closed form solution. So we get an iterative solution + * instead with newton-raphson. 
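+ *
+ * Written out for reference: with the normalized profile
+ *   f(r) = 10 * (Rm - r)^3 / (pi * Rm^5),
+ * the radial CDF is
+ *   cdf(x) = integral of 2*pi*r * f(r) dr from r = 0 to x*Rm
+ *          = 10*x^2 - 20*x^3 + 15*x^4 - 4*x^5,  with cdf(1) = 1,
+ * which is exactly the quintic that svm_bevel_cubic_quintic_root_find()
+ * below inverts to importance sample a radius.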
*/ + +ccl_device float svm_bevel_cubic_eval(const float radius, float r) +{ + const float Rm = radius; + + if (r >= Rm) + return 0.0f; + + /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */ + const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm; + const float f = Rm - r; + const float num = f * f * f; + + return (10.0f * num) / (Rm5 * M_PI_F); +} + +ccl_device float svm_bevel_cubic_pdf(const float radius, float r) +{ + return svm_bevel_cubic_eval(radius, r); +} + +/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */ +ccl_device_forceinline float svm_bevel_cubic_quintic_root_find(float xi) +{ + /* newton-raphson iteration, usually succeeds in 2-4 iterations, except + * outside 0.02 ... 0.98 where it can go up to 10, so overall performance + * should not be too bad */ + const float tolerance = 1e-6f; + const int max_iteration_count = 10; + float x = 0.25f; + int i; + + for (i = 0; i < max_iteration_count; i++) { + float x2 = x * x; + float x3 = x2 * x; + float nx = (1.0f - x); + + float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi; + float f_ = 20.0f * (x * nx) * (nx * nx); + + if (fabsf(f) < tolerance || f_ == 0.0f) + break; + + x = saturate(x - f / f_); + } + + return x; +} + +ccl_device void svm_bevel_cubic_sample(const float radius, + float xi, + ccl_private float *r, + ccl_private float *h) +{ + float Rm = radius; + float r_ = svm_bevel_cubic_quintic_root_find(xi); + + r_ *= Rm; + *r = r_; + + /* h^2 + r^2 = Rm^2 */ + *h = safe_sqrtf(Rm * Rm - r_ * r_); +} + +/* Bevel shader averaging normals from nearby surfaces. + * + * Sampling strategy from: BSSRDF Importance Sampling, SIGGRAPH 2013 + * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf + */ + +# ifdef __KERNEL_OPTIX__ +extern "C" __device__ float3 __direct_callable__svm_node_bevel( +# else +ccl_device float3 svm_bevel( +# endif + KernelGlobals kg, + ConstIntegratorState state, + ccl_private ShaderData *sd, + float radius, + int num_samples) +{ + /* Early out if no sampling needed. */ + if (radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) { + return sd->N; + } + + /* Can't raytrace from shaders like displacement, before BVH exists. */ + if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { + return sd->N; + } + + /* Don't bevel for blurry indirect rays. */ + if (INTEGRATOR_STATE(state, path, min_ray_pdf) < 8.0f) { + return sd->N; + } + + /* Setup for multi intersection. */ + LocalIntersection isect; + uint lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_hash), + INTEGRATOR_STATE(state, path, rng_offset), + INTEGRATOR_STATE(state, path, sample), + 0x64c6a40e); + + /* Sample normals from surrounding points on surface. */ + float3 sum_N = make_float3(0.0f, 0.0f, 0.0f); + + /* TODO: support ray-tracing in shadow shader evaluation? */ + RNGState rng_state; + path_state_rng_load(state, &rng_state); + + for (int sample = 0; sample < num_samples; sample++) { + float disk_u, disk_v; + path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); + + /* Pick random axis in local frame and point on disk. 
*/ + float3 disk_N, disk_T, disk_B; + float pick_pdf_N, pick_pdf_T, pick_pdf_B; + + disk_N = sd->Ng; + make_orthonormals(disk_N, &disk_T, &disk_B); + + float axisu = disk_u; + + if (axisu < 0.5f) { + pick_pdf_N = 0.5f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.25f; + disk_u *= 2.0f; + } + else if (axisu < 0.75f) { + float3 tmp = disk_N; + disk_N = disk_T; + disk_T = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.5f; + pick_pdf_B = 0.25f; + disk_u = (disk_u - 0.5f) * 4.0f; + } + else { + float3 tmp = disk_N; + disk_N = disk_B; + disk_B = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.5f; + disk_u = (disk_u - 0.75f) * 4.0f; + } + + /* Sample point on disk. */ + float phi = M_2PI_F * disk_u; + float disk_r = disk_v; + float disk_height; + + /* Perhaps find something better than Cubic BSSRDF, but happens to work well. */ + svm_bevel_cubic_sample(radius, disk_r, &disk_r, &disk_height); + + float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B; + + /* Create ray. */ + Ray ray ccl_optional_struct_init; + ray.P = sd->P + disk_N * disk_height + disk_P; + ray.D = -disk_N; + ray.t = 2.0f * disk_height; + ray.dP = differential_zero_compact(); + ray.dD = differential_zero_compact(); + ray.time = sd->time; + + /* Intersect with the same object. if multiple intersections are found it + * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */ + scene_intersect_local(kg, &ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS); + + int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS); + + for (int hit = 0; hit < num_eval_hits; hit++) { + /* Quickly retrieve P and Ng without setting up ShaderData. */ + float3 hit_P; + if (sd->type & PRIMITIVE_TRIANGLE) { + hit_P = triangle_refine_local( + kg, sd, ray.P, ray.D, ray.t, isect.hits[hit].object, isect.hits[hit].prim); + } +# ifdef __OBJECT_MOTION__ + else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { + float3 verts[3]; + motion_triangle_vertices(kg, sd->object, isect.hits[hit].prim, sd->time, verts); + hit_P = motion_triangle_refine_local( + kg, sd, ray.P, ray.D, ray.t, isect.hits[hit].object, isect.hits[hit].prim, verts); + } +# endif /* __OBJECT_MOTION__ */ + + /* Get geometric normal. */ + float3 hit_Ng = isect.Ng[hit]; + int object = isect.hits[hit].object; + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + hit_Ng = -hit_Ng; + } + + /* Compute smooth normal. */ + float3 N = hit_Ng; + int prim = isect.hits[hit].prim; + int shader = kernel_tex_fetch(__tri_shader, prim); + + if (shader & SHADER_SMOOTH_NORMAL) { + float u = isect.hits[hit].u; + float v = isect.hits[hit].v; + + if (sd->type & PRIMITIVE_TRIANGLE) { + N = triangle_smooth_normal(kg, N, prim, u, v); + } +# ifdef __OBJECT_MOTION__ + else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { + N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time); + } +# endif /* __OBJECT_MOTION__ */ + } + + /* Transform normals to world space. */ + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_normal_transform(kg, sd, &N); + object_normal_transform(kg, sd, &hit_Ng); + } + + /* Probability densities for local frame axes. */ + float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng)); + float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng)); + float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng)); + + /* Multiple importance sample between 3 axes, power heuristic + * found to be slightly better than balance heuristic. pdf_N + * in the MIS weight and denominator canceled out. 
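+       * Written out for reference: the power heuristic weight for sampling via
+       * the N axis is pdf_N^2 / (pdf_N^2 + pdf_T^2 + pdf_B^2); dividing by the
+       * sampling pdf pdf_N leaves w = pdf_N / (pdf_N^2 + pdf_T^2 + pdf_B^2),
+       * which is what is computed below.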
*/ + float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B)); + if (isect.num_hits > LOCAL_MAX_HITS) { + w *= isect.num_hits / (float)LOCAL_MAX_HITS; + } + + /* Real distance to sampled point. */ + float r = len(hit_P - sd->P); + + /* Compute weight. */ + float pdf = svm_bevel_cubic_pdf(radius, r); + float disk_pdf = svm_bevel_cubic_pdf(radius, disk_r); + + w *= pdf / disk_pdf; + + /* Sum normal and weight. */ + sum_N += w * N; + } + } + + /* Normalize. */ + float3 N = safe_normalize(sum_N); + return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N; +} + +template +# if defined(__KERNEL_OPTIX__) +ccl_device_inline +# else +ccl_device_noinline +# endif + void + svm_node_bevel(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint num_samples, radius_offset, normal_offset, out_offset; + svm_unpack_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset); + + float3 bevel_N = sd->N; + + IF_KERNEL_NODES_FEATURE(RAYTRACE) + { + float radius = stack_load_float(stack, radius_offset); + +# ifdef __KERNEL_OPTIX__ + bevel_N = optixDirectCall(1, kg, state, sd, radius, num_samples); +# else + bevel_N = svm_bevel(kg, state, sd, radius, num_samples); +# endif + + if (stack_valid(normal_offset)) { + /* Preserve input normal. */ + float3 ref_N = stack_load_float3(stack, normal_offset); + bevel_N = normalize(ref_N + (bevel_N - sd->N)); + } + } + + stack_store_float3(stack, out_offset, bevel_N); +} + +#endif /* __SHADER_RAYTRACE__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/blackbody.h b/intern/cycles/kernel/svm/blackbody.h new file mode 100644 index 00000000000..da15550f918 --- /dev/null +++ b/intern/cycles/kernel/svm/blackbody.h @@ -0,0 +1,55 @@ +/* + * Adapted from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2013, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#include "kernel/svm/math_util.h" + +CCL_NAMESPACE_BEGIN + +/* Blackbody Node */ + +ccl_device_noinline void svm_node_blackbody(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint temperature_offset, + uint col_offset) +{ + /* Input */ + float temperature = stack_load_float(stack, temperature_offset); + + float3 color_rgb = svm_math_blackbody_color(temperature); + + stack_store_float3(stack, col_offset, color_rgb); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/brick.h b/intern/cycles/kernel/svm/brick.h new file mode 100644 index 00000000000..3c8729fa027 --- /dev/null +++ b/intern/cycles/kernel/svm/brick.h @@ -0,0 +1,141 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Brick */ + +ccl_device_inline float brick_noise(uint n) /* fast integer noise */ +{ + uint nn; + n = (n + 1013) & 0x7fffffff; + n = (n >> 13) ^ n; + nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff; + return 0.5f * ((float)nn / 1073741824.0f); +} + +ccl_device_noinline_cpu float2 svm_brick(float3 p, + float mortar_size, + float mortar_smooth, + float bias, + float brick_width, + float row_height, + float offset_amount, + int offset_frequency, + float squash_amount, + int squash_frequency) +{ + int bricknum, rownum; + float offset = 0.0f; + float x, y; + + rownum = floor_to_int(p.y / row_height); + + if (offset_frequency && squash_frequency) { + brick_width *= (rownum % squash_frequency) ? 1.0f : squash_amount; /* squash */ + offset = (rownum % offset_frequency) ? 0.0f : (brick_width * offset_amount); /* offset */ + } + + bricknum = floor_to_int((p.x + offset) / brick_width); + + x = (p.x + offset) - brick_width * bricknum; + y = p.y - row_height * rownum; + + float tint = saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias)); + float min_dist = min(min(x, y), min(brick_width - x, row_height - y)); + + float mortar; + if (min_dist >= mortar_size) { + mortar = 0.0f; + } + else if (mortar_smooth == 0.0f) { + mortar = 1.0f; + } + else { + min_dist = 1.0f - min_dist / mortar_size; + mortar = (min_dist < mortar_smooth) ? 
smoothstepf(min_dist / mortar_smooth) : 1.0f; + } + + return make_float2(tint, mortar); +} + +ccl_device_noinline int svm_node_tex_brick( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) +{ + uint4 node2 = read_node(kg, &offset); + uint4 node3 = read_node(kg, &offset); + uint4 node4 = read_node(kg, &offset); + + /* Input and Output Sockets */ + uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset; + uint mortar_size_offset, bias_offset, brick_width_offset, row_height_offset; + uint color_offset, fac_offset, mortar_smooth_offset; + + /* RNA properties */ + uint offset_frequency, squash_frequency; + + svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset); + svm_unpack_node_uchar4( + node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset); + svm_unpack_node_uchar4( + node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset); + + svm_unpack_node_uchar2(node2.x, &offset_frequency, &squash_frequency); + + float3 co = stack_load_float3(stack, co_offset); + + float3 color1 = stack_load_float3(stack, color1_offset); + float3 color2 = stack_load_float3(stack, color2_offset); + float3 mortar = stack_load_float3(stack, mortar_offset); + + float scale = stack_load_float_default(stack, scale_offset, node2.y); + float mortar_size = stack_load_float_default(stack, mortar_size_offset, node2.z); + float mortar_smooth = stack_load_float_default(stack, mortar_smooth_offset, node4.x); + float bias = stack_load_float_default(stack, bias_offset, node2.w); + float brick_width = stack_load_float_default(stack, brick_width_offset, node3.x); + float row_height = stack_load_float_default(stack, row_height_offset, node3.y); + float offset_amount = __int_as_float(node3.z); + float squash_amount = __int_as_float(node3.w); + + float2 f2 = svm_brick(co * scale, + mortar_size, + mortar_smooth, + bias, + brick_width, + row_height, + offset_amount, + offset_frequency, + squash_amount, + squash_frequency); + + float tint = f2.x; + float f = f2.y; + + if (f != 1.0f) { + float facm = 1.0f - tint; + color1 = facm * color1 + tint * color2; + } + + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, color1 * (1.0f - f) + mortar * f); + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, f); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/brightness.h b/intern/cycles/kernel/svm/brightness.h new file mode 100644 index 00000000000..5c82a4347cd --- /dev/null +++ b/intern/cycles/kernel/svm/brightness.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "kernel/svm/color_util.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline void svm_node_brightness( + ccl_private ShaderData *sd, ccl_private float *stack, uint in_color, uint out_color, uint node) +{ + uint bright_offset, contrast_offset; + float3 color = stack_load_float3(stack, in_color); + + svm_unpack_node_uchar2(node, &bright_offset, &contrast_offset); + float brightness = stack_load_float(stack, bright_offset); + float contrast = stack_load_float(stack, contrast_offset); + + color = svm_brightness_contrast(color, brightness, contrast); + + if (stack_valid(out_color)) + stack_store_float3(stack, out_color, color); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/bump.h b/intern/cycles/kernel/svm/bump.h new file mode 100644 index 00000000000..2fae06fa54b --- /dev/null +++ b/intern/cycles/kernel/svm/bump.h @@ -0,0 +1,61 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Bump Eval Nodes */ + +ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint offset) +{ + /* save state */ + stack_store_float3(stack, offset + 0, sd->P); + stack_store_float3(stack, offset + 3, sd->dP.dx); + stack_store_float3(stack, offset + 6, sd->dP.dy); + + /* set state as if undisplaced */ + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED); + + if (desc.offset != ATTR_STD_NOT_FOUND) { + float3 P, dPdx, dPdy; + P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy); + + object_position_transform(kg, sd, &P); + object_dir_transform(kg, sd, &dPdx); + object_dir_transform(kg, sd, &dPdy); + + sd->P = P; + sd->dP.dx = dPdx; + sd->dP.dy = dPdy; + } +} + +ccl_device_noinline void svm_node_leave_bump_eval(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint offset) +{ + /* restore state */ + sd->P = stack_load_float3(stack, offset + 0); + sd->dP.dx = stack_load_float3(stack, offset + 3); + sd->dP.dy = stack_load_float3(stack, offset + 6); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/camera.h b/intern/cycles/kernel/svm/camera.h new file mode 100644 index 00000000000..c71c02e6b19 --- /dev/null +++ b/intern/cycles/kernel/svm/camera.h @@ -0,0 +1,47 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline void svm_node_camera(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint out_vector, + uint out_zdepth, + uint out_distance) +{ + float distance; + float zdepth; + float3 vector; + + Transform tfm = kernel_data.cam.worldtocamera; + vector = transform_point(&tfm, sd->P); + zdepth = vector.z; + distance = len(vector); + + if (stack_valid(out_vector)) + stack_store_float3(stack, out_vector, normalize(vector)); + + if (stack_valid(out_zdepth)) + stack_store_float(stack, out_zdepth, zdepth); + + if (stack_valid(out_distance)) + stack_store_float(stack, out_distance, distance); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/checker.h b/intern/cycles/kernel/svm/checker.h new file mode 100644 index 00000000000..a79b1651f44 --- /dev/null +++ b/intern/cycles/kernel/svm/checker.h @@ -0,0 +1,61 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Checker */ + +ccl_device float svm_checker(float3 p) +{ + /* avoid precision issues on unit coordinates */ + p.x = (p.x + 0.000001f) * 0.999999f; + p.y = (p.y + 0.000001f) * 0.999999f; + p.z = (p.z + 0.000001f) * 0.999999f; + + int xi = abs(float_to_int(floorf(p.x))); + int yi = abs(float_to_int(floorf(p.y))); + int zi = abs(float_to_int(floorf(p.z))); + + return ((xi % 2 == yi % 2) == (zi % 2)) ? 1.0f : 0.0f; +} + +ccl_device_noinline void svm_node_tex_checker(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint co_offset, color1_offset, color2_offset, scale_offset; + uint color_offset, fac_offset; + + svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset); + svm_unpack_node_uchar2(node.z, &color_offset, &fac_offset); + + float3 co = stack_load_float3(stack, co_offset); + float3 color1 = stack_load_float3(stack, color1_offset); + float3 color2 = stack_load_float3(stack, color2_offset); + float scale = stack_load_float_default(stack, scale_offset, node.w); + + float f = svm_checker(co * scale); + + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, (f == 1.0f) ? color1 : color2); + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, f); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/clamp.h b/intern/cycles/kernel/svm/clamp.h new file mode 100644 index 00000000000..c07c0206d29 --- /dev/null +++ b/intern/cycles/kernel/svm/clamp.h @@ -0,0 +1,49 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Clamp Node */ + +ccl_device_noinline int svm_node_clamp(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint value_stack_offset, + uint parameters_stack_offsets, + uint result_stack_offset, + int offset) +{ + uint min_stack_offset, max_stack_offset, type; + svm_unpack_node_uchar3(parameters_stack_offsets, &min_stack_offset, &max_stack_offset, &type); + + uint4 defaults = read_node(kg, &offset); + + float value = stack_load_float(stack, value_stack_offset); + float min = stack_load_float_default(stack, min_stack_offset, defaults.x); + float max = stack_load_float_default(stack, max_stack_offset, defaults.y); + + if (type == NODE_CLAMP_RANGE && (min > max)) { + stack_store_float(stack, result_stack_offset, clamp(value, max, min)); + } + else { + stack_store_float(stack, result_stack_offset, clamp(value, min, max)); + } + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h new file mode 100644 index 00000000000..1dcfe003f74 --- /dev/null +++ b/intern/cycles/kernel/svm/closure.h @@ -0,0 +1,1260 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Closure Nodes */ + +ccl_device void svm_node_glass_setup(ccl_private ShaderData *sd, + ccl_private MicrofacetBsdf *bsdf, + int type, + float eta, + float roughness, + bool refract) +{ + if (type == CLOSURE_BSDF_SHARP_GLASS_ID) { + if (refract) { + bsdf->alpha_y = 0.0f; + bsdf->alpha_x = 0.0f; + bsdf->ior = eta; + sd->flag |= bsdf_refraction_setup(bsdf); + } + else { + bsdf->alpha_y = 0.0f; + bsdf->alpha_x = 0.0f; + bsdf->ior = 0.0f; + sd->flag |= bsdf_reflection_setup(bsdf); + } + } + else if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) { + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + bsdf->ior = eta; + + if (refract) + sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); + else + sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); + } + else { + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + bsdf->ior = eta; + + if (refract) + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); + else + sd->flag |= bsdf_microfacet_ggx_setup(bsdf); + } +} + +ccl_device_inline int svm_node_closure_bsdf_skip(KernelGlobals kg, int offset, uint type) +{ + if (type == CLOSURE_BSDF_PRINCIPLED_ID) { + /* Read all principled BSDF extra data to get the right offset. 
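+ *
+ * The Principled BSDF reads four additional uint4 nodes (the second
+ * parameter block, base color, clearcoat normal / subsurface data and
+ * subsurface color), so they are skipped here to keep the node offset
+ * in sync when the closure is not evaluated.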
*/ + read_node(kg, &offset); + read_node(kg, &offset); + read_node(kg, &offset); + read_node(kg, &offset); + } + + return offset; +} + +template +ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node, + uint32_t path_flag, + int offset) +{ + uint type, param1_offset, param2_offset; + + uint mix_weight_offset; + svm_unpack_node_uchar4(node.y, &type, ¶m1_offset, ¶m2_offset, &mix_weight_offset); + float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) : + 1.0f); + + /* note we read this extra node before weight check, so offset is added */ + uint4 data_node = read_node(kg, &offset); + + /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */ + IF_KERNEL_NODES_FEATURE(BSDF) + { + if ((shader_type != SHADER_TYPE_SURFACE) || mix_weight == 0.0f) { + return svm_node_closure_bsdf_skip(kg, offset, type); + } + } + else + { + return svm_node_closure_bsdf_skip(kg, offset, type); + } + + float3 N = stack_valid(data_node.x) ? stack_load_float3(stack, data_node.x) : sd->N; + if (!(sd->type & PRIMITIVE_ALL_CURVE)) { + N = ensure_valid_reflection(sd->Ng, sd->I, N); + } + + float param1 = (stack_valid(param1_offset)) ? stack_load_float(stack, param1_offset) : + __uint_as_float(node.z); + float param2 = (stack_valid(param2_offset)) ? stack_load_float(stack, param2_offset) : + __uint_as_float(node.w); + + switch (type) { +#ifdef __PRINCIPLED__ + case CLOSURE_BSDF_PRINCIPLED_ID: { + uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, + sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, + eta_offset, transmission_offset, anisotropic_rotation_offset, + transmission_roughness_offset; + uint4 data_node2 = read_node(kg, &offset); + + float3 T = stack_load_float3(stack, data_node.y); + svm_unpack_node_uchar4(data_node.z, + &specular_offset, + &roughness_offset, + &specular_tint_offset, + &anisotropic_offset); + svm_unpack_node_uchar4(data_node.w, + &sheen_offset, + &sheen_tint_offset, + &clearcoat_offset, + &clearcoat_roughness_offset); + svm_unpack_node_uchar4(data_node2.x, + &eta_offset, + &transmission_offset, + &anisotropic_rotation_offset, + &transmission_roughness_offset); + + // get Disney principled parameters + float metallic = param1; + float subsurface = param2; + float specular = stack_load_float(stack, specular_offset); + float roughness = stack_load_float(stack, roughness_offset); + float specular_tint = stack_load_float(stack, specular_tint_offset); + float anisotropic = stack_load_float(stack, anisotropic_offset); + float sheen = stack_load_float(stack, sheen_offset); + float sheen_tint = stack_load_float(stack, sheen_tint_offset); + float clearcoat = stack_load_float(stack, clearcoat_offset); + float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset); + float transmission = stack_load_float(stack, transmission_offset); + float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset); + float transmission_roughness = stack_load_float(stack, transmission_roughness_offset); + float eta = fmaxf(stack_load_float(stack, eta_offset), 1e-5f); + + ClosureType distribution = (ClosureType)data_node2.y; + ClosureType subsurface_method = (ClosureType)data_node2.z; + + /* rotate tangent */ + if (anisotropic_rotation != 0.0f) + T = rotate_around_axis(T, N, anisotropic_rotation * M_2PI_F); + + /* calculate ior */ + float ior = (sd->flag & SD_BACKFACING) ? 
1.0f / eta : eta; + + // calculate fresnel for refraction + float cosNO = dot(N, sd->I); + float fresnel = fresnel_dielectric_cos(cosNO, ior); + + // calculate weights of the diffuse and specular part + float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transmission)); + + float final_transmission = saturate(transmission) * (1.0f - saturate(metallic)); + float specular_weight = (1.0f - final_transmission); + + // get the base color + uint4 data_base_color = read_node(kg, &offset); + float3 base_color = stack_valid(data_base_color.x) ? + stack_load_float3(stack, data_base_color.x) : + make_float3(__uint_as_float(data_base_color.y), + __uint_as_float(data_base_color.z), + __uint_as_float(data_base_color.w)); + + // get the additional clearcoat normal and subsurface scattering radius + uint4 data_cn_ssr = read_node(kg, &offset); + float3 clearcoat_normal = stack_valid(data_cn_ssr.x) ? + stack_load_float3(stack, data_cn_ssr.x) : + sd->N; + if (!(sd->type & PRIMITIVE_ALL_CURVE)) { + clearcoat_normal = ensure_valid_reflection(sd->Ng, sd->I, clearcoat_normal); + } + float3 subsurface_radius = stack_valid(data_cn_ssr.y) ? + stack_load_float3(stack, data_cn_ssr.y) : + make_float3(1.0f, 1.0f, 1.0f); + float subsurface_ior = stack_valid(data_cn_ssr.z) ? stack_load_float(stack, data_cn_ssr.z) : + 1.4f; + float subsurface_anisotropy = stack_valid(data_cn_ssr.w) ? + stack_load_float(stack, data_cn_ssr.w) : + 0.0f; + + // get the subsurface color + uint4 data_subsurface_color = read_node(kg, &offset); + float3 subsurface_color = stack_valid(data_subsurface_color.x) ? + stack_load_float3(stack, data_subsurface_color.x) : + make_float3(__uint_as_float(data_subsurface_color.y), + __uint_as_float(data_subsurface_color.z), + __uint_as_float(data_subsurface_color.w)); + + float3 weight = sd->svm_closure_weight * mix_weight; + +# ifdef __SUBSURFACE__ + float3 mixed_ss_base_color = subsurface_color * subsurface + + base_color * (1.0f - subsurface); + float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight; + + /* disable in case of diffuse ancestor, can't see it well then and + * adds considerably noise due to probabilities of continuing path + * getting lower and lower */ + if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) { + subsurface = 0.0f; + + /* need to set the base color in this case such that the + * rays get the correctly mixed color after transmitting + * the object */ + base_color = mixed_ss_base_color; + } + + /* diffuse */ + if (fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) { + if (subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { + float3 diff_weight = weight * base_color * diffuse_weight; + + ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *) + bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight); + + if (bsdf) { + bsdf->N = N; + bsdf->roughness = roughness; + + /* setup bsdf */ + sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_FULL); + } + } + else if (subsurface > CLOSURE_WEIGHT_CUTOFF) { + ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight); + + if (bssrdf) { + bssrdf->radius = subsurface_radius * subsurface; + bssrdf->albedo = mixed_ss_base_color; + bssrdf->N = N; + bssrdf->roughness = roughness; + + /* Clamps protecting against bad/extreme and non physical values. 
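+ *
+ * The subsurface IOR is limited to [1.01, 3.8] and the anisotropy to
+ * [0.0, 0.9], the same ranges used by the standalone subsurface
+ * closures further down.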
*/ + subsurface_ior = clamp(subsurface_ior, 1.01f, 3.8f); + bssrdf->anisotropy = clamp(subsurface_anisotropy, 0.0f, 0.9f); + + /* setup bsdf */ + sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method, subsurface_ior); + } + } + } +# else + /* diffuse */ + if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { + float3 diff_weight = weight * base_color * diffuse_weight; + + ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc( + sd, sizeof(PrincipledDiffuseBsdf), diff_weight); + + if (bsdf) { + bsdf->N = N; + bsdf->roughness = roughness; + + /* setup bsdf */ + sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_FULL); + } + } +# endif + + /* sheen */ + if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF && sheen > CLOSURE_WEIGHT_CUTOFF) { + float m_cdlum = linear_rgb_to_gray(kg, base_color); + float3 m_ctint = m_cdlum > 0.0f ? + base_color / m_cdlum : + make_float3(1.0f, 1.0f, 1.0f); // normalize lum. to isolate hue+sat + + /* color of the sheen component */ + float3 sheen_color = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - sheen_tint) + + m_ctint * sheen_tint; + + float3 sheen_weight = weight * sheen * sheen_color * diffuse_weight; + + ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)bsdf_alloc( + sd, sizeof(PrincipledSheenBsdf), sheen_weight); + + if (bsdf) { + bsdf->N = N; + + /* setup bsdf */ + sd->flag |= bsdf_principled_sheen_setup(sd, bsdf); + } + } + + /* specular reflection */ +# ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) { +# endif + if (specular_weight > CLOSURE_WEIGHT_CUTOFF && + (specular > CLOSURE_WEIGHT_CUTOFF || metallic > CLOSURE_WEIGHT_CUTOFF)) { + float3 spec_weight = weight * specular_weight; + + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), spec_weight); + ccl_private MicrofacetExtra *extra = + (bsdf != NULL) ? + (ccl_private MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) : + NULL; + + if (bsdf && extra) { + bsdf->N = N; + bsdf->ior = (2.0f / (1.0f - safe_sqrtf(0.08f * specular))) - 1.0f; + bsdf->T = T; + bsdf->extra = extra; + + float aspect = safe_sqrtf(1.0f - anisotropic * 0.9f); + float r2 = roughness * roughness; + + bsdf->alpha_x = r2 / aspect; + bsdf->alpha_y = r2 * aspect; + + float m_cdlum = 0.3f * base_color.x + 0.6f * base_color.y + + 0.1f * base_color.z; // luminance approx. + float3 m_ctint = m_cdlum > 0.0f ? + base_color / m_cdlum : + make_float3( + 1.0f, 1.0f, 1.0f); // normalize lum. 
to isolate hue+sat + float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint) + + m_ctint * specular_tint; + + bsdf->extra->cspec0 = (specular * 0.08f * tmp_col) * (1.0f - metallic) + + base_color * metallic; + bsdf->extra->color = base_color; + bsdf->extra->clearcoat = 0.0f; + + /* setup bsdf */ + if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || + roughness <= 0.075f) /* use single-scatter GGX */ + sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); + else /* use multi-scatter GGX */ + sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); + } + } +# ifdef __CAUSTICS_TRICKS__ + } +# endif + + /* BSDF */ +# ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_reflective || + kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) { +# endif + if (final_transmission > CLOSURE_WEIGHT_CUTOFF) { + float3 glass_weight = weight * final_transmission; + float3 cspec0 = base_color * specular_tint + + make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint); + + if (roughness <= 5e-2f || + distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) { /* use single-scatter GGX */ + float refl_roughness = roughness; + + /* reflection */ +# ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) +# endif + { + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), glass_weight * fresnel); + ccl_private MicrofacetExtra *extra = + (bsdf != NULL) ? (ccl_private MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)) : + NULL; + + if (bsdf && extra) { + bsdf->N = N; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = extra; + + bsdf->alpha_x = refl_roughness * refl_roughness; + bsdf->alpha_y = refl_roughness * refl_roughness; + bsdf->ior = ior; + + bsdf->extra->color = base_color; + bsdf->extra->cspec0 = cspec0; + bsdf->extra->clearcoat = 0.0f; + + /* setup bsdf */ + sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); + } + } + + /* refraction */ +# ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) +# endif + { + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), base_color * glass_weight * (1.0f - fresnel)); + if (bsdf) { + bsdf->N = N; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = NULL; + + if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) + transmission_roughness = 1.0f - (1.0f - refl_roughness) * + (1.0f - transmission_roughness); + else + transmission_roughness = refl_roughness; + + bsdf->alpha_x = transmission_roughness * transmission_roughness; + bsdf->alpha_y = transmission_roughness * transmission_roughness; + bsdf->ior = ior; + + /* setup bsdf */ + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); + } + } + } + else { /* use multi-scatter GGX */ + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), glass_weight); + ccl_private MicrofacetExtra *extra = + (bsdf != NULL) ? 
(ccl_private MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)) : + NULL; + + if (bsdf && extra) { + bsdf->N = N; + bsdf->extra = extra; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + + bsdf->alpha_x = roughness * roughness; + bsdf->alpha_y = roughness * roughness; + bsdf->ior = ior; + + bsdf->extra->color = base_color; + bsdf->extra->cspec0 = cspec0; + bsdf->extra->clearcoat = 0.0f; + + /* setup bsdf */ + sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); + } + } + } +# ifdef __CAUSTICS_TRICKS__ + } +# endif + + /* clearcoat */ +# ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) { +# endif + if (clearcoat > CLOSURE_WEIGHT_CUTOFF) { + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), weight); + ccl_private MicrofacetExtra *extra = + (bsdf != NULL) ? + (ccl_private MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) : + NULL; + + if (bsdf && extra) { + bsdf->N = clearcoat_normal; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->ior = 1.5f; + bsdf->extra = extra; + + bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness; + bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness; + + bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); + bsdf->extra->clearcoat = clearcoat; + + /* setup bsdf */ + sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); + } + } +# ifdef __CAUSTICS_TRICKS__ + } +# endif + + break; + } +#endif /* __PRINCIPLED__ */ + case CLOSURE_BSDF_DIFFUSE_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + ccl_private OrenNayarBsdf *bsdf = (ccl_private OrenNayarBsdf *)bsdf_alloc( + sd, sizeof(OrenNayarBsdf), weight); + + if (bsdf) { + bsdf->N = N; + + float roughness = param1; + + if (roughness == 0.0f) { + sd->flag |= bsdf_diffuse_setup((ccl_private DiffuseBsdf *)bsdf); + } + else { + bsdf->roughness = roughness; + sd->flag |= bsdf_oren_nayar_setup(bsdf); + } + } + break; + } + case CLOSURE_BSDF_TRANSLUCENT_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( + sd, sizeof(DiffuseBsdf), weight); + + if (bsdf) { + bsdf->N = N; + sd->flag |= bsdf_translucent_setup(bsdf); + } + break; + } + case CLOSURE_BSDF_TRANSPARENT_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + bsdf_transparent_setup(sd, weight, path_flag); + break; + } + case CLOSURE_BSDF_REFLECTION_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: { +#ifdef __CAUSTICS_TRICKS__ + if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) + break; +#endif + float3 weight = sd->svm_closure_weight * mix_weight; + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), weight); + + if (!bsdf) { + break; + } + + float roughness = sqr(param1); + + bsdf->N = N; + bsdf->ior = 0.0f; + bsdf->extra = NULL; + + if (data_node.y == SVM_STACK_INVALID) { + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + } + else { + bsdf->T = stack_load_float3(stack, data_node.y); + + /* rotate tangent */ + float rotation = stack_load_float(stack, data_node.z); + if (rotation != 0.0f) + bsdf->T = rotate_around_axis(bsdf->T, bsdf->N, rotation * M_2PI_F); + + /* compute 
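+ * the anisotropic alpha values: for anisotropy a in [-0.99, 0.99],
+ *   a <  0: alpha_x = roughness / (1 + a), alpha_y = roughness * (1 + a)
+ *   a >= 0: alpha_x = roughness * (1 - a), alpha_y = roughness / (1 - a)
+ * both of which reduce to alpha_x = alpha_y for isotropic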
roughness */ + float anisotropy = clamp(param2, -0.99f, 0.99f); + if (anisotropy < 0.0f) { + bsdf->alpha_x = roughness / (1.0f + anisotropy); + bsdf->alpha_y = roughness * (1.0f + anisotropy); + } + else { + bsdf->alpha_x = roughness * (1.0f - anisotropy); + bsdf->alpha_y = roughness / (1.0f - anisotropy); + } + } + + /* setup bsdf */ + if (type == CLOSURE_BSDF_REFLECTION_ID) + sd->flag |= bsdf_reflection_setup(bsdf); + else if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID) + sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); + else if (type == CLOSURE_BSDF_MICROFACET_GGX_ID) + sd->flag |= bsdf_microfacet_ggx_setup(bsdf); + else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) { + kernel_assert(stack_valid(data_node.w)); + bsdf->extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(sd, + sizeof(MicrofacetExtra)); + if (bsdf->extra) { + bsdf->extra->color = stack_load_float3(stack, data_node.w); + bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->clearcoat = 0.0f; + sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); + } + } + else { + sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf); + } + + break; + } + case CLOSURE_BSDF_REFRACTION_ID: + case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: { +#ifdef __CAUSTICS_TRICKS__ + if (!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) + break; +#endif + float3 weight = sd->svm_closure_weight * mix_weight; + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), weight); + + if (bsdf) { + bsdf->N = N; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = NULL; + + float eta = fmaxf(param2, 1e-5f); + eta = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta; + + /* setup bsdf */ + if (type == CLOSURE_BSDF_REFRACTION_ID) { + bsdf->alpha_x = 0.0f; + bsdf->alpha_y = 0.0f; + bsdf->ior = eta; + + sd->flag |= bsdf_refraction_setup(bsdf); + } + else { + float roughness = sqr(param1); + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + bsdf->ior = eta; + + if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) + sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); + else + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); + } + } + + break; + } + case CLOSURE_BSDF_SHARP_GLASS_ID: + case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: { +#ifdef __CAUSTICS_TRICKS__ + if (!kernel_data.integrator.caustics_reflective && + !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) { + break; + } +#endif + float3 weight = sd->svm_closure_weight * mix_weight; + + /* index of refraction */ + float eta = fmaxf(param2, 1e-5f); + eta = (sd->flag & SD_BACKFACING) ? 
1.0f / eta : eta; + + /* fresnel */ + float cosNO = dot(N, sd->I); + float fresnel = fresnel_dielectric_cos(cosNO, eta); + float roughness = sqr(param1); + + /* reflection */ +#ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) +#endif + { + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), weight * fresnel); + + if (bsdf) { + bsdf->N = N; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = NULL; + svm_node_glass_setup(sd, bsdf, type, eta, roughness, false); + } + } + + /* refraction */ +#ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) +#endif + { + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), weight * (1.0f - fresnel)); + + if (bsdf) { + bsdf->N = N; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = NULL; + svm_node_glass_setup(sd, bsdf, type, eta, roughness, true); + } + } + + break; + } + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: { +#ifdef __CAUSTICS_TRICKS__ + if (!kernel_data.integrator.caustics_reflective && + !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) + break; +#endif + float3 weight = sd->svm_closure_weight * mix_weight; + ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), weight); + if (!bsdf) { + break; + } + + ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)); + if (!extra) { + break; + } + + bsdf->N = N; + bsdf->extra = extra; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + + float roughness = sqr(param1); + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + float eta = fmaxf(param2, 1e-5f); + bsdf->ior = (sd->flag & SD_BACKFACING) ? 
1.0f / eta : eta; + + kernel_assert(stack_valid(data_node.z)); + bsdf->extra->color = stack_load_float3(stack, data_node.z); + bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->clearcoat = 0.0f; + + /* setup bsdf */ + sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); + break; + } + case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + ccl_private VelvetBsdf *bsdf = (ccl_private VelvetBsdf *)bsdf_alloc( + sd, sizeof(VelvetBsdf), weight); + + if (bsdf) { + bsdf->N = N; + + bsdf->sigma = saturate(param1); + sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf); + } + break; + } + case CLOSURE_BSDF_GLOSSY_TOON_ID: +#ifdef __CAUSTICS_TRICKS__ + if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) + break; + ATTR_FALLTHROUGH; +#endif + case CLOSURE_BSDF_DIFFUSE_TOON_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + ccl_private ToonBsdf *bsdf = (ccl_private ToonBsdf *)bsdf_alloc( + sd, sizeof(ToonBsdf), weight); + + if (bsdf) { + bsdf->N = N; + bsdf->size = param1; + bsdf->smooth = param2; + + if (type == CLOSURE_BSDF_DIFFUSE_TOON_ID) + sd->flag |= bsdf_diffuse_toon_setup(bsdf); + else + sd->flag |= bsdf_glossy_toon_setup(bsdf); + } + break; + } +#ifdef __HAIR__ + case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: { + uint4 data_node2 = read_node(kg, &offset); + uint4 data_node3 = read_node(kg, &offset); + uint4 data_node4 = read_node(kg, &offset); + + float3 weight = sd->svm_closure_weight * mix_weight; + + uint offset_ofs, ior_ofs, color_ofs, parametrization; + svm_unpack_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, ¶metrization); + float alpha = stack_load_float_default(stack, offset_ofs, data_node.z); + float ior = stack_load_float_default(stack, ior_ofs, data_node.w); + + uint coat_ofs, melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs; + svm_unpack_node_uchar4(data_node2.x, + &coat_ofs, + &melanin_ofs, + &melanin_redness_ofs, + &absorption_coefficient_ofs); + + uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs; + svm_unpack_node_uchar4( + data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs); + + const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y); + float random = 0.0f; + if (attr_descr_random.offset != ATTR_STD_NOT_FOUND) { + random = primitive_surface_attribute_float(kg, sd, attr_descr_random, NULL, NULL); + } + else { + random = stack_load_float_default(stack, random_ofs, data_node3.y); + } + + ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)bsdf_alloc( + sd, sizeof(PrincipledHairBSDF), weight); + if (bsdf) { + ccl_private PrincipledHairExtra *extra = (ccl_private PrincipledHairExtra *) + closure_alloc_extra(sd, sizeof(PrincipledHairExtra)); + + if (!extra) + break; + + /* Random factors range: [-randomization/2, +randomization/2]. */ + float random_roughness = stack_load_float_default( + stack, random_roughness_ofs, data_node3.w); + float factor_random_roughness = 1.0f + 2.0f * (random - 0.5f) * random_roughness; + float roughness = param1 * factor_random_roughness; + float radial_roughness = param2 * factor_random_roughness; + + /* Remap Coat value to [0, 100]% of Roughness. 
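+ * m0_roughness = 1 - clamp(coat, 0, 1), so higher Coat values reduce
+ * the roughness applied to the primary (R) reflection lobe.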
*/ + float coat = stack_load_float_default(stack, coat_ofs, data_node2.y); + float m0_roughness = 1.0f - clamp(coat, 0.0f, 1.0f); + + bsdf->N = N; + bsdf->v = roughness; + bsdf->s = radial_roughness; + bsdf->m0_roughness = m0_roughness; + bsdf->alpha = alpha; + bsdf->eta = ior; + bsdf->extra = extra; + + switch (parametrization) { + case NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION: { + float3 absorption_coefficient = stack_load_float3(stack, absorption_coefficient_ofs); + bsdf->sigma = absorption_coefficient; + break; + } + case NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION: { + float melanin = stack_load_float_default(stack, melanin_ofs, data_node2.z); + float melanin_redness = stack_load_float_default( + stack, melanin_redness_ofs, data_node2.w); + + /* Randomize melanin. */ + float random_color = stack_load_float_default(stack, random_color_ofs, data_node3.z); + random_color = clamp(random_color, 0.0f, 1.0f); + float factor_random_color = 1.0f + 2.0f * (random - 0.5f) * random_color; + melanin *= factor_random_color; + + /* Map melanin 0..inf from more perceptually linear 0..1. */ + melanin = -logf(fmaxf(1.0f - melanin, 0.0001f)); + + /* Benedikt Bitterli's melanin ratio remapping. */ + float eumelanin = melanin * (1.0f - melanin_redness); + float pheomelanin = melanin * melanin_redness; + float3 melanin_sigma = bsdf_principled_hair_sigma_from_concentration(eumelanin, + pheomelanin); + + /* Optional tint. */ + float3 tint = stack_load_float3(stack, tint_ofs); + float3 tint_sigma = bsdf_principled_hair_sigma_from_reflectance(tint, + radial_roughness); + + bsdf->sigma = melanin_sigma + tint_sigma; + break; + } + case NODE_PRINCIPLED_HAIR_REFLECTANCE: { + float3 color = stack_load_float3(stack, color_ofs); + bsdf->sigma = bsdf_principled_hair_sigma_from_reflectance(color, radial_roughness); + break; + } + default: { + /* Fallback to brownish hair, same as defaults for melanin. 
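+ * (pheomelanin-only absorption, eumelanin concentration zero).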
*/ + kernel_assert(!"Invalid Principled Hair parametrization!"); + bsdf->sigma = bsdf_principled_hair_sigma_from_concentration(0.0f, 0.8054375f); + break; + } + } + + sd->flag |= bsdf_principled_hair_setup(sd, bsdf); + } + break; + } + case CLOSURE_BSDF_HAIR_REFLECTION_ID: + case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + + ccl_private HairBsdf *bsdf = (ccl_private HairBsdf *)bsdf_alloc( + sd, sizeof(HairBsdf), weight); + + if (bsdf) { + bsdf->N = N; + bsdf->roughness1 = param1; + bsdf->roughness2 = param2; + bsdf->offset = -stack_load_float(stack, data_node.z); + + if (stack_valid(data_node.y)) { + bsdf->T = normalize(stack_load_float3(stack, data_node.y)); + } + else if (!(sd->type & PRIMITIVE_ALL_CURVE)) { + bsdf->T = normalize(sd->dPdv); + bsdf->offset = 0.0f; + } + else + bsdf->T = normalize(sd->dPdu); + + if (type == CLOSURE_BSDF_HAIR_REFLECTION_ID) { + sd->flag |= bsdf_hair_reflection_setup(bsdf); + } + else { + sd->flag |= bsdf_hair_transmission_setup(bsdf); + } + } + + break; + } +#endif /* __HAIR__ */ + +#ifdef __SUBSURFACE__ + case CLOSURE_BSSRDF_BURLEY_ID: + case CLOSURE_BSSRDF_RANDOM_WALK_ID: + case CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, weight); + + if (bssrdf) { + /* disable in case of diffuse ancestor, can't see it well then and + * adds considerably noise due to probabilities of continuing path + * getting lower and lower */ + if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) + param1 = 0.0f; + + bssrdf->radius = stack_load_float3(stack, data_node.z) * param1; + bssrdf->albedo = sd->svm_closure_weight; + bssrdf->N = N; + bssrdf->roughness = FLT_MAX; + + const float subsurface_ior = clamp(param2, 1.01f, 3.8f); + const float subsurface_anisotropy = stack_load_float(stack, data_node.w); + bssrdf->anisotropy = clamp(subsurface_anisotropy, 0.0f, 0.9f); + + sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type, subsurface_ior); + } + + break; + } +#endif + default: + break; + } + + return offset; +} + +template +ccl_device_noinline void svm_node_closure_volume(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ +#ifdef __VOLUME__ + /* Only sum extinction for volumes, variable is shared with surface transparency. */ + if (shader_type != SHADER_TYPE_VOLUME) { + return; + } + + uint type, density_offset, anisotropy_offset; + + uint mix_weight_offset; + svm_unpack_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset); + float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) : + 1.0f); + + if (mix_weight == 0.0f) { + return; + } + + float density = (stack_valid(density_offset)) ? stack_load_float(stack, density_offset) : + __uint_as_float(node.z); + density = mix_weight * fmaxf(density, 0.0f); + + /* Compute scattering coefficient. */ + float3 weight = sd->svm_closure_weight; + + if (type == CLOSURE_VOLUME_ABSORPTION_ID) { + weight = make_float3(1.0f, 1.0f, 1.0f) - weight; + } + + weight *= density; + + /* Add closure for volume scattering. */ + if (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + ccl_private HenyeyGreensteinVolume *volume = (ccl_private HenyeyGreensteinVolume *)bsdf_alloc( + sd, sizeof(HenyeyGreensteinVolume), weight); + + if (volume) { + float anisotropy = (stack_valid(anisotropy_offset)) ? 
+ stack_load_float(stack, anisotropy_offset) : + __uint_as_float(node.w); + volume->g = anisotropy; /* g */ + sd->flag |= volume_henyey_greenstein_setup(volume); + } + } + + /* Sum total extinction weight. */ + volume_extinction_setup(sd, weight); +#endif +} + +template +ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node, + uint32_t path_flag, + int offset) +{ +#ifdef __VOLUME__ + uint4 value_node = read_node(kg, &offset); + uint4 attr_node = read_node(kg, &offset); + + /* Only sum extinction for volumes, variable is shared with surface transparency. */ + if (shader_type != SHADER_TYPE_VOLUME) { + return offset; + } + + uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset; + svm_unpack_node_uchar4( + node.y, &density_offset, &anisotropy_offset, &absorption_color_offset, &mix_weight_offset); + float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) : + 1.0f); + + if (mix_weight == 0.0f) { + return offset; + } + + /* Compute density. */ + float primitive_density = 1.0f; + float density = (stack_valid(density_offset)) ? stack_load_float(stack, density_offset) : + __uint_as_float(value_node.x); + density = mix_weight * fmaxf(density, 0.0f); + + if (density > CLOSURE_WEIGHT_CUTOFF) { + /* Density and color attribute lookup if available. */ + const AttributeDescriptor attr_density = find_attribute(kg, sd, attr_node.x); + if (attr_density.offset != ATTR_STD_NOT_FOUND) { + primitive_density = primitive_volume_attribute_float(kg, sd, attr_density); + density = fmaxf(density * primitive_density, 0.0f); + } + } + + if (density > CLOSURE_WEIGHT_CUTOFF) { + /* Compute scattering color. */ + float3 color = sd->svm_closure_weight; + + const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y); + if (attr_color.offset != ATTR_STD_NOT_FOUND) { + color *= primitive_volume_attribute_float3(kg, sd, attr_color); + } + + /* Add closure for volume scattering. */ + ccl_private HenyeyGreensteinVolume *volume = (ccl_private HenyeyGreensteinVolume *)bsdf_alloc( + sd, sizeof(HenyeyGreensteinVolume), color * density); + if (volume) { + float anisotropy = (stack_valid(anisotropy_offset)) ? + stack_load_float(stack, anisotropy_offset) : + __uint_as_float(value_node.y); + volume->g = anisotropy; + sd->flag |= volume_henyey_greenstein_setup(volume); + } + + /* Add extinction weight. */ + float3 zero = make_float3(0.0f, 0.0f, 0.0f); + float3 one = make_float3(1.0f, 1.0f, 1.0f); + float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)), zero); + float3 absorption = max(one - color, zero) * max(one - absorption_color, zero); + volume_extinction_setup(sd, (color + absorption) * density); + } + + /* Compute emission. */ + if (path_flag & PATH_RAY_SHADOW) { + /* Don't need emission for shadows. */ + return offset; + } + + uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset; + svm_unpack_node_uchar4( + node.z, &emission_offset, &emission_color_offset, &blackbody_offset, &temperature_offset); + float emission = (stack_valid(emission_offset)) ? stack_load_float(stack, emission_offset) : + __uint_as_float(value_node.z); + float blackbody = (stack_valid(blackbody_offset)) ? 
stack_load_float(stack, blackbody_offset) : + __uint_as_float(value_node.w); + + if (emission > CLOSURE_WEIGHT_CUTOFF) { + float3 emission_color = stack_load_float3(stack, emission_color_offset); + emission_setup(sd, emission * emission_color); + } + + if (blackbody > CLOSURE_WEIGHT_CUTOFF) { + float T = stack_load_float(stack, temperature_offset); + + /* Add flame temperature from attribute if available. */ + const AttributeDescriptor attr_temperature = find_attribute(kg, sd, attr_node.z); + if (attr_temperature.offset != ATTR_STD_NOT_FOUND) { + float temperature = primitive_volume_attribute_float(kg, sd, attr_temperature); + T *= fmaxf(temperature, 0.0f); + } + + T = fmaxf(T, 0.0f); + + /* Stefan-Boltzmann law. */ + float T4 = sqr(sqr(T)); + float sigma = 5.670373e-8f * 1e-6f / M_PI_F; + float intensity = sigma * mix(1.0f, T4, blackbody); + + if (intensity > CLOSURE_WEIGHT_CUTOFF) { + float3 blackbody_tint = stack_load_float3(stack, node.w); + float3 bb = blackbody_tint * intensity * svm_math_blackbody_color(T); + emission_setup(sd, bb); + } + } +#endif + return offset; +} + +ccl_device_noinline void svm_node_closure_emission(ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint mix_weight_offset = node.y; + float3 weight = sd->svm_closure_weight; + + if (stack_valid(mix_weight_offset)) { + float mix_weight = stack_load_float(stack, mix_weight_offset); + + if (mix_weight == 0.0f) + return; + + weight *= mix_weight; + } + + emission_setup(sd, weight); +} + +ccl_device_noinline void svm_node_closure_background(ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint mix_weight_offset = node.y; + float3 weight = sd->svm_closure_weight; + + if (stack_valid(mix_weight_offset)) { + float mix_weight = stack_load_float(stack, mix_weight_offset); + + if (mix_weight == 0.0f) + return; + + weight *= mix_weight; + } + + background_setup(sd, weight); +} + +ccl_device_noinline void svm_node_closure_holdout(ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint mix_weight_offset = node.y; + + if (stack_valid(mix_weight_offset)) { + float mix_weight = stack_load_float(stack, mix_weight_offset); + + if (mix_weight == 0.0f) + return; + + closure_alloc( + sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight * mix_weight); + } + else + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight); + + sd->flag |= SD_HOLDOUT; +} + +/* Closure Nodes */ + +ccl_device_inline void svm_node_closure_store_weight(ccl_private ShaderData *sd, float3 weight) +{ + sd->svm_closure_weight = weight; +} + +ccl_device void svm_node_closure_set_weight(ccl_private ShaderData *sd, uint r, uint g, uint b) +{ + float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b)); + svm_node_closure_store_weight(sd, weight); +} + +ccl_device void svm_node_closure_weight(ccl_private ShaderData *sd, + ccl_private float *stack, + uint weight_offset) +{ + float3 weight = stack_load_float3(stack, weight_offset); + svm_node_closure_store_weight(sd, weight); +} + +ccl_device_noinline void svm_node_emission_weight(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint color_offset = node.y; + uint strength_offset = node.z; + + float strength = stack_load_float(stack, strength_offset); + float3 weight = stack_load_float3(stack, color_offset) * strength; + + svm_node_closure_store_weight(sd, weight); +} + +ccl_device_noinline void svm_node_mix_closure(ccl_private 
ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + /* fetch weight from blend input, previous mix closures, + * and write to stack to be used by closure nodes later */ + uint weight_offset, in_weight_offset, weight1_offset, weight2_offset; + svm_unpack_node_uchar4( + node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset); + + float weight = stack_load_float(stack, weight_offset); + weight = saturate(weight); + + float in_weight = (stack_valid(in_weight_offset)) ? stack_load_float(stack, in_weight_offset) : + 1.0f; + + if (stack_valid(weight1_offset)) + stack_store_float(stack, weight1_offset, in_weight * (1.0f - weight)); + if (stack_valid(weight2_offset)) + stack_store_float(stack, weight2_offset, in_weight * weight); +} + +/* (Bump) normal */ + +ccl_device void svm_node_set_normal(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint in_direction, + uint out_normal) +{ + float3 normal = stack_load_float3(stack, in_direction); + sd->N = normal; + stack_store_float3(stack, out_normal, normal); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/color_util.h b/intern/cycles/kernel/svm/color_util.h new file mode 100644 index 00000000000..82024b61ba4 --- /dev/null +++ b/intern/cycles/kernel/svm/color_util.h @@ -0,0 +1,323 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device float3 svm_mix_blend(float t, float3 col1, float3 col2) +{ + return interp(col1, col2, t); +} + +ccl_device float3 svm_mix_add(float t, float3 col1, float3 col2) +{ + return interp(col1, col1 + col2, t); +} + +ccl_device float3 svm_mix_mul(float t, float3 col1, float3 col2) +{ + return interp(col1, col1 * col2, t); +} + +ccl_device float3 svm_mix_screen(float t, float3 col1, float3 col2) +{ + float tm = 1.0f - t; + float3 one = make_float3(1.0f, 1.0f, 1.0f); + float3 tm3 = make_float3(tm, tm, tm); + + return one - (tm3 + t * (one - col2)) * (one - col1); +} + +ccl_device float3 svm_mix_overlay(float t, float3 col1, float3 col2) +{ + float tm = 1.0f - t; + + float3 outcol = col1; + + if (outcol.x < 0.5f) + outcol.x *= tm + 2.0f * t * col2.x; + else + outcol.x = 1.0f - (tm + 2.0f * t * (1.0f - col2.x)) * (1.0f - outcol.x); + + if (outcol.y < 0.5f) + outcol.y *= tm + 2.0f * t * col2.y; + else + outcol.y = 1.0f - (tm + 2.0f * t * (1.0f - col2.y)) * (1.0f - outcol.y); + + if (outcol.z < 0.5f) + outcol.z *= tm + 2.0f * t * col2.z; + else + outcol.z = 1.0f - (tm + 2.0f * t * (1.0f - col2.z)) * (1.0f - outcol.z); + + return outcol; +} + +ccl_device float3 svm_mix_sub(float t, float3 col1, float3 col2) +{ + return interp(col1, col1 - col2, t); +} + +ccl_device float3 svm_mix_div(float t, float3 col1, float3 col2) +{ + float tm = 1.0f - t; + + float3 outcol = col1; + + if (col2.x != 0.0f) + outcol.x = tm * outcol.x + t * outcol.x / col2.x; + if (col2.y != 0.0f) + outcol.y = tm * outcol.y + t * outcol.y / col2.y; + if (col2.z != 0.0f) + outcol.z = tm * outcol.z + t * outcol.z / col2.z; + + return outcol; +} + +ccl_device float3 svm_mix_diff(float t, float3 col1, float3 col2) +{ + return interp(col1, fabs(col1 - col2), t); +} + +ccl_device float3 svm_mix_dark(float t, float3 col1, float3 col2) +{ + return interp(col1, min(col1, col2), t); +} + +ccl_device float3 svm_mix_light(float t, float3 col1, float3 col2) +{ + return interp(col1, max(col1, col2), t); +} + +ccl_device float3 svm_mix_dodge(float t, float3 col1, float3 col2) +{ + float3 outcol = col1; + + if (outcol.x != 0.0f) { + float tmp = 1.0f - t * col2.x; + if (tmp <= 0.0f) + outcol.x = 1.0f; + else if ((tmp = outcol.x / tmp) > 1.0f) + outcol.x = 1.0f; + else + outcol.x = tmp; + } + if (outcol.y != 0.0f) { + float tmp = 1.0f - t * col2.y; + if (tmp <= 0.0f) + outcol.y = 1.0f; + else if ((tmp = outcol.y / tmp) > 1.0f) + outcol.y = 1.0f; + else + outcol.y = tmp; + } + if (outcol.z != 0.0f) { + float tmp = 1.0f - t * col2.z; + if (tmp <= 0.0f) + outcol.z = 1.0f; + else if ((tmp = outcol.z / tmp) > 1.0f) + outcol.z = 1.0f; + else + outcol.z = tmp; + } + + return outcol; +} + +ccl_device float3 svm_mix_burn(float t, float3 col1, float3 col2) +{ + float tmp, tm = 1.0f - t; + + float3 outcol = col1; + + tmp = tm + t * col2.x; + if (tmp <= 0.0f) + outcol.x = 0.0f; + else if ((tmp = (1.0f - (1.0f - outcol.x) / tmp)) < 0.0f) + outcol.x = 0.0f; + else if (tmp > 1.0f) + outcol.x = 1.0f; + else + outcol.x = tmp; + + tmp = tm + t * col2.y; + if (tmp <= 0.0f) + outcol.y = 0.0f; + else if ((tmp = (1.0f - (1.0f - outcol.y) / tmp)) < 0.0f) + outcol.y = 0.0f; + else if (tmp > 1.0f) + outcol.y = 1.0f; + else + outcol.y = tmp; + + tmp = tm + t * col2.z; + if (tmp <= 0.0f) + outcol.z = 0.0f; + else if ((tmp = (1.0f - (1.0f - outcol.z) / tmp)) < 0.0f) + outcol.z = 0.0f; + else if (tmp > 1.0f) + outcol.z = 1.0f; + else + outcol.z = tmp; + + return outcol; +} + +ccl_device float3 svm_mix_hue(float t, float3 
col1, float3 col2) +{ + float3 outcol = col1; + + float3 hsv2 = rgb_to_hsv(col2); + + if (hsv2.y != 0.0f) { + float3 hsv = rgb_to_hsv(outcol); + hsv.x = hsv2.x; + float3 tmp = hsv_to_rgb(hsv); + + outcol = interp(outcol, tmp, t); + } + + return outcol; +} + +ccl_device float3 svm_mix_sat(float t, float3 col1, float3 col2) +{ + float tm = 1.0f - t; + + float3 outcol = col1; + + float3 hsv = rgb_to_hsv(outcol); + + if (hsv.y != 0.0f) { + float3 hsv2 = rgb_to_hsv(col2); + + hsv.y = tm * hsv.y + t * hsv2.y; + outcol = hsv_to_rgb(hsv); + } + + return outcol; +} + +ccl_device float3 svm_mix_val(float t, float3 col1, float3 col2) +{ + float tm = 1.0f - t; + + float3 hsv = rgb_to_hsv(col1); + float3 hsv2 = rgb_to_hsv(col2); + + hsv.z = tm * hsv.z + t * hsv2.z; + + return hsv_to_rgb(hsv); +} + +ccl_device float3 svm_mix_color(float t, float3 col1, float3 col2) +{ + float3 outcol = col1; + float3 hsv2 = rgb_to_hsv(col2); + + if (hsv2.y != 0.0f) { + float3 hsv = rgb_to_hsv(outcol); + hsv.x = hsv2.x; + hsv.y = hsv2.y; + float3 tmp = hsv_to_rgb(hsv); + + outcol = interp(outcol, tmp, t); + } + + return outcol; +} + +ccl_device float3 svm_mix_soft(float t, float3 col1, float3 col2) +{ + float tm = 1.0f - t; + + float3 one = make_float3(1.0f, 1.0f, 1.0f); + float3 scr = one - (one - col2) * (one - col1); + + return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr); +} + +ccl_device float3 svm_mix_linear(float t, float3 col1, float3 col2) +{ + return col1 + t * (2.0f * col2 + make_float3(-1.0f, -1.0f, -1.0f)); +} + +ccl_device float3 svm_mix_clamp(float3 col) +{ + return saturate3(col); +} + +ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2) +{ + float t = saturate(fac); + + switch (type) { + case NODE_MIX_BLEND: + return svm_mix_blend(t, c1, c2); + case NODE_MIX_ADD: + return svm_mix_add(t, c1, c2); + case NODE_MIX_MUL: + return svm_mix_mul(t, c1, c2); + case NODE_MIX_SCREEN: + return svm_mix_screen(t, c1, c2); + case NODE_MIX_OVERLAY: + return svm_mix_overlay(t, c1, c2); + case NODE_MIX_SUB: + return svm_mix_sub(t, c1, c2); + case NODE_MIX_DIV: + return svm_mix_div(t, c1, c2); + case NODE_MIX_DIFF: + return svm_mix_diff(t, c1, c2); + case NODE_MIX_DARK: + return svm_mix_dark(t, c1, c2); + case NODE_MIX_LIGHT: + return svm_mix_light(t, c1, c2); + case NODE_MIX_DODGE: + return svm_mix_dodge(t, c1, c2); + case NODE_MIX_BURN: + return svm_mix_burn(t, c1, c2); + case NODE_MIX_HUE: + return svm_mix_hue(t, c1, c2); + case NODE_MIX_SAT: + return svm_mix_sat(t, c1, c2); + case NODE_MIX_VAL: + return svm_mix_val(t, c1, c2); + case NODE_MIX_COLOR: + return svm_mix_color(t, c1, c2); + case NODE_MIX_SOFT: + return svm_mix_soft(t, c1, c2); + case NODE_MIX_LINEAR: + return svm_mix_linear(t, c1, c2); + case NODE_MIX_CLAMP: + return svm_mix_clamp(c1); + } + + return make_float3(0.0f, 0.0f, 0.0f); +} + +ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness, float contrast) +{ + float a = 1.0f + contrast; + float b = brightness - contrast * 0.5f; + + color.x = max(a * color.x + b, 0.0f); + color.y = max(a * color.y + b, 0.0f); + color.z = max(a * color.z + b, 0.0f); + + return color; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/convert.h b/intern/cycles/kernel/svm/convert.h new file mode 100644 index 00000000000..427ffd97f59 --- /dev/null +++ b/intern/cycles/kernel/svm/convert.h @@ -0,0 +1,78 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this 
file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Conversion Nodes */ + +ccl_device_noinline void svm_node_convert(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint type, + uint from, + uint to) +{ + switch (type) { + case NODE_CONVERT_FI: { + float f = stack_load_float(stack, from); + stack_store_int(stack, to, float_to_int(f)); + break; + } + case NODE_CONVERT_FV: { + float f = stack_load_float(stack, from); + stack_store_float3(stack, to, make_float3(f, f, f)); + break; + } + case NODE_CONVERT_CF: { + float3 f = stack_load_float3(stack, from); + float g = linear_rgb_to_gray(kg, f); + stack_store_float(stack, to, g); + break; + } + case NODE_CONVERT_CI: { + float3 f = stack_load_float3(stack, from); + int i = (int)linear_rgb_to_gray(kg, f); + stack_store_int(stack, to, i); + break; + } + case NODE_CONVERT_VF: { + float3 f = stack_load_float3(stack, from); + float g = average(f); + stack_store_float(stack, to, g); + break; + } + case NODE_CONVERT_VI: { + float3 f = stack_load_float3(stack, from); + int i = (int)average(f); + stack_store_int(stack, to, i); + break; + } + case NODE_CONVERT_IF: { + float f = (float)stack_load_int(stack, from); + stack_store_float(stack, to, f); + break; + } + case NODE_CONVERT_IV: { + float f = (float)stack_load_int(stack, from); + stack_store_float3(stack, to, make_float3(f, f, f)); + break; + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/displace.h b/intern/cycles/kernel/svm/displace.h new file mode 100644 index 00000000000..cea1436f36d --- /dev/null +++ b/intern/cycles/kernel/svm/displace.h @@ -0,0 +1,180 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/sample/mapping.h" + +CCL_NAMESPACE_BEGIN + +/* Bump Node */ + +ccl_device_noinline void svm_node_set_bump(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ +#ifdef __RAY_DIFFERENTIALS__ + /* get normal input */ + uint normal_offset, scale_offset, invert, use_object_space; + svm_unpack_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space); + + float3 normal_in = stack_valid(normal_offset) ? 
stack_load_float3(stack, normal_offset) : sd->N; + + float3 dPdx = sd->dP.dx; + float3 dPdy = sd->dP.dy; + + if (use_object_space) { + object_inverse_normal_transform(kg, sd, &normal_in); + object_inverse_dir_transform(kg, sd, &dPdx); + object_inverse_dir_transform(kg, sd, &dPdy); + } + + /* get surface tangents from normal */ + float3 Rx = cross(dPdy, normal_in); + float3 Ry = cross(normal_in, dPdx); + + /* get bump values */ + uint c_offset, x_offset, y_offset, strength_offset; + svm_unpack_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset); + + float h_c = stack_load_float(stack, c_offset); + float h_x = stack_load_float(stack, x_offset); + float h_y = stack_load_float(stack, y_offset); + + /* compute surface gradient and determinant */ + float det = dot(dPdx, Rx); + float3 surfgrad = (h_x - h_c) * Rx + (h_y - h_c) * Ry; + + float absdet = fabsf(det); + + float strength = stack_load_float(stack, strength_offset); + float scale = stack_load_float(stack, scale_offset); + + if (invert) + scale *= -1.0f; + + strength = max(strength, 0.0f); + + /* compute and output perturbed normal */ + float3 normal_out = safe_normalize(absdet * normal_in - scale * signf(det) * surfgrad); + if (is_zero(normal_out)) { + normal_out = normal_in; + } + else { + normal_out = normalize(strength * normal_out + (1.0f - strength) * normal_in); + } + + if (use_object_space) { + object_normal_transform(kg, sd, &normal_out); + } + + normal_out = ensure_valid_reflection(sd->Ng, sd->I, normal_out); + + stack_store_float3(stack, node.w, normal_out); +#endif +} + +/* Displacement Node */ + +ccl_device void svm_node_set_displacement(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint fac_offset) +{ + float3 dP = stack_load_float3(stack, fac_offset); + sd->P += dP; +} + +ccl_device_noinline void svm_node_displacement(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint height_offset, midlevel_offset, scale_offset, normal_offset; + svm_unpack_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset); + + float height = stack_load_float(stack, height_offset); + float midlevel = stack_load_float(stack, midlevel_offset); + float scale = stack_load_float(stack, scale_offset); + float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N; + uint space = node.w; + + float3 dP = normal; + + if (space == NODE_NORMAL_MAP_OBJECT) { + /* Object space. */ + object_inverse_normal_transform(kg, sd, &dP); + dP *= (height - midlevel) * scale; + object_dir_transform(kg, sd, &dP); + } + else { + /* World space. */ + dP *= (height - midlevel) * scale; + } + + stack_store_float3(stack, node.z, dP); +} + +ccl_device_noinline int svm_node_vector_displacement( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) +{ + uint4 data_node = read_node(kg, &offset); + uint space = data_node.x; + + uint vector_offset, midlevel_offset, scale_offset, displacement_offset; + svm_unpack_node_uchar4( + node.y, &vector_offset, &midlevel_offset, &scale_offset, &displacement_offset); + + float3 vector = stack_load_float3(stack, vector_offset); + float midlevel = stack_load_float(stack, midlevel_offset); + float scale = stack_load_float(stack, scale_offset); + float3 dP = (vector - make_float3(midlevel, midlevel, midlevel)) * scale; + + if (space == NODE_NORMAL_MAP_TANGENT) { + /* Tangent space. 
*/ + float3 normal = sd->N; + object_inverse_normal_transform(kg, sd, &normal); + + const AttributeDescriptor attr = find_attribute(kg, sd, node.z); + float3 tangent; + if (attr.offset != ATTR_STD_NOT_FOUND) { + tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL); + } + else { + tangent = normalize(sd->dPdu); + } + + float3 bitangent = normalize(cross(normal, tangent)); + const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); + if (attr_sign.offset != ATTR_STD_NOT_FOUND) { + float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL); + bitangent *= sign; + } + + dP = tangent * dP.x + normal * dP.y + bitangent * dP.z; + } + + if (space != NODE_NORMAL_MAP_WORLD) { + /* Tangent or object space. */ + object_dir_transform(kg, sd, &dP); + } + + stack_store_float3(stack, displacement_offset, dP); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/fractal_noise.h b/intern/cycles/kernel/svm/fractal_noise.h new file mode 100644 index 00000000000..b955d626dde --- /dev/null +++ b/intern/cycles/kernel/svm/fractal_noise.h @@ -0,0 +1,139 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/svm/noise.h" + +CCL_NAMESPACE_BEGIN + +/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. */ +ccl_device_noinline float fractal_noise_1d(float p, float octaves, float roughness) +{ + float fscale = 1.0f; + float amp = 1.0f; + float maxamp = 0.0f; + float sum = 0.0f; + octaves = clamp(octaves, 0.0f, 16.0f); + int n = float_to_int(octaves); + for (int i = 0; i <= n; i++) { + float t = noise_1d(fscale * p); + sum += t * amp; + maxamp += amp; + amp *= clamp(roughness, 0.0f, 1.0f); + fscale *= 2.0f; + } + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + float t = noise_1d(fscale * p); + float sum2 = sum + t * amp; + sum /= maxamp; + sum2 /= maxamp + amp; + return (1.0f - rmd) * sum + rmd * sum2; + } + else { + return sum / maxamp; + } +} + +/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. */ +ccl_device_noinline float fractal_noise_2d(float2 p, float octaves, float roughness) +{ + float fscale = 1.0f; + float amp = 1.0f; + float maxamp = 0.0f; + float sum = 0.0f; + octaves = clamp(octaves, 0.0f, 16.0f); + int n = float_to_int(octaves); + for (int i = 0; i <= n; i++) { + float t = noise_2d(fscale * p); + sum += t * amp; + maxamp += amp; + amp *= clamp(roughness, 0.0f, 1.0f); + fscale *= 2.0f; + } + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + float t = noise_2d(fscale * p); + float sum2 = sum + t * amp; + sum /= maxamp; + sum2 /= maxamp + amp; + return (1.0f - rmd) * sum + rmd * sum2; + } + else { + return sum / maxamp; + } +} + +/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. 
*/ +ccl_device_noinline float fractal_noise_3d(float3 p, float octaves, float roughness) +{ + float fscale = 1.0f; + float amp = 1.0f; + float maxamp = 0.0f; + float sum = 0.0f; + octaves = clamp(octaves, 0.0f, 16.0f); + int n = float_to_int(octaves); + for (int i = 0; i <= n; i++) { + float t = noise_3d(fscale * p); + sum += t * amp; + maxamp += amp; + amp *= clamp(roughness, 0.0f, 1.0f); + fscale *= 2.0f; + } + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + float t = noise_3d(fscale * p); + float sum2 = sum + t * amp; + sum /= maxamp; + sum2 /= maxamp + amp; + return (1.0f - rmd) * sum + rmd * sum2; + } + else { + return sum / maxamp; + } +} + +/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. */ +ccl_device_noinline float fractal_noise_4d(float4 p, float octaves, float roughness) +{ + float fscale = 1.0f; + float amp = 1.0f; + float maxamp = 0.0f; + float sum = 0.0f; + octaves = clamp(octaves, 0.0f, 16.0f); + int n = float_to_int(octaves); + for (int i = 0; i <= n; i++) { + float t = noise_4d(fscale * p); + sum += t * amp; + maxamp += amp; + amp *= clamp(roughness, 0.0f, 1.0f); + fscale *= 2.0f; + } + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + float t = noise_4d(fscale * p); + float sum2 = sum + t * amp; + sum /= maxamp; + sum2 /= maxamp + amp; + return (1.0f - rmd) * sum + rmd * sum2; + } + else { + return sum / maxamp; + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/fresnel.h b/intern/cycles/kernel/svm/fresnel.h new file mode 100644 index 00000000000..9dd68c3e38f --- /dev/null +++ b/intern/cycles/kernel/svm/fresnel.h @@ -0,0 +1,84 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Fresnel Node */ + +ccl_device_noinline void svm_node_fresnel(ccl_private ShaderData *sd, + ccl_private float *stack, + uint ior_offset, + uint ior_value, + uint node) +{ + uint normal_offset, out_offset; + svm_unpack_node_uchar2(node, &normal_offset, &out_offset); + float eta = (stack_valid(ior_offset)) ? stack_load_float(stack, ior_offset) : + __uint_as_float(ior_value); + float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N; + + eta = fmaxf(eta, 1e-5f); + eta = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta; + + float f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); + + stack_store_float(stack, out_offset, f); +} + +/* Layer Weight Node */ + +ccl_device_noinline void svm_node_layer_weight(ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint blend_offset = node.y; + uint blend_value = node.z; + + uint type, normal_offset, out_offset; + svm_unpack_node_uchar3(node.w, &type, &normal_offset, &out_offset); + + float blend = (stack_valid(blend_offset)) ? stack_load_float(stack, blend_offset) : + __uint_as_float(blend_value); + float3 normal_in = (stack_valid(normal_offset)) ? 
stack_load_float3(stack, normal_offset) : + sd->N; + + float f; + + if (type == NODE_LAYER_WEIGHT_FRESNEL) { + float eta = fmaxf(1.0f - blend, 1e-5f); + eta = (sd->flag & SD_BACKFACING) ? eta : 1.0f / eta; + + f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); + } + else { + f = fabsf(dot(sd->I, normal_in)); + + if (blend != 0.5f) { + blend = clamp(blend, 0.0f, 1.0f - 1e-5f); + blend = (blend < 0.5f) ? 2.0f * blend : 0.5f / (1.0f - blend); + + f = powf(f, blend); + } + + f = 1.0f - f; + } + + stack_store_float(stack, out_offset, f); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/gamma.h b/intern/cycles/kernel/svm/gamma.h new file mode 100644 index 00000000000..9f89e780be9 --- /dev/null +++ b/intern/cycles/kernel/svm/gamma.h @@ -0,0 +1,36 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline void svm_node_gamma(ccl_private ShaderData *sd, + ccl_private float *stack, + uint in_gamma, + uint in_color, + uint out_color) +{ + float3 color = stack_load_float3(stack, in_color); + float gamma = stack_load_float(stack, in_gamma); + + color = svm_math_gamma_color(color, gamma); + + if (stack_valid(out_color)) + stack_store_float3(stack, out_color, color); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/geometry.h b/intern/cycles/kernel/svm/geometry.h new file mode 100644 index 00000000000..772942e0c08 --- /dev/null +++ b/intern/cycles/kernel/svm/geometry.h @@ -0,0 +1,261 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Geometry Node */ + +ccl_device_noinline void svm_node_geometry(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint type, + uint out_offset) +{ + float3 data; + + switch (type) { + case NODE_GEOM_P: + data = sd->P; + break; + case NODE_GEOM_N: + data = sd->N; + break; +#ifdef __DPDU__ + case NODE_GEOM_T: + data = primitive_tangent(kg, sd); + break; +#endif + case NODE_GEOM_I: + data = sd->I; + break; + case NODE_GEOM_Ng: + data = sd->Ng; + break; + case NODE_GEOM_uv: + data = make_float3(sd->u, sd->v, 0.0f); + break; + default: + data = make_float3(0.0f, 0.0f, 0.0f); + } + + stack_store_float3(stack, out_offset, data); +} + +ccl_device_noinline void svm_node_geometry_bump_dx(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint type, + uint out_offset) +{ +#ifdef __RAY_DIFFERENTIALS__ + float3 data; + + switch (type) { + case NODE_GEOM_P: + data = sd->P + sd->dP.dx; + break; + case NODE_GEOM_uv: + data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f); + break; + default: + svm_node_geometry(kg, sd, stack, type, out_offset); + return; + } + + stack_store_float3(stack, out_offset, data); +#else + svm_node_geometry(kg, sd, stack, type, out_offset); +#endif +} + +ccl_device_noinline void svm_node_geometry_bump_dy(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint type, + uint out_offset) +{ +#ifdef __RAY_DIFFERENTIALS__ + float3 data; + + switch (type) { + case NODE_GEOM_P: + data = sd->P + sd->dP.dy; + break; + case NODE_GEOM_uv: + data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f); + break; + default: + svm_node_geometry(kg, sd, stack, type, out_offset); + return; + } + + stack_store_float3(stack, out_offset, data); +#else + svm_node_geometry(kg, sd, stack, type, out_offset); +#endif +} + +/* Object Info */ + +ccl_device_noinline void svm_node_object_info(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint type, + uint out_offset) +{ + float data; + + switch (type) { + case NODE_INFO_OB_LOCATION: { + stack_store_float3(stack, out_offset, object_location(kg, sd)); + return; + } + case NODE_INFO_OB_COLOR: { + stack_store_float3(stack, out_offset, object_color(kg, sd->object)); + return; + } + case NODE_INFO_OB_INDEX: + data = object_pass_id(kg, sd->object); + break; + case NODE_INFO_MAT_INDEX: + data = shader_pass_id(kg, sd); + break; + case NODE_INFO_OB_RANDOM: { + if (sd->lamp != LAMP_NONE) { + data = lamp_random_number(kg, sd->lamp); + } + else { + data = object_random_number(kg, sd->object); + } + break; + } + default: + data = 0.0f; + break; + } + + stack_store_float(stack, out_offset, data); +} + +/* Particle Info */ + +ccl_device_noinline void svm_node_particle_info(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint type, + uint out_offset) +{ + switch (type) { + case NODE_INFO_PAR_INDEX: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float(stack, out_offset, particle_index(kg, particle_id)); + break; + } + case NODE_INFO_PAR_RANDOM: { + int particle_id = object_particle_id(kg, sd->object); + float random = hash_uint2_to_float(particle_index(kg, particle_id), 0); + stack_store_float(stack, out_offset, random); + break; + } + case NODE_INFO_PAR_AGE: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float(stack, out_offset, particle_age(kg, particle_id)); + break; + } + case NODE_INFO_PAR_LIFETIME: { + int particle_id = 
object_particle_id(kg, sd->object); + stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id)); + break; + } + case NODE_INFO_PAR_LOCATION: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float3(stack, out_offset, particle_location(kg, particle_id)); + break; + } +#if 0 /* XXX float4 currently not supported in SVM stack */ + case NODE_INFO_PAR_ROTATION: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id)); + break; + } +#endif + case NODE_INFO_PAR_SIZE: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float(stack, out_offset, particle_size(kg, particle_id)); + break; + } + case NODE_INFO_PAR_VELOCITY: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id)); + break; + } + case NODE_INFO_PAR_ANGULAR_VELOCITY: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id)); + break; + } + } +} + +#ifdef __HAIR__ + +/* Hair Info */ + +ccl_device_noinline void svm_node_hair_info(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint type, + uint out_offset) +{ + float data; + float3 data3; + + switch (type) { + case NODE_INFO_CURVE_IS_STRAND: { + data = (sd->type & PRIMITIVE_ALL_CURVE) != 0; + stack_store_float(stack, out_offset, data); + break; + } + case NODE_INFO_CURVE_INTERCEPT: + break; /* handled as attribute */ + case NODE_INFO_CURVE_LENGTH: + break; /* handled as attribute */ + case NODE_INFO_CURVE_RANDOM: + break; /* handled as attribute */ + case NODE_INFO_CURVE_THICKNESS: { + data = curve_thickness(kg, sd); + stack_store_float(stack, out_offset, data); + break; + } +# if 0 + case NODE_INFO_CURVE_FADE: { + data = sd->curve_transparency; + stack_store_float(stack, out_offset, data); + break; + } +# endif + case NODE_INFO_CURVE_TANGENT_NORMAL: { + data3 = curve_tangent_normal(kg, sd); + stack_store_float3(stack, out_offset, data3); + break; + } + } +} +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/gradient.h b/intern/cycles/kernel/svm/gradient.h new file mode 100644 index 00000000000..852196b73dc --- /dev/null +++ b/intern/cycles/kernel/svm/gradient.h @@ -0,0 +1,84 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Gradient */ + +ccl_device float svm_gradient(float3 p, NodeGradientType type) +{ + float x, y, z; + + x = p.x; + y = p.y; + z = p.z; + + if (type == NODE_BLEND_LINEAR) { + return x; + } + else if (type == NODE_BLEND_QUADRATIC) { + float r = fmaxf(x, 0.0f); + return r * r; + } + else if (type == NODE_BLEND_EASING) { + float r = fminf(fmaxf(x, 0.0f), 1.0f); + float t = r * r; + + return (3.0f * t - 2.0f * t * r); + } + else if (type == NODE_BLEND_DIAGONAL) { + return (x + y) * 0.5f; + } + else if (type == NODE_BLEND_RADIAL) { + return atan2f(y, x) / M_2PI_F + 0.5f; + } + else { + /* Bias a little bit for the case where p is a unit length vector, + * to get exactly zero instead of a small random value depending + * on float precision. */ + float r = fmaxf(0.999999f - sqrtf(x * x + y * y + z * z), 0.0f); + + if (type == NODE_BLEND_QUADRATIC_SPHERE) + return r * r; + else if (type == NODE_BLEND_SPHERICAL) + return r; + } + + return 0.0f; +} + +ccl_device_noinline void svm_node_tex_gradient(ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint type, co_offset, color_offset, fac_offset; + + svm_unpack_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset); + + float3 co = stack_load_float3(stack, co_offset); + + float f = svm_gradient(co, (NodeGradientType)type); + f = saturate(f); + + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, f); + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, make_float3(f, f, f)); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/hsv.h b/intern/cycles/kernel/svm/hsv.h new file mode 100644 index 00000000000..f6881fd4512 --- /dev/null +++ b/intern/cycles/kernel/svm/hsv.h @@ -0,0 +1,61 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline void svm_node_hsv(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint in_color_offset, fac_offset, out_color_offset; + uint hue_offset, sat_offset, val_offset; + svm_unpack_node_uchar3(node.y, &in_color_offset, &fac_offset, &out_color_offset); + svm_unpack_node_uchar3(node.z, &hue_offset, &sat_offset, &val_offset); + + float fac = stack_load_float(stack, fac_offset); + float3 in_color = stack_load_float3(stack, in_color_offset); + float3 color = in_color; + + float hue = stack_load_float(stack, hue_offset); + float sat = stack_load_float(stack, sat_offset); + float val = stack_load_float(stack, val_offset); + + color = rgb_to_hsv(color); + + /* Remember: `fmodf` doesn't work for negative numbers here. 
*/ + color.x = fmodf(color.x + hue + 0.5f, 1.0f); + color.y = saturate(color.y * sat); + color.z *= val; + + color = hsv_to_rgb(color); + + color.x = fac * color.x + (1.0f - fac) * in_color.x; + color.y = fac * color.y + (1.0f - fac) * in_color.y; + color.z = fac * color.z + (1.0f - fac) * in_color.z; + + /* Clamp color to prevent negative values caused by over saturation. */ + color.x = max(color.x, 0.0f); + color.y = max(color.y, 0.0f); + color.z = max(color.z, 0.0f); + + if (stack_valid(out_color_offset)) + stack_store_float3(stack, out_color_offset, color); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/ies.h b/intern/cycles/kernel/svm/ies.h new file mode 100644 index 00000000000..f0923720878 --- /dev/null +++ b/intern/cycles/kernel/svm/ies.h @@ -0,0 +1,122 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* IES Light */ + +ccl_device_inline float interpolate_ies_vertical( + KernelGlobals kg, int ofs, int v, int v_num, float v_frac, int h) +{ + /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end + * of v (corresponding to the north pole) would result in artifacts. The proper way of dealing + * with this would be to lookup the corresponding value on the other side of the pole, but since + * the horizontal coordinates might be nonuniform, this would require yet another interpolation. + * Therefore, the assumption is made that the light is going to be symmetrical, which means that + * we can just take the corresponding value at the current horizontal coordinate. */ + +#define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs + h * v_num + (v)) + /* If v is zero, assume symmetry and read at v=1 instead of v=-1. */ + float a = IES_LOOKUP((v == 0) ? 1 : v - 1); + float b = IES_LOOKUP(v); + float c = IES_LOOKUP(v + 1); + float d = IES_LOOKUP(min(v + 2, v_num - 1)); +#undef IES_LOOKUP + + return cubic_interp(a, b, c, d, v_frac); +} + +ccl_device_inline float kernel_ies_interp(KernelGlobals kg, int slot, float h_angle, float v_angle) +{ + /* Find offset of the IES data in the table. */ + int ofs = __float_as_int(kernel_tex_fetch(__ies, slot)); + if (ofs == -1) { + return 100.0f; + } + + int h_num = __float_as_int(kernel_tex_fetch(__ies, ofs++)); + int v_num = __float_as_int(kernel_tex_fetch(__ies, ofs++)); + +#define IES_LOOKUP_ANGLE_H(h) kernel_tex_fetch(__ies, ofs + (h)) +#define IES_LOOKUP_ANGLE_V(v) kernel_tex_fetch(__ies, ofs + h_num + (v)) + + /* Check whether the angle is within the bounds of the IES texture. */ + if (v_angle >= IES_LOOKUP_ANGLE_V(v_num - 1)) { + return 0.0f; + } + kernel_assert(v_angle >= IES_LOOKUP_ANGLE_V(0)); + kernel_assert(h_angle >= IES_LOOKUP_ANGLE_H(0)); + kernel_assert(h_angle <= IES_LOOKUP_ANGLE_H(h_num - 1)); + + /* Lookup the angles to find the table position. */ + int h_i, v_i; + /* TODO(lukas): Consider using bisection. + * Probably not worth it for the vast majority of IES files. 
*/ + for (h_i = 0; IES_LOOKUP_ANGLE_H(h_i + 1) < h_angle; h_i++) + ; + for (v_i = 0; IES_LOOKUP_ANGLE_V(v_i + 1) < v_angle; v_i++) + ; + + float h_frac = inverse_lerp(IES_LOOKUP_ANGLE_H(h_i), IES_LOOKUP_ANGLE_H(h_i + 1), h_angle); + float v_frac = inverse_lerp(IES_LOOKUP_ANGLE_V(v_i), IES_LOOKUP_ANGLE_V(v_i + 1), v_angle); + +#undef IES_LOOKUP_ANGLE_H +#undef IES_LOOKUP_ANGLE_V + + /* Skip forward to the actual intensity data. */ + ofs += h_num + v_num; + + /* Perform cubic interpolation along the horizontal coordinate to get the intensity value. + * If h_i is zero, just wrap around since the horizontal angles always go over the full circle. + * However, the last entry (360°) equals the first one, so we need to wrap around to the one + * before that. */ + float a = interpolate_ies_vertical( + kg, ofs, v_i, v_num, v_frac, (h_i == 0) ? h_num - 2 : h_i - 1); + float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i); + float c = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i + 1); + /* Same logic here, wrap around to the second element if necessary. */ + float d = interpolate_ies_vertical( + kg, ofs, v_i, v_num, v_frac, (h_i + 2 == h_num) ? 1 : h_i + 2); + + /* Cubic interpolation can result in negative values, so get rid of them. */ + return max(cubic_interp(a, b, c, d, h_frac), 0.0f); +} + +ccl_device_noinline void svm_node_ies(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint vector_offset, strength_offset, fac_offset, slot = node.z; + svm_unpack_node_uchar3(node.y, &strength_offset, &vector_offset, &fac_offset); + + float3 vector = stack_load_float3(stack, vector_offset); + float strength = stack_load_float_default(stack, strength_offset, node.w); + + vector = normalize(vector); + float v_angle = safe_acosf(-vector.z); + float h_angle = atan2f(vector.x, vector.y) + M_PI_F; + + float fac = strength * kernel_ies_interp(kg, slot, h_angle, v_angle); + + if (stack_valid(fac_offset)) { + stack_store_float(stack, fac_offset, fac); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/image.h b/intern/cycles/kernel/svm/image.h new file mode 100644 index 00000000000..6ddf98a6ef1 --- /dev/null +++ b/intern/cycles/kernel/svm/image.h @@ -0,0 +1,253 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device float4 svm_image_texture(KernelGlobals kg, int id, float x, float y, uint flags) +{ + if (id == -1) { + return make_float4( + TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); + } + + float4 r = kernel_tex_image_interp(kg, id, x, y); + const float alpha = r.w; + + if ((flags & NODE_IMAGE_ALPHA_UNASSOCIATE) && alpha != 1.0f && alpha != 0.0f) { + r /= alpha; + r.w = alpha; + } + + if (flags & NODE_IMAGE_COMPRESS_AS_SRGB) { + r = color_srgb_to_linear_v4(r); + } + + return r; +} + +/* Remap coordinate from 0..1 box to -1..1 */ +ccl_device_inline float3 texco_remap_square(float3 co) +{ + return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f; +} + +ccl_device_noinline int svm_node_tex_image( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) +{ + uint co_offset, out_offset, alpha_offset, flags; + + svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); + + float3 co = stack_load_float3(stack, co_offset); + float2 tex_co; + if (node.w == NODE_IMAGE_PROJ_SPHERE) { + co = texco_remap_square(co); + tex_co = map_to_sphere(co); + } + else if (node.w == NODE_IMAGE_PROJ_TUBE) { + co = texco_remap_square(co); + tex_co = map_to_tube(co); + } + else { + tex_co = make_float2(co.x, co.y); + } + + /* TODO(lukas): Consider moving tile information out of the SVM node. + * TextureInfo seems a reasonable candidate. */ + int id = -1; + int num_nodes = (int)node.y; + if (num_nodes > 0) { + /* Remember the offset of the node following the tile nodes. */ + int next_offset = offset + num_nodes; + + /* Find the tile that the UV lies in. */ + int tx = (int)tex_co.x; + int ty = (int)tex_co.y; + + /* Check that we're within a legitimate tile. */ + if (tx >= 0 && ty >= 0 && tx < 10) { + int tile = 1001 + 10 * ty + tx; + + /* Find the index of the tile. */ + for (int i = 0; i < num_nodes; i++) { + uint4 tile_node = read_node(kg, &offset); + if (tile_node.x == tile) { + id = tile_node.y; + break; + } + if (tile_node.z == tile) { + id = tile_node.w; + break; + } + } + + /* If we found the tile, offset the UVs to be relative to it. */ + if (id != -1) { + tex_co.x -= tx; + tex_co.y -= ty; + } + } + + /* Skip over the remaining nodes. */ + offset = next_offset; + } + else { + id = -num_nodes; + } + + float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, flags); + + if (stack_valid(out_offset)) + stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); + if (stack_valid(alpha_offset)) + stack_store_float(stack, alpha_offset, f.w); + return offset; +} + +ccl_device_noinline void svm_node_tex_image_box(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + /* get object space normal */ + float3 N = sd->N; + object_inverse_normal_transform(kg, sd, &N); + + /* project from direction vector to barycentric coordinates in triangles */ + float3 signed_N = N; + + N.x = fabsf(N.x); + N.y = fabsf(N.y); + N.z = fabsf(N.z); + + N /= (N.x + N.y + N.z); + + /* basic idea is to think of this as a triangle, each corner representing + * one of the 3 faces of the cube. in the corners we have single textures, + * in between we blend between two textures, and in the middle we have a blend + * between three textures. + * + * The `Nxyz` values are the barycentric coordinates in an equilateral + * triangle, which in case of blending, in the middle has a smaller + * equilateral triangle where 3 textures blend.
this divides things into + * 7 zones, with an if() test for each zone. */ + + float3 weight = make_float3(0.0f, 0.0f, 0.0f); + float blend = __int_as_float(node.w); + float limit = 0.5f * (1.0f + blend); + + /* first test for corners with single texture */ + if (N.x > limit * (N.x + N.y) && N.x > limit * (N.x + N.z)) { + weight.x = 1.0f; + } + else if (N.y > limit * (N.x + N.y) && N.y > limit * (N.y + N.z)) { + weight.y = 1.0f; + } + else if (N.z > limit * (N.x + N.z) && N.z > limit * (N.y + N.z)) { + weight.z = 1.0f; + } + else if (blend > 0.0f) { + /* in case of blending, test for mixes between two textures */ + if (N.z < (1.0f - limit) * (N.y + N.x)) { + weight.x = N.x / (N.x + N.y); + weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend); + weight.y = 1.0f - weight.x; + } + else if (N.x < (1.0f - limit) * (N.y + N.z)) { + weight.y = N.y / (N.y + N.z); + weight.y = saturate((weight.y - 0.5f * (1.0f - blend)) / blend); + weight.z = 1.0f - weight.y; + } + else if (N.y < (1.0f - limit) * (N.x + N.z)) { + weight.x = N.x / (N.x + N.z); + weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend); + weight.z = 1.0f - weight.x; + } + else { + /* last case, we have a mix between three */ + weight.x = ((2.0f - limit) * N.x + (limit - 1.0f)) / (2.0f * limit - 1.0f); + weight.y = ((2.0f - limit) * N.y + (limit - 1.0f)) / (2.0f * limit - 1.0f); + weight.z = ((2.0f - limit) * N.z + (limit - 1.0f)) / (2.0f * limit - 1.0f); + } + } + else { + /* Desperate mode, no valid choice anyway, fallback to one side. */ + weight.x = 1.0f; + } + + /* now fetch textures */ + uint co_offset, out_offset, alpha_offset, flags; + svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); + + float3 co = stack_load_float3(stack, co_offset); + uint id = node.y; + + float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + /* Map so that no textures are flipped, rotation is somewhat arbitrary. */ + if (weight.x > 0.0f) { + float2 uv = make_float2((signed_N.x < 0.0f) ? 1.0f - co.y : co.y, co.z); + f += weight.x * svm_image_texture(kg, id, uv.x, uv.y, flags); + } + if (weight.y > 0.0f) { + float2 uv = make_float2((signed_N.y > 0.0f) ? 1.0f - co.x : co.x, co.z); + f += weight.y * svm_image_texture(kg, id, uv.x, uv.y, flags); + } + if (weight.z > 0.0f) { + float2 uv = make_float2((signed_N.z > 0.0f) ? 
1.0f - co.y : co.y, co.x); + f += weight.z * svm_image_texture(kg, id, uv.x, uv.y, flags); + } + + if (stack_valid(out_offset)) + stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); + if (stack_valid(alpha_offset)) + stack_store_float(stack, alpha_offset, f.w); +} + +ccl_device_noinline void svm_node_tex_environment(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint id = node.y; + uint co_offset, out_offset, alpha_offset, flags; + uint projection = node.w; + + svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); + + float3 co = stack_load_float3(stack, co_offset); + float2 uv; + + co = safe_normalize(co); + + if (projection == 0) + uv = direction_to_equirectangular(co); + else + uv = direction_to_mirrorball(co); + + float4 f = svm_image_texture(kg, id, uv.x, uv.y, flags); + + if (stack_valid(out_offset)) + stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); + if (stack_valid(alpha_offset)) + stack_store_float(stack, alpha_offset, f.w); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/invert.h b/intern/cycles/kernel/svm/invert.h new file mode 100644 index 00000000000..5a88e9df2c9 --- /dev/null +++ b/intern/cycles/kernel/svm/invert.h @@ -0,0 +1,43 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device float invert(float color, float factor) +{ + return factor * (1.0f - color) + (1.0f - factor) * color; +} + +ccl_device_noinline void svm_node_invert(ccl_private ShaderData *sd, + ccl_private float *stack, + uint in_fac, + uint in_color, + uint out_color) +{ + float factor = stack_load_float(stack, in_fac); + float3 color = stack_load_float3(stack, in_color); + + color.x = invert(color.x, factor); + color.y = invert(color.y, factor); + color.z = invert(color.z, factor); + + if (stack_valid(out_color)) + stack_store_float3(stack, out_color, color); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/light_path.h b/intern/cycles/kernel/svm/light_path.h new file mode 100644 index 00000000000..44a35b568fa --- /dev/null +++ b/intern/cycles/kernel/svm/light_path.h @@ -0,0 +1,148 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Light Path Node */ + +template<uint node_feature_mask, typename ConstIntegratorGenericState> +ccl_device_noinline void svm_node_light_path(KernelGlobals kg, + ConstIntegratorGenericState state, + ccl_private const ShaderData *sd, + ccl_private float *stack, + uint type, + uint out_offset, + uint32_t path_flag) +{ + float info = 0.0f; + + switch (type) { + case NODE_LP_camera: + info = (path_flag & PATH_RAY_CAMERA) ? 1.0f : 0.0f; + break; + case NODE_LP_shadow: + info = (path_flag & PATH_RAY_SHADOW) ? 1.0f : 0.0f; + break; + case NODE_LP_diffuse: + info = (path_flag & PATH_RAY_DIFFUSE) ? 1.0f : 0.0f; + break; + case NODE_LP_glossy: + info = (path_flag & PATH_RAY_GLOSSY) ? 1.0f : 0.0f; + break; + case NODE_LP_singular: + info = (path_flag & PATH_RAY_SINGULAR) ? 1.0f : 0.0f; + break; + case NODE_LP_reflection: + info = (path_flag & PATH_RAY_REFLECT) ? 1.0f : 0.0f; + break; + case NODE_LP_transmission: + info = (path_flag & PATH_RAY_TRANSMIT) ? 1.0f : 0.0f; + break; + case NODE_LP_volume_scatter: + info = (path_flag & PATH_RAY_VOLUME_SCATTER) ? 1.0f : 0.0f; + break; + case NODE_LP_backfacing: + info = (sd->flag & SD_BACKFACING) ? 1.0f : 0.0f; + break; + case NODE_LP_ray_length: + info = sd->ray_length; + break; + case NODE_LP_ray_depth: { + /* Read bounce from a different location depending on whether this is a shadow + * path. It's a bit dubious to have integrator state details leak into + * this function, but hard to avoid currently. */ + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + info = (float)integrator_state_bounce(state, path_flag); + } + + /* For background, light emission and shadow evaluation from a + * surface or volume we are effectively one bounce further. */ + if (path_flag & (PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + info += 1.0f; + } + break; + } + case NODE_LP_ray_transparent: { + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + info = (float)integrator_state_transparent_bounce(state, path_flag); + } + break; + } + case NODE_LP_ray_diffuse: + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + info = (float)integrator_state_diffuse_bounce(state, path_flag); + } + break; + case NODE_LP_ray_glossy: + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + info = (float)integrator_state_glossy_bounce(state, path_flag); + } + break; + case NODE_LP_ray_transmission: + IF_KERNEL_NODES_FEATURE(LIGHT_PATH) + { + info = (float)integrator_state_transmission_bounce(state, path_flag); + } + break; + } + + stack_store_float(stack, out_offset, info); +} + +/* Light Falloff Node */ + +ccl_device_noinline void svm_node_light_falloff(ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint strength_offset, out_offset, smooth_offset; + + svm_unpack_node_uchar3(node.z, &strength_offset, &smooth_offset, &out_offset); + + float strength = stack_load_float(stack, strength_offset); + uint type = node.y; + + switch (type) { + case NODE_LIGHT_FALLOFF_QUADRATIC: + break; + case NODE_LIGHT_FALLOFF_LINEAR: + strength *= sd->ray_length; + break; + case NODE_LIGHT_FALLOFF_CONSTANT: + strength *= sd->ray_length * sd->ray_length; + break; + } + + float smooth = stack_load_float(stack, smooth_offset); + + if (smooth > 0.0f) { + float squared = sd->ray_length * sd->ray_length; + /* Distant lamps set the ray length to FLT_MAX, which causes squared to overflow.
*/ + if (isfinite(squared)) { + strength *= squared / (smooth + squared); + } + } + + stack_store_float(stack, out_offset, strength); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/magic.h b/intern/cycles/kernel/svm/magic.h new file mode 100644 index 00000000000..f103a8eebcc --- /dev/null +++ b/intern/cycles/kernel/svm/magic.h @@ -0,0 +1,115 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Magic */ + +ccl_device_noinline_cpu float3 svm_magic(float3 p, int n, float distortion) +{ + float x = sinf((p.x + p.y + p.z) * 5.0f); + float y = cosf((-p.x + p.y - p.z) * 5.0f); + float z = -cosf((-p.x - p.y + p.z) * 5.0f); + + if (n > 0) { + x *= distortion; + y *= distortion; + z *= distortion; + y = -cosf(x - y + z); + y *= distortion; + + if (n > 1) { + x = cosf(x - y - z); + x *= distortion; + + if (n > 2) { + z = sinf(-x - y - z); + z *= distortion; + + if (n > 3) { + x = -cosf(-x + y - z); + x *= distortion; + + if (n > 4) { + y = -sinf(-x + y + z); + y *= distortion; + + if (n > 5) { + y = -cosf(-x + y + z); + y *= distortion; + + if (n > 6) { + x = cosf(x + y + z); + x *= distortion; + + if (n > 7) { + z = sinf(x + y - z); + z *= distortion; + + if (n > 8) { + x = -cosf(-x - y + z); + x *= distortion; + + if (n > 9) { + y = -sinf(x - y + z); + y *= distortion; + } + } + } + } + } + } + } + } + } + } + + if (distortion != 0.0f) { + distortion *= 2.0f; + x /= distortion; + y /= distortion; + z /= distortion; + } + + return make_float3(0.5f - x, 0.5f - y, 0.5f - z); +} + +ccl_device_noinline int svm_node_tex_magic( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) +{ + uint depth; + uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset; + + svm_unpack_node_uchar3(node.y, &depth, &color_offset, &fac_offset); + svm_unpack_node_uchar3(node.z, &co_offset, &scale_offset, &distortion_offset); + + uint4 node2 = read_node(kg, &offset); + float3 co = stack_load_float3(stack, co_offset); + float scale = stack_load_float_default(stack, scale_offset, node2.x); + float distortion = stack_load_float_default(stack, distortion_offset, node2.y); + + float3 color = svm_magic(co * scale, depth, distortion); + + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, average(color)); + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, color); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/map_range.h b/intern/cycles/kernel/svm/map_range.h new file mode 100644 index 00000000000..fdbfc6531c4 --- /dev/null +++ b/intern/cycles/kernel/svm/map_range.h @@ -0,0 +1,91 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Map Range Node */ + +ccl_device_inline float smootherstep(float edge0, float edge1, float x) +{ + x = clamp(safe_divide((x - edge0), (edge1 - edge0)), 0.0f, 1.0f); + return x * x * x * (x * (x * 6.0f - 15.0f) + 10.0f); +} + +ccl_device_noinline int svm_node_map_range(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint value_stack_offset, + uint parameters_stack_offsets, + uint results_stack_offsets, + int offset) +{ + uint from_min_stack_offset, from_max_stack_offset, to_min_stack_offset, to_max_stack_offset; + uint type_stack_offset, steps_stack_offset, result_stack_offset; + svm_unpack_node_uchar4(parameters_stack_offsets, + &from_min_stack_offset, + &from_max_stack_offset, + &to_min_stack_offset, + &to_max_stack_offset); + svm_unpack_node_uchar3( + results_stack_offsets, &type_stack_offset, &steps_stack_offset, &result_stack_offset); + + uint4 defaults = read_node(kg, &offset); + uint4 defaults2 = read_node(kg, &offset); + + float value = stack_load_float(stack, value_stack_offset); + float from_min = stack_load_float_default(stack, from_min_stack_offset, defaults.x); + float from_max = stack_load_float_default(stack, from_max_stack_offset, defaults.y); + float to_min = stack_load_float_default(stack, to_min_stack_offset, defaults.z); + float to_max = stack_load_float_default(stack, to_max_stack_offset, defaults.w); + float steps = stack_load_float_default(stack, steps_stack_offset, defaults2.x); + + float result; + + if (from_max != from_min) { + float factor = value; + switch (type_stack_offset) { + default: + case NODE_MAP_RANGE_LINEAR: + factor = (value - from_min) / (from_max - from_min); + break; + case NODE_MAP_RANGE_STEPPED: { + factor = (value - from_min) / (from_max - from_min); + factor = (steps > 0.0f) ? floorf(factor * (steps + 1.0f)) / steps : 0.0f; + break; + } + case NODE_MAP_RANGE_SMOOTHSTEP: { + factor = (from_min > from_max) ? 1.0f - smoothstep(from_max, from_min, factor) : + smoothstep(from_min, from_max, factor); + break; + } + case NODE_MAP_RANGE_SMOOTHERSTEP: { + factor = (from_min > from_max) ? 1.0f - smootherstep(from_max, from_min, factor) : + smootherstep(from_min, from_max, factor); + break; + } + } + result = to_min + factor * (to_max - to_min); + } + else { + result = 0.0f; + } + stack_store_float(stack, result_stack_offset, result); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/mapping.h b/intern/cycles/kernel/svm/mapping.h new file mode 100644 index 00000000000..19f79471ad2 --- /dev/null +++ b/intern/cycles/kernel/svm/mapping.h @@ -0,0 +1,86 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/svm/mapping_util.h" + +CCL_NAMESPACE_BEGIN + +/* Mapping Node */ + +ccl_device_noinline void svm_node_mapping(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint type, + uint inputs_stack_offsets, + uint result_stack_offset) +{ + uint vector_stack_offset, location_stack_offset, rotation_stack_offset, scale_stack_offset; + svm_unpack_node_uchar4(inputs_stack_offsets, + &vector_stack_offset, + &location_stack_offset, + &rotation_stack_offset, + &scale_stack_offset); + + float3 vector = stack_load_float3(stack, vector_stack_offset); + float3 location = stack_load_float3(stack, location_stack_offset); + float3 rotation = stack_load_float3(stack, rotation_stack_offset); + float3 scale = stack_load_float3(stack, scale_stack_offset); + + float3 result = svm_mapping((NodeMappingType)type, vector, location, rotation, scale); + stack_store_float3(stack, result_stack_offset, result); +} + +/* Texture Mapping */ + +ccl_device_noinline int svm_node_texture_mapping(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint vec_offset, + uint out_offset, + int offset) +{ + float3 v = stack_load_float3(stack, vec_offset); + + Transform tfm; + tfm.x = read_node_float(kg, &offset); + tfm.y = read_node_float(kg, &offset); + tfm.z = read_node_float(kg, &offset); + + float3 r = transform_point(&tfm, v); + stack_store_float3(stack, out_offset, r); + return offset; +} + +ccl_device_noinline int svm_node_min_max(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint vec_offset, + uint out_offset, + int offset) +{ + float3 v = stack_load_float3(stack, vec_offset); + + float3 mn = float4_to_float3(read_node_float(kg, &offset)); + float3 mx = float4_to_float3(read_node_float(kg, &offset)); + + float3 r = min(max(mn, v), mx); + stack_store_float3(stack, out_offset, r); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/mapping_util.h b/intern/cycles/kernel/svm/mapping_util.h new file mode 100644 index 00000000000..51b13c0c264 --- /dev/null +++ b/intern/cycles/kernel/svm/mapping_util.h @@ -0,0 +1,41 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device float3 +svm_mapping(NodeMappingType type, float3 vector, float3 location, float3 rotation, float3 scale) +{ + Transform rotationTransform = euler_to_transform(rotation); + switch (type) { + case NODE_MAPPING_TYPE_POINT: + return transform_direction(&rotationTransform, (vector * scale)) + location; + case NODE_MAPPING_TYPE_TEXTURE: + return safe_divide_float3_float3( + transform_direction_transposed(&rotationTransform, (vector - location)), scale); + case NODE_MAPPING_TYPE_VECTOR: + return transform_direction(&rotationTransform, (vector * scale)); + case NODE_MAPPING_TYPE_NORMAL: + return safe_normalize( + transform_direction(&rotationTransform, safe_divide_float3_float3(vector, scale))); + default: + return make_float3(0.0f, 0.0f, 0.0f); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/math.h b/intern/cycles/kernel/svm/math.h new file mode 100644 index 00000000000..ff0f3683ea3 --- /dev/null +++ b/intern/cycles/kernel/svm/math.h @@ -0,0 +1,77 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline void svm_node_math(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint type, + uint inputs_stack_offsets, + uint result_stack_offset) +{ + uint a_stack_offset, b_stack_offset, c_stack_offset; + svm_unpack_node_uchar3(inputs_stack_offsets, &a_stack_offset, &b_stack_offset, &c_stack_offset); + + float a = stack_load_float(stack, a_stack_offset); + float b = stack_load_float(stack, b_stack_offset); + float c = stack_load_float(stack, c_stack_offset); + float result = svm_math((NodeMathType)type, a, b, c); + + stack_store_float(stack, result_stack_offset, result); +} + +ccl_device_noinline int svm_node_vector_math(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint type, + uint inputs_stack_offsets, + uint outputs_stack_offsets, + int offset) +{ + uint value_stack_offset, vector_stack_offset; + uint a_stack_offset, b_stack_offset, param1_stack_offset; + svm_unpack_node_uchar3( + inputs_stack_offsets, &a_stack_offset, &b_stack_offset, ¶m1_stack_offset); + svm_unpack_node_uchar2(outputs_stack_offsets, &value_stack_offset, &vector_stack_offset); + + float3 a = stack_load_float3(stack, a_stack_offset); + float3 b = stack_load_float3(stack, b_stack_offset); + float3 c = make_float3(0.0f, 0.0f, 0.0f); + float param1 = stack_load_float(stack, param1_stack_offset); + + float value; + float3 vector; + + /* 3 Vector Operators */ + if (type == NODE_VECTOR_MATH_WRAP || type == NODE_VECTOR_MATH_FACEFORWARD || + type == NODE_VECTOR_MATH_MULTIPLY_ADD) { + uint4 extra_node = read_node(kg, &offset); + c = stack_load_float3(stack, extra_node.x); + } + + svm_vector_math(&value, &vector, (NodeVectorMathType)type, a, b, c, param1); + + if (stack_valid(value_stack_offset)) + stack_store_float(stack, value_stack_offset, value); + if (stack_valid(vector_stack_offset)) + 
stack_store_float3(stack, vector_stack_offset, vector); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/math_util.h b/intern/cycles/kernel/svm/math_util.h new file mode 100644 index 00000000000..b2e539cdd1f --- /dev/null +++ b/intern/cycles/kernel/svm/math_util.h @@ -0,0 +1,285 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device void svm_vector_math(ccl_private float *value, + ccl_private float3 *vector, + NodeVectorMathType type, + float3 a, + float3 b, + float3 c, + float param1) +{ + switch (type) { + case NODE_VECTOR_MATH_ADD: + *vector = a + b; + break; + case NODE_VECTOR_MATH_SUBTRACT: + *vector = a - b; + break; + case NODE_VECTOR_MATH_MULTIPLY: + *vector = a * b; + break; + case NODE_VECTOR_MATH_DIVIDE: + *vector = safe_divide_float3_float3(a, b); + break; + case NODE_VECTOR_MATH_CROSS_PRODUCT: + *vector = cross(a, b); + break; + case NODE_VECTOR_MATH_PROJECT: + *vector = project(a, b); + break; + case NODE_VECTOR_MATH_REFLECT: + *vector = reflect(a, b); + break; + case NODE_VECTOR_MATH_REFRACT: + *vector = refract(a, normalize(b), param1); + break; + case NODE_VECTOR_MATH_FACEFORWARD: + *vector = faceforward(a, b, c); + break; + case NODE_VECTOR_MATH_MULTIPLY_ADD: + *vector = a * b + c; + break; + case NODE_VECTOR_MATH_DOT_PRODUCT: + *value = dot(a, b); + break; + case NODE_VECTOR_MATH_DISTANCE: + *value = distance(a, b); + break; + case NODE_VECTOR_MATH_LENGTH: + *value = len(a); + break; + case NODE_VECTOR_MATH_SCALE: + *vector = a * param1; + break; + case NODE_VECTOR_MATH_NORMALIZE: + *vector = safe_normalize(a); + break; + case NODE_VECTOR_MATH_SNAP: + *vector = floor(safe_divide_float3_float3(a, b)) * b; + break; + case NODE_VECTOR_MATH_FLOOR: + *vector = floor(a); + break; + case NODE_VECTOR_MATH_CEIL: + *vector = ceil(a); + break; + case NODE_VECTOR_MATH_MODULO: + *vector = make_float3(safe_modulo(a.x, b.x), safe_modulo(a.y, b.y), safe_modulo(a.z, b.z)); + break; + case NODE_VECTOR_MATH_WRAP: + *vector = make_float3(wrapf(a.x, b.x, c.x), wrapf(a.y, b.y, c.y), wrapf(a.z, b.z, c.z)); + break; + case NODE_VECTOR_MATH_FRACTION: + *vector = a - floor(a); + break; + case NODE_VECTOR_MATH_ABSOLUTE: + *vector = fabs(a); + break; + case NODE_VECTOR_MATH_MINIMUM: + *vector = min(a, b); + break; + case NODE_VECTOR_MATH_MAXIMUM: + *vector = max(a, b); + break; + case NODE_VECTOR_MATH_SINE: + *vector = make_float3(sinf(a.x), sinf(a.y), sinf(a.z)); + break; + case NODE_VECTOR_MATH_COSINE: + *vector = make_float3(cosf(a.x), cosf(a.y), cosf(a.z)); + break; + case NODE_VECTOR_MATH_TANGENT: + *vector = make_float3(tanf(a.x), tanf(a.y), tanf(a.z)); + break; + default: + *vector = zero_float3(); + *value = 0.0f; + } +} + +ccl_device float svm_math(NodeMathType type, float a, float b, float c) +{ + switch (type) { + case NODE_MATH_ADD: + return a + b; + case NODE_MATH_SUBTRACT: + return a - b; + case NODE_MATH_MULTIPLY: + return a * b; + case 
NODE_MATH_DIVIDE: + return safe_divide(a, b); + case NODE_MATH_POWER: + return safe_powf(a, b); + case NODE_MATH_LOGARITHM: + return safe_logf(a, b); + case NODE_MATH_SQRT: + return safe_sqrtf(a); + case NODE_MATH_INV_SQRT: + return inversesqrtf(a); + case NODE_MATH_ABSOLUTE: + return fabsf(a); + case NODE_MATH_RADIANS: + return a * (M_PI_F / 180.0f); + case NODE_MATH_DEGREES: + return a * (180.0f / M_PI_F); + case NODE_MATH_MINIMUM: + return fminf(a, b); + case NODE_MATH_MAXIMUM: + return fmaxf(a, b); + case NODE_MATH_LESS_THAN: + return a < b; + case NODE_MATH_GREATER_THAN: + return a > b; + case NODE_MATH_ROUND: + return floorf(a + 0.5f); + case NODE_MATH_FLOOR: + return floorf(a); + case NODE_MATH_CEIL: + return ceilf(a); + case NODE_MATH_FRACTION: + return a - floorf(a); + case NODE_MATH_MODULO: + return safe_modulo(a, b); + case NODE_MATH_TRUNC: + return a >= 0.0f ? floorf(a) : ceilf(a); + case NODE_MATH_SNAP: + return floorf(safe_divide(a, b)) * b; + case NODE_MATH_WRAP: + return wrapf(a, b, c); + case NODE_MATH_PINGPONG: + return pingpongf(a, b); + case NODE_MATH_SINE: + return sinf(a); + case NODE_MATH_COSINE: + return cosf(a); + case NODE_MATH_TANGENT: + return tanf(a); + case NODE_MATH_SINH: + return sinhf(a); + case NODE_MATH_COSH: + return coshf(a); + case NODE_MATH_TANH: + return tanhf(a); + case NODE_MATH_ARCSINE: + return safe_asinf(a); + case NODE_MATH_ARCCOSINE: + return safe_acosf(a); + case NODE_MATH_ARCTANGENT: + return atanf(a); + case NODE_MATH_ARCTAN2: + return atan2f(a, b); + case NODE_MATH_SIGN: + return compatible_signf(a); + case NODE_MATH_EXPONENT: + return expf(a); + case NODE_MATH_COMPARE: + return ((a == b) || (fabsf(a - b) <= fmaxf(c, FLT_EPSILON))) ? 1.0f : 0.0f; + case NODE_MATH_MULTIPLY_ADD: + return a * b + c; + case NODE_MATH_SMOOTH_MIN: + return smoothminf(a, b, c); + case NODE_MATH_SMOOTH_MAX: + return -smoothminf(-a, -b, c); + default: + return 0.0f; + } +} + +ccl_device float3 svm_math_blackbody_color(float t) +{ + /* TODO(lukas): Reimplement in XYZ. */ + + /* Calculate color in range 800..12000 using an approximation + * a/x+bx+c for R and G and ((at + b)t + c)t + d) for B + * Max absolute error for RGB is (0.00095, 0.00077, 0.00057), + * which is enough to get the same 8 bit/channel color. 
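+ * For example, a temperature of 6500 K falls in the last band (i = 5) of the tables below.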
+ */ + + const float blackbody_table_r[6][3] = { + {2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f}, + {3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f}, + {4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f}, + {4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f}, + {4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f}, + {3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f}, + }; + + const float blackbody_table_g[6][3] = { + {-7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f}, + {-1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f}, + {-1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f}, + {-1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f}, + {-1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f}, + {-5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f}, + }; + + const float blackbody_table_b[6][4] = { + {0.0f, 0.0f, 0.0f, 0.0f}, /* zeros should be optimized by compiler */ + {0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 0.0f, 0.0f, 0.0f}, + {-2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f}, + {-2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f}, + {6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f}, + }; + + if (t >= 12000.0f) { + return make_float3(0.826270103f, 0.994478524f, 1.56626022f); + } + else if (t < 965.0f) { + /* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */ + return make_float3(4.70366907f, 0.0f, 0.0f); + } + + /* Manually align for readability. */ + /* clang-format off */ + int i = (t >= 6365.0f) ? 5 : + (t >= 3315.0f) ? 4 : + (t >= 1902.0f) ? 3 : + (t >= 1449.0f) ? 2 : + (t >= 1167.0f) ? 1 : + 0; + /* clang-format on */ + + ccl_constant float *r = blackbody_table_r[i]; + ccl_constant float *g = blackbody_table_g[i]; + ccl_constant float *b = blackbody_table_b[i]; + + const float t_inv = 1.0f / t; + return make_float3(r[0] * t_inv + r[1] * t + r[2], + g[0] * t_inv + g[1] * t + g[2], + ((b[0] * t + b[1]) * t + b[2]) * t + b[3]); +} + +ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma) +{ + if (gamma == 0.0f) + return make_float3(1.0f, 1.0f, 1.0f); + + if (color.x > 0.0f) + color.x = powf(color.x, gamma); + if (color.y > 0.0f) + color.y = powf(color.y, gamma); + if (color.z > 0.0f) + color.z = powf(color.z, gamma); + + return color; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/mix.h b/intern/cycles/kernel/svm/mix.h new file mode 100644 index 00000000000..96e5b3f5b5e --- /dev/null +++ b/intern/cycles/kernel/svm/mix.h @@ -0,0 +1,43 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Node */ + +ccl_device_noinline int svm_node_mix(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint fac_offset, + uint c1_offset, + uint c2_offset, + int offset) +{ + /* read extra data */ + uint4 node1 = read_node(kg, &offset); + + float fac = stack_load_float(stack, fac_offset); + float3 c1 = stack_load_float3(stack, c1_offset); + float3 c2 = stack_load_float3(stack, c2_offset); + float3 result = svm_mix((NodeMix)node1.y, fac, c1, c2); + + stack_store_float3(stack, node1.z, result); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/musgrave.h b/intern/cycles/kernel/svm/musgrave.h new file mode 100644 index 00000000000..4225c3d2d71 --- /dev/null +++ b/intern/cycles/kernel/svm/musgrave.h @@ -0,0 +1,854 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/svm/noise.h" + +CCL_NAMESPACE_BEGIN + +/* 1D Musgrave fBm + * + * H: fractal increment parameter + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * + * from "Texturing and Modelling: A procedural approach" + */ + +ccl_device_noinline_cpu float noise_musgrave_fBm_1d(float co, + float H, + float lacunarity, + float octaves) +{ + float p = co; + float value = 0.0f; + float pwr = 1.0f; + float pwHL = powf(lacunarity, -H); + + for (int i = 0; i < float_to_int(octaves); i++) { + value += snoise_1d(p) * pwr; + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + value += rmd * snoise_1d(p) * pwr; + } + + return value; +} + +/* 1D Musgrave Multifractal + * + * H: highest fractal dimension + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + */ + +ccl_device_noinline_cpu float noise_musgrave_multi_fractal_1d(float co, + float H, + float lacunarity, + float octaves) +{ + float p = co; + float value = 1.0f; + float pwr = 1.0f; + float pwHL = powf(lacunarity, -H); + + for (int i = 0; i < float_to_int(octaves); i++) { + value *= (pwr * snoise_1d(p) + 1.0f); + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + value *= (rmd * pwr * snoise_1d(p) + 1.0f); /* correct? 
*/ + } + + return value; +} + +/* 1D Musgrave Heterogeneous Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_1d( + float co, float H, float lacunarity, float octaves, float offset) +{ + float p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + /* first unscaled octave of function; later octaves are scaled */ + float value = offset + snoise_1d(p); + p *= lacunarity; + + for (int i = 1; i < float_to_int(octaves); i++) { + float increment = (snoise_1d(p) + offset) * pwr * value; + value += increment; + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + float increment = (snoise_1d(p) + offset) * pwr * value; + value += rmd * increment; + } + + return value; +} + +/* 1D Hybrid Additive/Multiplicative Multifractal Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_1d( + float co, float H, float lacunarity, float octaves, float offset, float gain) +{ + float p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + float value = snoise_1d(p) + offset; + float weight = gain * value; + p *= lacunarity; + + for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { + if (weight > 1.0f) { + weight = 1.0f; + } + + float signal = (snoise_1d(p) + offset) * pwr; + pwr *= pwHL; + value += weight * signal; + weight *= gain * signal; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + value += rmd * ((snoise_1d(p) + offset) * pwr); + } + + return value; +} + +/* 1D Ridged Multifractal Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_1d( + float co, float H, float lacunarity, float octaves, float offset, float gain) +{ + float p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + float signal = offset - fabsf(snoise_1d(p)); + signal *= signal; + float value = signal; + float weight = 1.0f; + + for (int i = 1; i < float_to_int(octaves); i++) { + p *= lacunarity; + weight = saturate(signal * gain); + signal = offset - fabsf(snoise_1d(p)); + signal *= signal; + signal *= weight; + value += signal * pwr; + pwr *= pwHL; + } + + return value; +} + +/* 2D Musgrave fBm + * + * H: fractal increment parameter + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * + * from "Texturing and Modelling: A procedural approach" + */ + +ccl_device_noinline_cpu float noise_musgrave_fBm_2d(float2 co, + float H, + float lacunarity, + float octaves) +{ + float2 p = co; + float value = 0.0f; + float pwr = 1.0f; + float pwHL = powf(lacunarity, -H); + + for (int i = 0; i < float_to_int(octaves); i++) { + value += snoise_2d(p) * pwr; + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + value += rmd * snoise_2d(p) * pwr; + } + + return value; +} + +/* 2D Musgrave Multifractal + * + * H: highest fractal dimension + * lacunarity: gap 
between successive frequencies + * octaves: number of frequencies in the fBm + */ + +ccl_device_noinline_cpu float noise_musgrave_multi_fractal_2d(float2 co, + float H, + float lacunarity, + float octaves) +{ + float2 p = co; + float value = 1.0f; + float pwr = 1.0f; + float pwHL = powf(lacunarity, -H); + + for (int i = 0; i < float_to_int(octaves); i++) { + value *= (pwr * snoise_2d(p) + 1.0f); + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + value *= (rmd * pwr * snoise_2d(p) + 1.0f); /* correct? */ + } + + return value; +} + +/* 2D Musgrave Heterogeneous Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_2d( + float2 co, float H, float lacunarity, float octaves, float offset) +{ + float2 p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + /* first unscaled octave of function; later octaves are scaled */ + float value = offset + snoise_2d(p); + p *= lacunarity; + + for (int i = 1; i < float_to_int(octaves); i++) { + float increment = (snoise_2d(p) + offset) * pwr * value; + value += increment; + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + float increment = (snoise_2d(p) + offset) * pwr * value; + value += rmd * increment; + } + + return value; +} + +/* 2D Hybrid Additive/Multiplicative Multifractal Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_2d( + float2 co, float H, float lacunarity, float octaves, float offset, float gain) +{ + float2 p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + float value = snoise_2d(p) + offset; + float weight = gain * value; + p *= lacunarity; + + for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { + if (weight > 1.0f) { + weight = 1.0f; + } + + float signal = (snoise_2d(p) + offset) * pwr; + pwr *= pwHL; + value += weight * signal; + weight *= gain * signal; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + value += rmd * ((snoise_2d(p) + offset) * pwr); + } + + return value; +} + +/* 2D Ridged Multifractal Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_2d( + float2 co, float H, float lacunarity, float octaves, float offset, float gain) +{ + float2 p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + float signal = offset - fabsf(snoise_2d(p)); + signal *= signal; + float value = signal; + float weight = 1.0f; + + for (int i = 1; i < float_to_int(octaves); i++) { + p *= lacunarity; + weight = saturate(signal * gain); + signal = offset - fabsf(snoise_2d(p)); + signal *= signal; + signal *= weight; + value += signal * pwr; + pwr *= pwHL; + } + + return value; +} + +/* 3D Musgrave fBm + * + * H: fractal increment parameter + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * + * from "Texturing and Modelling: A procedural 
approach" + */ + +ccl_device_noinline_cpu float noise_musgrave_fBm_3d(float3 co, + float H, + float lacunarity, + float octaves) +{ + float3 p = co; + float value = 0.0f; + float pwr = 1.0f; + float pwHL = powf(lacunarity, -H); + + for (int i = 0; i < float_to_int(octaves); i++) { + value += snoise_3d(p) * pwr; + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + value += rmd * snoise_3d(p) * pwr; + } + + return value; +} + +/* 3D Musgrave Multifractal + * + * H: highest fractal dimension + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + */ + +ccl_device_noinline_cpu float noise_musgrave_multi_fractal_3d(float3 co, + float H, + float lacunarity, + float octaves) +{ + float3 p = co; + float value = 1.0f; + float pwr = 1.0f; + float pwHL = powf(lacunarity, -H); + + for (int i = 0; i < float_to_int(octaves); i++) { + value *= (pwr * snoise_3d(p) + 1.0f); + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + value *= (rmd * pwr * snoise_3d(p) + 1.0f); /* correct? */ + } + + return value; +} + +/* 3D Musgrave Heterogeneous Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_3d( + float3 co, float H, float lacunarity, float octaves, float offset) +{ + float3 p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + /* first unscaled octave of function; later octaves are scaled */ + float value = offset + snoise_3d(p); + p *= lacunarity; + + for (int i = 1; i < float_to_int(octaves); i++) { + float increment = (snoise_3d(p) + offset) * pwr * value; + value += increment; + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + float increment = (snoise_3d(p) + offset) * pwr * value; + value += rmd * increment; + } + + return value; +} + +/* 3D Hybrid Additive/Multiplicative Multifractal Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_3d( + float3 co, float H, float lacunarity, float octaves, float offset, float gain) +{ + float3 p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + float value = snoise_3d(p) + offset; + float weight = gain * value; + p *= lacunarity; + + for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { + if (weight > 1.0f) { + weight = 1.0f; + } + + float signal = (snoise_3d(p) + offset) * pwr; + pwr *= pwHL; + value += weight * signal; + weight *= gain * signal; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + value += rmd * ((snoise_3d(p) + offset) * pwr); + } + + return value; +} + +/* 3D Ridged Multifractal Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_3d( + float3 co, float H, float lacunarity, float octaves, float offset, float gain) +{ + float3 p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + float signal = 
offset - fabsf(snoise_3d(p)); + signal *= signal; + float value = signal; + float weight = 1.0f; + + for (int i = 1; i < float_to_int(octaves); i++) { + p *= lacunarity; + weight = saturate(signal * gain); + signal = offset - fabsf(snoise_3d(p)); + signal *= signal; + signal *= weight; + value += signal * pwr; + pwr *= pwHL; + } + + return value; +} + +/* 4D Musgrave fBm + * + * H: fractal increment parameter + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * + * from "Texturing and Modelling: A procedural approach" + */ + +ccl_device_noinline_cpu float noise_musgrave_fBm_4d(float4 co, + float H, + float lacunarity, + float octaves) +{ + float4 p = co; + float value = 0.0f; + float pwr = 1.0f; + float pwHL = powf(lacunarity, -H); + + for (int i = 0; i < float_to_int(octaves); i++) { + value += snoise_4d(p) * pwr; + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + value += rmd * snoise_4d(p) * pwr; + } + + return value; +} + +/* 4D Musgrave Multifractal + * + * H: highest fractal dimension + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + */ + +ccl_device_noinline_cpu float noise_musgrave_multi_fractal_4d(float4 co, + float H, + float lacunarity, + float octaves) +{ + float4 p = co; + float value = 1.0f; + float pwr = 1.0f; + float pwHL = powf(lacunarity, -H); + + for (int i = 0; i < float_to_int(octaves); i++) { + value *= (pwr * snoise_4d(p) + 1.0f); + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + value *= (rmd * pwr * snoise_4d(p) + 1.0f); /* correct? */ + } + + return value; +} + +/* 4D Musgrave Heterogeneous Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_4d( + float4 co, float H, float lacunarity, float octaves, float offset) +{ + float4 p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + /* first unscaled octave of function; later octaves are scaled */ + float value = offset + snoise_4d(p); + p *= lacunarity; + + for (int i = 1; i < float_to_int(octaves); i++) { + float increment = (snoise_4d(p) + offset) * pwr * value; + value += increment; + pwr *= pwHL; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + float increment = (snoise_4d(p) + offset) * pwr * value; + value += rmd * increment; + } + + return value; +} + +/* 4D Hybrid Additive/Multiplicative Multifractal Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_4d( + float4 co, float H, float lacunarity, float octaves, float offset, float gain) +{ + float4 p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + float value = snoise_4d(p) + offset; + float weight = gain * value; + p *= lacunarity; + + for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { + if (weight > 1.0f) { + weight = 1.0f; + } + + float signal = (snoise_4d(p) + offset) * pwr; + pwr *= pwHL; + value += weight * signal; + weight *= gain * signal; + p *= lacunarity; + } + + float rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { 
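+    /* Blend in the fractional remainder of the last octave. */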
+ value += rmd * ((snoise_4d(p) + offset) * pwr); + } + + return value; +} + +/* 4D Ridged Multifractal Terrain + * + * H: fractal dimension of the roughest area + * lacunarity: gap between successive frequencies + * octaves: number of frequencies in the fBm + * offset: raises the terrain from `sea level' + */ + +ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_4d( + float4 co, float H, float lacunarity, float octaves, float offset, float gain) +{ + float4 p = co; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + + float signal = offset - fabsf(snoise_4d(p)); + signal *= signal; + float value = signal; + float weight = 1.0f; + + for (int i = 1; i < float_to_int(octaves); i++) { + p *= lacunarity; + weight = saturate(signal * gain); + signal = offset - fabsf(snoise_4d(p)); + signal *= signal; + signal *= weight; + value += signal * pwr; + pwr *= pwHL; + } + + return value; +} + +ccl_device_noinline int svm_node_tex_musgrave(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint offsets1, + uint offsets2, + uint offsets3, + int offset) +{ + uint type, dimensions, co_stack_offset, w_stack_offset; + uint scale_stack_offset, detail_stack_offset, dimension_stack_offset, lacunarity_stack_offset; + uint offset_stack_offset, gain_stack_offset, fac_stack_offset; + + svm_unpack_node_uchar4(offsets1, &type, &dimensions, &co_stack_offset, &w_stack_offset); + svm_unpack_node_uchar4(offsets2, + &scale_stack_offset, + &detail_stack_offset, + &dimension_stack_offset, + &lacunarity_stack_offset); + svm_unpack_node_uchar3(offsets3, &offset_stack_offset, &gain_stack_offset, &fac_stack_offset); + + uint4 defaults1 = read_node(kg, &offset); + uint4 defaults2 = read_node(kg, &offset); + + float3 co = stack_load_float3(stack, co_stack_offset); + float w = stack_load_float_default(stack, w_stack_offset, defaults1.x); + float scale = stack_load_float_default(stack, scale_stack_offset, defaults1.y); + float detail = stack_load_float_default(stack, detail_stack_offset, defaults1.z); + float dimension = stack_load_float_default(stack, dimension_stack_offset, defaults1.w); + float lacunarity = stack_load_float_default(stack, lacunarity_stack_offset, defaults2.x); + float foffset = stack_load_float_default(stack, offset_stack_offset, defaults2.y); + float gain = stack_load_float_default(stack, gain_stack_offset, defaults2.z); + + dimension = fmaxf(dimension, 1e-5f); + detail = clamp(detail, 0.0f, 16.0f); + lacunarity = fmaxf(lacunarity, 1e-5f); + + float fac; + + switch (dimensions) { + case 1: { + float p = w * scale; + switch ((NodeMusgraveType)type) { + case NODE_MUSGRAVE_MULTIFRACTAL: + fac = noise_musgrave_multi_fractal_1d(p, dimension, lacunarity, detail); + break; + case NODE_MUSGRAVE_FBM: + fac = noise_musgrave_fBm_1d(p, dimension, lacunarity, detail); + break; + case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL: + fac = noise_musgrave_hybrid_multi_fractal_1d( + p, dimension, lacunarity, detail, foffset, gain); + break; + case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL: + fac = noise_musgrave_ridged_multi_fractal_1d( + p, dimension, lacunarity, detail, foffset, gain); + break; + case NODE_MUSGRAVE_HETERO_TERRAIN: + fac = noise_musgrave_hetero_terrain_1d(p, dimension, lacunarity, detail, foffset); + break; + default: + fac = 0.0f; + } + break; + } + case 2: { + float2 p = make_float2(co.x, co.y) * scale; + switch ((NodeMusgraveType)type) { + case NODE_MUSGRAVE_MULTIFRACTAL: + fac = noise_musgrave_multi_fractal_2d(p, dimension, lacunarity, detail); + break; + case 
NODE_MUSGRAVE_FBM: + fac = noise_musgrave_fBm_2d(p, dimension, lacunarity, detail); + break; + case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL: + fac = noise_musgrave_hybrid_multi_fractal_2d( + p, dimension, lacunarity, detail, foffset, gain); + break; + case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL: + fac = noise_musgrave_ridged_multi_fractal_2d( + p, dimension, lacunarity, detail, foffset, gain); + break; + case NODE_MUSGRAVE_HETERO_TERRAIN: + fac = noise_musgrave_hetero_terrain_2d(p, dimension, lacunarity, detail, foffset); + break; + default: + fac = 0.0f; + } + break; + } + case 3: { + float3 p = co * scale; + switch ((NodeMusgraveType)type) { + case NODE_MUSGRAVE_MULTIFRACTAL: + fac = noise_musgrave_multi_fractal_3d(p, dimension, lacunarity, detail); + break; + case NODE_MUSGRAVE_FBM: + fac = noise_musgrave_fBm_3d(p, dimension, lacunarity, detail); + break; + case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL: + fac = noise_musgrave_hybrid_multi_fractal_3d( + p, dimension, lacunarity, detail, foffset, gain); + break; + case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL: + fac = noise_musgrave_ridged_multi_fractal_3d( + p, dimension, lacunarity, detail, foffset, gain); + break; + case NODE_MUSGRAVE_HETERO_TERRAIN: + fac = noise_musgrave_hetero_terrain_3d(p, dimension, lacunarity, detail, foffset); + break; + default: + fac = 0.0f; + } + break; + } + case 4: { + float4 p = make_float4(co.x, co.y, co.z, w) * scale; + switch ((NodeMusgraveType)type) { + case NODE_MUSGRAVE_MULTIFRACTAL: + fac = noise_musgrave_multi_fractal_4d(p, dimension, lacunarity, detail); + break; + case NODE_MUSGRAVE_FBM: + fac = noise_musgrave_fBm_4d(p, dimension, lacunarity, detail); + break; + case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL: + fac = noise_musgrave_hybrid_multi_fractal_4d( + p, dimension, lacunarity, detail, foffset, gain); + break; + case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL: + fac = noise_musgrave_ridged_multi_fractal_4d( + p, dimension, lacunarity, detail, foffset, gain); + break; + case NODE_MUSGRAVE_HETERO_TERRAIN: + fac = noise_musgrave_hetero_terrain_4d(p, dimension, lacunarity, detail, foffset); + break; + default: + fac = 0.0f; + } + break; + } + default: + fac = 0.0f; + } + + stack_store_float(stack, fac_stack_offset, fac); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/noise.h b/intern/cycles/kernel/svm/noise.h new file mode 100644 index 00000000000..0a1616226db --- /dev/null +++ b/intern/cycles/kernel/svm/noise.h @@ -0,0 +1,744 @@ +/* + * Adapted from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2011, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* **** Perlin Noise **** */ + +ccl_device float fade(float t) +{ + return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f); +} + +ccl_device_inline float negate_if(float val, int condition) +{ + return (condition) ? -val : val; +} + +ccl_device float grad1(int hash, float x) +{ + int h = hash & 15; + float g = 1 + (h & 7); + return negate_if(g, h & 8) * x; +} + +ccl_device_noinline_cpu float perlin_1d(float x) +{ + int X; + float fx = floorfrac(x, &X); + float u = fade(fx); + + return mix(grad1(hash_uint(X), fx), grad1(hash_uint(X + 1), fx - 1.0f), u); +} + +/* 2D, 3D, and 4D noise can be accelerated using SSE, so we first check if + * SSE is supported, that is, if __KERNEL_SSE2__ is defined. If it is not + * supported, we do a standard implementation, but if it is supported, we + * do an implementation using SSE intrinsics. + */ +#if !defined(__KERNEL_SSE2__) + +/* ** Standard Implementation ** */ + +/* Bilinear Interpolation: + * + * v2 v3 + * @ + + + + @ y + * + + ^ + * + + | + * + + | + * @ + + + + @ @------> x + * v0 v1 + * + */ +ccl_device float bi_mix(float v0, float v1, float v2, float v3, float x, float y) +{ + float x1 = 1.0f - x; + return (1.0f - y) * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x); +} + +/* Trilinear Interpolation: + * + * v6 v7 + * @ + + + + + + @ + * +\ +\ + * + \ + \ + * + \ + \ + * + \ v4 + \ v5 + * + @ + + + +++ + @ z + * + + + + y ^ + * v2 @ + +++ + + + @ v3 + \ | + * \ + \ + \ | + * \ + \ + \| + * \ + \ + +---------> x + * \+ \+ + * @ + + + + + + @ + * v0 v1 + */ +ccl_device float tri_mix(float v0, + float v1, + float v2, + float v3, + float v4, + float v5, + float v6, + float v7, + float x, + float y, + float z) +{ + float x1 = 1.0f - x; + float y1 = 1.0f - y; + float z1 = 1.0f - z; + return z1 * (y1 * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x)) + + z * (y1 * (v4 * x1 + v5 * x) + y * (v6 * x1 + v7 * x)); +} + +ccl_device float quad_mix(float v0, + float v1, + float v2, + float v3, + float v4, + float v5, + float v6, + float v7, + float v8, + float v9, + float v10, + float v11, + float v12, + float v13, + float v14, + float v15, + float x, + float y, + float z, + float w) +{ + return mix(tri_mix(v0, v1, v2, v3, v4, v5, v6, v7, x, y, z), + tri_mix(v8, v9, v10, v11, v12, v13, v14, v15, x, y, z), + w); +} + +ccl_device float grad2(int hash, float x, float y) +{ + int h = hash & 7; + float u = h < 4 ? x : y; + float v = 2.0f * (h < 4 ? y : x); + return negate_if(u, h & 1) + negate_if(v, h & 2); +} + +ccl_device float grad3(int hash, float x, float y, float z) +{ + int h = hash & 15; + float u = h < 8 ? x : y; + float vt = ((h == 12) || (h == 14)) ? x : z; + float v = h < 4 ? 
y : vt; + return negate_if(u, h & 1) + negate_if(v, h & 2); +} + +ccl_device float grad4(int hash, float x, float y, float z, float w) +{ + int h = hash & 31; + float u = h < 24 ? x : y; + float v = h < 16 ? y : z; + float s = h < 8 ? z : w; + return negate_if(u, h & 1) + negate_if(v, h & 2) + negate_if(s, h & 4); +} + +ccl_device_noinline_cpu float perlin_2d(float x, float y) +{ + int X; + int Y; + + float fx = floorfrac(x, &X); + float fy = floorfrac(y, &Y); + + float u = fade(fx); + float v = fade(fy); + + float r = bi_mix(grad2(hash_uint2(X, Y), fx, fy), + grad2(hash_uint2(X + 1, Y), fx - 1.0f, fy), + grad2(hash_uint2(X, Y + 1), fx, fy - 1.0f), + grad2(hash_uint2(X + 1, Y + 1), fx - 1.0f, fy - 1.0f), + u, + v); + + return r; +} + +ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) +{ + int X; + int Y; + int Z; + + float fx = floorfrac(x, &X); + float fy = floorfrac(y, &Y); + float fz = floorfrac(z, &Z); + + float u = fade(fx); + float v = fade(fy); + float w = fade(fz); + + float r = tri_mix(grad3(hash_uint3(X, Y, Z), fx, fy, fz), + grad3(hash_uint3(X + 1, Y, Z), fx - 1.0f, fy, fz), + grad3(hash_uint3(X, Y + 1, Z), fx, fy - 1.0f, fz), + grad3(hash_uint3(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz), + grad3(hash_uint3(X, Y, Z + 1), fx, fy, fz - 1.0f), + grad3(hash_uint3(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f), + grad3(hash_uint3(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f), + grad3(hash_uint3(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f), + u, + v, + w); + return r; +} + +ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) +{ + int X; + int Y; + int Z; + int W; + + float fx = floorfrac(x, &X); + float fy = floorfrac(y, &Y); + float fz = floorfrac(z, &Z); + float fw = floorfrac(w, &W); + + float u = fade(fx); + float v = fade(fy); + float t = fade(fz); + float s = fade(fw); + + float r = quad_mix( + grad4(hash_uint4(X, Y, Z, W), fx, fy, fz, fw), + grad4(hash_uint4(X + 1, Y, Z, W), fx - 1.0f, fy, fz, fw), + grad4(hash_uint4(X, Y + 1, Z, W), fx, fy - 1.0f, fz, fw), + grad4(hash_uint4(X + 1, Y + 1, Z, W), fx - 1.0f, fy - 1.0f, fz, fw), + grad4(hash_uint4(X, Y, Z + 1, W), fx, fy, fz - 1.0f, fw), + grad4(hash_uint4(X + 1, Y, Z + 1, W), fx - 1.0f, fy, fz - 1.0f, fw), + grad4(hash_uint4(X, Y + 1, Z + 1, W), fx, fy - 1.0f, fz - 1.0f, fw), + grad4(hash_uint4(X + 1, Y + 1, Z + 1, W), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw), + grad4(hash_uint4(X, Y, Z, W + 1), fx, fy, fz, fw - 1.0f), + grad4(hash_uint4(X + 1, Y, Z, W + 1), fx - 1.0f, fy, fz, fw - 1.0f), + grad4(hash_uint4(X, Y + 1, Z, W + 1), fx, fy - 1.0f, fz, fw - 1.0f), + grad4(hash_uint4(X + 1, Y + 1, Z, W + 1), fx - 1.0f, fy - 1.0f, fz, fw - 1.0f), + grad4(hash_uint4(X, Y, Z + 1, W + 1), fx, fy, fz - 1.0f, fw - 1.0f), + grad4(hash_uint4(X + 1, Y, Z + 1, W + 1), fx - 1.0f, fy, fz - 1.0f, fw - 1.0f), + grad4(hash_uint4(X, Y + 1, Z + 1, W + 1), fx, fy - 1.0f, fz - 1.0f, fw - 1.0f), + grad4(hash_uint4(X + 1, Y + 1, Z + 1, W + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw - 1.0f), + u, + v, + t, + s); + + return r; +} + +#else /* SSE is supported. */ + +/* ** SSE Implementation ** */ + +/* SSE Bilinear Interpolation: + * + * The function takes two ssef inputs: + * - p : Contains the values at the points (v0, v1, v2, v3). + * - f : Contains the values (x, y, _, _). The third and fourth values are unused. + * + * The interpolation is done in two steps: + * 1. Interpolate (v0, v1) and (v2, v3) along the x axis to get g (g0, g1). 
+ * (v2, v3) is generated by moving v2 and v3 to the first and second + * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and + * fourth values are unused. + * 2. Interpolate g0 and g1 along the y axis to get the final value. + * g1 is generated by populating an ssef with the second value of g. + * Only the first value is important in the final ssef. + * + * v1 v3 g1 + * @ + + + + @ @ y + * + + (1) + (2) ^ + * + + ---> + ---> final | + * + + + | + * @ + + + + @ @ @------> x + * v0 v2 g0 + * + */ +ccl_device_inline ssef bi_mix(ssef p, ssef f) +{ + ssef g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f)); + return mix(g, shuffle<1>(g), shuffle<1>(f)); +} + +ccl_device_inline ssef fade(const ssef &t) +{ + ssef a = madd(t, 6.0f, -15.0f); + ssef b = madd(t, a, 10.0f); + return (t * t) * (t * b); +} + +/* Negate val if the nth bit of h is 1. */ +# define negate_if_nth_bit(val, h, n) ((val) ^ cast(((h) & (1 << (n))) << (31 - (n)))) + +ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y) +{ + ssei h = hash & 7; + ssef u = select(h < 4, x, y); + ssef v = 2.0f * select(h < 4, y, x); + return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1); +} + +/* We use SSE to compute and interpolate 4 gradients at once: + * + * Point Offset from v0 + * v0 (0, 0) + * v1 (0, 1) + * v2 (1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(V, V + 1)) + * v3 (1, 1) ^ + * | |__________| (0, 0, 1, 1) = shuffle<0, 0, 0, 0>(V, V + 1) + * | ^ + * |__________________________| + * + */ +ccl_device_noinline_cpu float perlin_2d(float x, float y) +{ + ssei XY; + ssef fxy = floorfrac(ssef(x, y, 0.0f, 0.0f), &XY); + ssef uv = fade(fxy); + + ssei XY1 = XY + 1; + ssei X = shuffle<0, 0, 0, 0>(XY, XY1); + ssei Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1)); + + ssei h = hash_ssei2(X, Y); + + ssef fxy1 = fxy - 1.0f; + ssef fx = shuffle<0, 0, 0, 0>(fxy, fxy1); + ssef fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1)); + + ssef g = grad(h, fx, fy); + + return extract<0>(bi_mix(g, uv)); +} + +/* SSE Trilinear Interpolation: + * + * The function takes three ssef inputs: + * - p : Contains the values at the points (v0, v1, v2, v3). + * - q : Contains the values at the points (v4, v5, v6, v7). + * - f : Contains the values (x, y, z, _). The fourth value is unused. + * + * The interpolation is done in three steps: + * 1. Interpolate p and q along the x axis to get s (s0, s1, s2, s3). + * 2. Interpolate (s0, s1) and (s2, s3) along the y axis to get g (g0, g1). + * (s2, s3) is generated by moving v2 and v3 to the first and second + * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and + * fourth values are unused. + * 3. Interpolate g0 and g1 along the z axis to get the final value. + * g1 is generated by populating an ssef with the second value of g. + * Only the first value is important in the final ssef. 
+ * + * v3 v7 + * @ + + + + + + @ s3 @ + * +\ +\ +\ + * + \ + \ + \ + * + \ + \ + \ g1 + * + \ v1 + \ v5 + \ s1 @ + * + @ + + + +++ + @ + @ + z + * + + + + (1) + + (2) + (3) y ^ + * v2 @ + +++ + + + @ v6 + ---> s2 @ + ---> + ---> final \ | + * \ + \ + \ + + \ | + * \ + \ + \ + + \| + * \ + \ + \ + @ +---------> x + * \+ \+ \+ g0 + * @ + + + + + + @ @ + * v0 v4 s0 + */ +ccl_device_inline ssef tri_mix(ssef p, ssef q, ssef f) +{ + ssef s = mix(p, q, shuffle<0>(f)); + ssef g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f)); + return mix(g, shuffle<1>(g), shuffle<2>(f)); +} + +/* 3D and 4D noise can be accelerated using AVX, so we first check if AVX + * is supported, that is, if __KERNEL_AVX__ is defined. If it is not + * supported, we do an SSE implementation, but if it is supported, + * we do an implementation using AVX intrinsics. + */ +# if !defined(__KERNEL_AVX__) + +ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z) +{ + ssei h = hash & 15; + ssef u = select(h < 8, x, y); + ssef vt = select((h == 12) | (h == 14), x, z); + ssef v = select(h < 4, y, vt); + return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1); +} + +ccl_device_inline ssef +grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z, const ssef &w) +{ + ssei h = hash & 31; + ssef u = select(h < 24, x, y); + ssef v = select(h < 16, y, z); + ssef s = select(h < 8, z, w); + return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2); +} + +/* SSE Quadrilinear Interpolation: + * + * Quadrilinear interpolation is as simple as a linear interpolation + * between two trilinear interpolations. + * + */ +ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f) +{ + return mix(tri_mix(p, q, f), tri_mix(r, s, f), shuffle<3>(f)); +} + +/* We use SSE to compute and interpolate 4 gradients at once. Since we have 8 + * gradients in 3D, we need to compute two sets of gradients at the points: + * + * Point Offset from v0 + * v0 (0, 0, 0) + * v1 (0, 0, 1) + * v2 (0, 1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1)) + * v3 (0, 1, 1) ^ + * | |__________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1) + * | ^ + * |__________________________| + * + * Point Offset from v0 + * v4 (1, 0, 0) + * v5 (1, 0, 1) + * v6 (1, 1, 0) + * v7 (1, 1, 1) + * + */ +ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) +{ + ssei XYZ; + ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ); + ssef uvw = fade(fxyz); + + ssei XYZ1 = XYZ + 1; + ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1); + ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1)); + + ssei h1 = hash_ssei3(shuffle<0>(XYZ), Y, Z); + ssei h2 = hash_ssei3(shuffle<0>(XYZ1), Y, Z); + + ssef fxyz1 = fxyz - 1.0f; + ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1); + ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1)); + + ssef g1 = grad(h1, shuffle<0>(fxyz), fy, fz); + ssef g2 = grad(h2, shuffle<0>(fxyz1), fy, fz); + + return extract<0>(tri_mix(g1, g2, uvw)); +} + +/* We use SSE to compute and interpolate 4 gradients at once. 
Since we have 16 + * gradients in 4D, we need to compute four sets of gradients at the points: + * + * Point Offset from v0 + * v0 (0, 0, 0, 0) + * v1 (0, 0, 1, 0) + * v2 (0, 1, 0, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1)) + * v3 (0, 1, 1, 0) ^ + * | |________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1) + * | ^ + * |_______________________| + * + * Point Offset from v0 + * v4 (1, 0, 0, 0) + * v5 (1, 0, 1, 0) + * v6 (1, 1, 0, 0) + * v7 (1, 1, 1, 0) + * + * Point Offset from v0 + * v8 (0, 0, 0, 1) + * v9 (0, 0, 1, 1) + * v10 (0, 1, 0, 1) + * v11 (0, 1, 1, 1) + * + * Point Offset from v0 + * v12 (1, 0, 0, 1) + * v13 (1, 0, 1, 1) + * v14 (1, 1, 0, 1) + * v15 (1, 1, 1, 1) + * + */ +ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) +{ + ssei XYZW; + ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW); + ssef uvws = fade(fxyzw); + + ssei XYZW1 = XYZW + 1; + ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1); + ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1)); + + ssei h1 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW)); + ssei h2 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW)); + + ssei h3 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1)); + ssei h4 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1)); + + ssef fxyzw1 = fxyzw - 1.0f; + ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1); + ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1)); + + ssef g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw)); + ssef g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw)); + + ssef g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1)); + ssef g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1)); + + return extract<0>(quad_mix(g1, g2, g3, g4, uvws)); +} + +# else /* AVX is supported. */ + +/* AVX Implementation */ + +ccl_device_inline avxf grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z) +{ + avxi h = hash & 15; + avxf u = select(h < 8, x, y); + avxf vt = select((h == 12) | (h == 14), x, z); + avxf v = select(h < 4, y, vt); + return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1); +} + +ccl_device_inline avxf +grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z, const avxf &w) +{ + avxi h = hash & 31; + avxf u = select(h < 24, x, y); + avxf v = select(h < 16, y, z); + avxf s = select(h < 8, z, w); + return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2); +} + +/* SSE Quadrilinear Interpolation: + * + * The interpolation is done in two steps: + * 1. Interpolate p and q along the w axis to get s. + * 2. Trilinearly interpolate (s0, s1, s2, s3) and (s4, s5, s6, s7) to get the final + * value. (s0, s1, s2, s3) and (s4, s5, s6, s7) are generated by extracting the + * low and high ssef from s. + * + */ +ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f) +{ + ssef fv = shuffle<3>(f); + avxf s = mix(p, q, avxf(fv, fv)); + return tri_mix(low(s), high(s), f); +} + +/* We use AVX to compute and interpolate 8 gradients at once. + * + * Point Offset from v0 + * v0 (0, 0, 0) + * v1 (0, 0, 1) The full AVX type is computed by inserting the following + * v2 (0, 1, 0) SSE types into both the low and high parts of the AVX. 
+ * v3 (0, 1, 1) + * v4 (1, 0, 0) + * v5 (1, 0, 1) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1)) + * v6 (1, 1, 0) ^ + * v7 (1, 1, 1) | + * | |__________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1) + * | ^ + * |__________________________| + * + */ +ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) +{ + ssei XYZ; + ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ); + ssef uvw = fade(fxyz); + + ssei XYZ1 = XYZ + 1; + ssei X = shuffle<0>(XYZ); + ssei X1 = shuffle<0>(XYZ1); + ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1); + ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1)); + + avxi h = hash_avxi3(avxi(X, X1), avxi(Y, Y), avxi(Z, Z)); + + ssef fxyz1 = fxyz - 1.0f; + ssef fx = shuffle<0>(fxyz); + ssef fx1 = shuffle<0>(fxyz1); + ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1); + ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1)); + + avxf g = grad(h, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz)); + + return extract<0>(tri_mix(low(g), high(g), uvw)); +} + +/* We use AVX to compute and interpolate 8 gradients at once. Since we have 16 + * gradients in 4D, we need to compute two sets of gradients at the points: + * + * Point Offset from v0 + * v0 (0, 0, 0, 0) + * v1 (0, 0, 1, 0) The full AVX type is computed by inserting the following + * v2 (0, 1, 0, 0) SSE types into both the low and high parts of the AVX. + * v3 (0, 1, 1, 0) + * v4 (1, 0, 0, 0) + * v5 (1, 0, 1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1)) + * v6 (1, 1, 0, 0) ^ + * v7 (1, 1, 1, 0) | + * | |________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1) + * | ^ + * |_______________________| + * + * Point Offset from v0 + * v8 (0, 0, 0, 1) + * v9 (0, 0, 1, 1) + * v10 (0, 1, 0, 1) + * v11 (0, 1, 1, 1) + * v12 (1, 0, 0, 1) + * v13 (1, 0, 1, 1) + * v14 (1, 1, 0, 1) + * v15 (1, 1, 1, 1) + * + */ +ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) +{ + ssei XYZW; + ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW); + ssef uvws = fade(fxyzw); + + ssei XYZW1 = XYZW + 1; + ssei X = shuffle<0>(XYZW); + ssei X1 = shuffle<0>(XYZW1); + ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1); + ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1)); + ssei W = shuffle<3>(XYZW); + ssei W1 = shuffle<3>(XYZW1); + + avxi h1 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W, W)); + avxi h2 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W1, W1)); + + ssef fxyzw1 = fxyzw - 1.0f; + ssef fx = shuffle<0>(fxyzw); + ssef fx1 = shuffle<0>(fxyzw1); + ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1); + ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1)); + ssef fw = shuffle<3>(fxyzw); + ssef fw1 = shuffle<3>(fxyzw1); + + avxf g1 = grad(h1, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw, fw)); + avxf g2 = grad(h2, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw1, fw1)); + + return extract<0>(quad_mix(g1, g2, uvws)); +} +# endif + +# undef negate_if_nth_bit + +#endif + +/* Remap the output of noise to a predictable range [-1, 1]. + * The scale values were computed experimentally by the OSL developers. 
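+ * The raw amplitude of the gradient noise above differs per dimension, so each dimension gets its own factor.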
+ */ + +ccl_device_inline float noise_scale1(float result) +{ + return 0.2500f * result; +} + +ccl_device_inline float noise_scale2(float result) +{ + return 0.6616f * result; +} + +ccl_device_inline float noise_scale3(float result) +{ + return 0.9820f * result; +} + +ccl_device_inline float noise_scale4(float result) +{ + return 0.8344f * result; +} + +/* Safe Signed And Unsigned Noise */ + +ccl_device_inline float snoise_1d(float p) +{ + return noise_scale1(ensure_finite(perlin_1d(p))); +} + +ccl_device_inline float noise_1d(float p) +{ + return 0.5f * snoise_1d(p) + 0.5f; +} + +ccl_device_inline float snoise_2d(float2 p) +{ + return noise_scale2(ensure_finite(perlin_2d(p.x, p.y))); +} + +ccl_device_inline float noise_2d(float2 p) +{ + return 0.5f * snoise_2d(p) + 0.5f; +} + +ccl_device_inline float snoise_3d(float3 p) +{ + return noise_scale3(ensure_finite(perlin_3d(p.x, p.y, p.z))); +} + +ccl_device_inline float noise_3d(float3 p) +{ + return 0.5f * snoise_3d(p) + 0.5f; +} + +ccl_device_inline float snoise_4d(float4 p) +{ + return noise_scale4(ensure_finite(perlin_4d(p.x, p.y, p.z, p.w))); +} + +ccl_device_inline float noise_4d(float4 p) +{ + return 0.5f * snoise_4d(p) + 0.5f; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/noisetex.h b/intern/cycles/kernel/svm/noisetex.h new file mode 100644 index 00000000000..c43c3b9f9d2 --- /dev/null +++ b/intern/cycles/kernel/svm/noisetex.h @@ -0,0 +1,222 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/svm/fractal_noise.h" + +CCL_NAMESPACE_BEGIN + +/* The following offset functions generate random offsets to be added to texture + * coordinates to act as a seed since the noise functions don't have seed values. + * A seed value is needed for generating distortion textures and color outputs. + * The offset's components are in the range [100, 200], not too high to cause + * bad precision and not too small to be noticeable. We use float seed because + * OSL only support float hashes. 
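+ *
+ * For example, random_float_offset(1.0f) evaluates to
+ * 100.0f + hash_float_to_float(1.0f) * 100.0f, a fixed value in [100, 200] that is
+ * simply added to the texture coordinate, so the distortion sample and each color
+ * channel below sample the noise field at decorrelated positions.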
+ */ + +ccl_device_inline float random_float_offset(float seed) +{ + return 100.0f + hash_float_to_float(seed) * 100.0f; +} + +ccl_device_inline float2 random_float2_offset(float seed) +{ + return make_float2(100.0f + hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f, + 100.0f + hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f); +} + +ccl_device_inline float3 random_float3_offset(float seed) +{ + return make_float3(100.0f + hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f, + 100.0f + hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f, + 100.0f + hash_float2_to_float(make_float2(seed, 2.0f)) * 100.0f); +} + +ccl_device_inline float4 random_float4_offset(float seed) +{ + return make_float4(100.0f + hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f, + 100.0f + hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f, + 100.0f + hash_float2_to_float(make_float2(seed, 2.0f)) * 100.0f, + 100.0f + hash_float2_to_float(make_float2(seed, 3.0f)) * 100.0f); +} + +ccl_device void noise_texture_1d(float co, + float detail, + float roughness, + float distortion, + bool color_is_needed, + ccl_private float *value, + ccl_private float3 *color) +{ + float p = co; + if (distortion != 0.0f) { + p += snoise_1d(p + random_float_offset(0.0f)) * distortion; + } + + *value = fractal_noise_1d(p, detail, roughness); + if (color_is_needed) { + *color = make_float3(*value, + fractal_noise_1d(p + random_float_offset(1.0f), detail, roughness), + fractal_noise_1d(p + random_float_offset(2.0f), detail, roughness)); + } +} + +ccl_device void noise_texture_2d(float2 co, + float detail, + float roughness, + float distortion, + bool color_is_needed, + ccl_private float *value, + ccl_private float3 *color) +{ + float2 p = co; + if (distortion != 0.0f) { + p += make_float2(snoise_2d(p + random_float2_offset(0.0f)) * distortion, + snoise_2d(p + random_float2_offset(1.0f)) * distortion); + } + + *value = fractal_noise_2d(p, detail, roughness); + if (color_is_needed) { + *color = make_float3(*value, + fractal_noise_2d(p + random_float2_offset(2.0f), detail, roughness), + fractal_noise_2d(p + random_float2_offset(3.0f), detail, roughness)); + } +} + +ccl_device void noise_texture_3d(float3 co, + float detail, + float roughness, + float distortion, + bool color_is_needed, + ccl_private float *value, + ccl_private float3 *color) +{ + float3 p = co; + if (distortion != 0.0f) { + p += make_float3(snoise_3d(p + random_float3_offset(0.0f)) * distortion, + snoise_3d(p + random_float3_offset(1.0f)) * distortion, + snoise_3d(p + random_float3_offset(2.0f)) * distortion); + } + + *value = fractal_noise_3d(p, detail, roughness); + if (color_is_needed) { + *color = make_float3(*value, + fractal_noise_3d(p + random_float3_offset(3.0f), detail, roughness), + fractal_noise_3d(p + random_float3_offset(4.0f), detail, roughness)); + } +} + +ccl_device void noise_texture_4d(float4 co, + float detail, + float roughness, + float distortion, + bool color_is_needed, + ccl_private float *value, + ccl_private float3 *color) +{ + float4 p = co; + if (distortion != 0.0f) { + p += make_float4(snoise_4d(p + random_float4_offset(0.0f)) * distortion, + snoise_4d(p + random_float4_offset(1.0f)) * distortion, + snoise_4d(p + random_float4_offset(2.0f)) * distortion, + snoise_4d(p + random_float4_offset(3.0f)) * distortion); + } + + *value = fractal_noise_4d(p, detail, roughness); + if (color_is_needed) { + *color = make_float3(*value, + fractal_noise_4d(p + random_float4_offset(4.0f), detail, roughness), + fractal_noise_4d(p + 
random_float4_offset(5.0f), detail, roughness)); + } +} + +ccl_device_noinline int svm_node_tex_noise(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint dimensions, + uint offsets1, + uint offsets2, + int offset) +{ + uint vector_stack_offset, w_stack_offset, scale_stack_offset; + uint detail_stack_offset, roughness_stack_offset, distortion_stack_offset; + uint value_stack_offset, color_stack_offset; + + svm_unpack_node_uchar4( + offsets1, &vector_stack_offset, &w_stack_offset, &scale_stack_offset, &detail_stack_offset); + svm_unpack_node_uchar4(offsets2, + &roughness_stack_offset, + &distortion_stack_offset, + &value_stack_offset, + &color_stack_offset); + + uint4 defaults1 = read_node(kg, &offset); + uint4 defaults2 = read_node(kg, &offset); + + float3 vector = stack_load_float3(stack, vector_stack_offset); + float w = stack_load_float_default(stack, w_stack_offset, defaults1.x); + float scale = stack_load_float_default(stack, scale_stack_offset, defaults1.y); + float detail = stack_load_float_default(stack, detail_stack_offset, defaults1.z); + float roughness = stack_load_float_default(stack, roughness_stack_offset, defaults1.w); + float distortion = stack_load_float_default(stack, distortion_stack_offset, defaults2.x); + + vector *= scale; + w *= scale; + + float value; + float3 color; + switch (dimensions) { + case 1: + noise_texture_1d( + w, detail, roughness, distortion, stack_valid(color_stack_offset), &value, &color); + break; + case 2: + noise_texture_2d(make_float2(vector.x, vector.y), + detail, + roughness, + distortion, + stack_valid(color_stack_offset), + &value, + &color); + break; + case 3: + noise_texture_3d( + vector, detail, roughness, distortion, stack_valid(color_stack_offset), &value, &color); + break; + case 4: + noise_texture_4d(make_float4(vector.x, vector.y, vector.z, w), + detail, + roughness, + distortion, + stack_valid(color_stack_offset), + &value, + &color); + break; + default: + kernel_assert(0); + } + + if (stack_valid(value_stack_offset)) { + stack_store_float(stack, value_stack_offset, value); + } + if (stack_valid(color_stack_offset)) { + stack_store_float3(stack, color_stack_offset, color); + } + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/normal.h b/intern/cycles/kernel/svm/normal.h new file mode 100644 index 00000000000..6a2d88b68a6 --- /dev/null +++ b/intern/cycles/kernel/svm/normal.h @@ -0,0 +1,47 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline int svm_node_normal(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint in_normal_offset, + uint out_normal_offset, + uint out_dot_offset, + int offset) +{ + /* read extra data */ + uint4 node1 = read_node(kg, &offset); + float3 normal = stack_load_float3(stack, in_normal_offset); + + float3 direction; + direction.x = __int_as_float(node1.x); + direction.y = __int_as_float(node1.y); + direction.z = __int_as_float(node1.z); + direction = normalize(direction); + + if (stack_valid(out_normal_offset)) + stack_store_float3(stack, out_normal_offset, direction); + + if (stack_valid(out_dot_offset)) + stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal))); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/ramp.h b/intern/cycles/kernel/svm/ramp.h new file mode 100644 index 00000000000..1dc3383956d --- /dev/null +++ b/intern/cycles/kernel/svm/ramp.h @@ -0,0 +1,165 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ + +ccl_device_inline float fetch_float(KernelGlobals kg, int offset) +{ + uint4 node = kernel_tex_fetch(__svm_nodes, offset); + return __uint_as_float(node.x); +} + +ccl_device_inline float float_ramp_lookup( + KernelGlobals kg, int offset, float f, bool interpolate, bool extrapolate, int table_size) +{ + if ((f < 0.0f || f > 1.0f) && extrapolate) { + float t0, dy; + if (f < 0.0f) { + t0 = fetch_float(kg, offset); + dy = t0 - fetch_float(kg, offset + 1); + f = -f; + } + else { + t0 = fetch_float(kg, offset + table_size - 1); + dy = t0 - fetch_float(kg, offset + table_size - 2); + f = f - 1.0f; + } + return t0 + dy * f * (table_size - 1); + } + + f = saturate(f) * (table_size - 1); + + /* clamp int as well in case of NaN */ + int i = clamp(float_to_int(f), 0, table_size - 1); + float t = f - (float)i; + + float a = fetch_float(kg, offset + i); + + if (interpolate && t > 0.0f) + a = (1.0f - t) * a + t * fetch_float(kg, offset + i + 1); + + return a; +} + +ccl_device_inline float4 rgb_ramp_lookup( + KernelGlobals kg, int offset, float f, bool interpolate, bool extrapolate, int table_size) +{ + if ((f < 0.0f || f > 1.0f) && extrapolate) { + float4 t0, dy; + if (f < 0.0f) { + t0 = fetch_node_float(kg, offset); + dy = t0 - fetch_node_float(kg, offset + 1); + f = -f; + } + else { + t0 = fetch_node_float(kg, offset + table_size - 1); + dy = t0 - fetch_node_float(kg, offset + table_size - 2); + f = f - 1.0f; + } + return t0 + dy * f * (table_size - 1); + } + + f = saturate(f) * (table_size - 1); + + /* clamp int as well in case of NaN */ + int i = clamp(float_to_int(f), 0, table_size - 1); + float t = f - (float)i; + + float4 a = fetch_node_float(kg, offset + i); + + if (interpolate && t > 0.0f) + a = (1.0f - t) * a + t * fetch_node_float(kg, offset + i + 1); + + return a; 
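+
+  /* Note on the extrapolation branch above: for f outside [0, 1] the ramp is
+   * continued linearly from the nearest end, e.g. f = 1.25f returns
+   * t0 + dy * 0.25f * (table_size - 1), with t0 and dy taken from the last two
+   * table entries. */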
+} + +ccl_device_noinline int svm_node_rgb_ramp( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) +{ + uint fac_offset, color_offset, alpha_offset; + uint interpolate = node.z; + + svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &alpha_offset); + + uint table_size = read_node(kg, &offset).x; + + float fac = stack_load_float(stack, fac_offset); + float4 color = rgb_ramp_lookup(kg, offset, fac, interpolate, false, table_size); + + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, float4_to_float3(color)); + if (stack_valid(alpha_offset)) + stack_store_float(stack, alpha_offset, color.w); + + offset += table_size; + return offset; +} + +ccl_device_noinline int svm_node_curves( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) +{ + uint fac_offset, color_offset, out_offset; + svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &out_offset); + + uint table_size = read_node(kg, &offset).x; + + float fac = stack_load_float(stack, fac_offset); + float3 color = stack_load_float3(stack, color_offset); + + const float min_x = __int_as_float(node.z), max_x = __int_as_float(node.w); + const float range_x = max_x - min_x; + const float3 relpos = (color - make_float3(min_x, min_x, min_x)) / range_x; + + float r = rgb_ramp_lookup(kg, offset, relpos.x, true, true, table_size).x; + float g = rgb_ramp_lookup(kg, offset, relpos.y, true, true, table_size).y; + float b = rgb_ramp_lookup(kg, offset, relpos.z, true, true, table_size).z; + + color = (1.0f - fac) * color + fac * make_float3(r, g, b); + stack_store_float3(stack, out_offset, color); + + offset += table_size; + return offset; +} + +ccl_device_noinline int svm_node_curve( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) +{ + uint fac_offset, value_in_offset, out_offset; + svm_unpack_node_uchar3(node.y, &fac_offset, &value_in_offset, &out_offset); + + uint table_size = read_node(kg, &offset).x; + + float fac = stack_load_float(stack, fac_offset); + float in = stack_load_float(stack, value_in_offset); + + const float min = __int_as_float(node.z), max = __int_as_float(node.w); + const float range = max - min; + const float relpos = (in - min) / range; + + float v = float_ramp_lookup(kg, offset, relpos, true, true, table_size); + + in = (1.0f - fac) * in + fac * v; + stack_store_float(stack, out_offset, in); + + offset += table_size; + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/ramp_util.h b/intern/cycles/kernel/svm/ramp_util.h new file mode 100644 index 00000000000..f5951f7e283 --- /dev/null +++ b/intern/cycles/kernel/svm/ramp_util.h @@ -0,0 +1,87 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ + +ccl_device_inline float3 +rgb_ramp_lookup(const float3 *ramp, float f, bool interpolate, bool extrapolate, int table_size) +{ + if ((f < 0.0f || f > 1.0f) && extrapolate) { + float3 t0, dy; + if (f < 0.0f) { + t0 = ramp[0]; + dy = t0 - ramp[1], f = -f; + } + else { + t0 = ramp[table_size - 1]; + dy = t0 - ramp[table_size - 2]; + f = f - 1.0f; + } + return t0 + dy * f * (table_size - 1); + } + + f = clamp(f, 0.0f, 1.0f) * (table_size - 1); + + /* clamp int as well in case of NaN */ + int i = clamp(float_to_int(f), 0, table_size - 1); + float t = f - (float)i; + + float3 result = ramp[i]; + + if (interpolate && t > 0.0f) { + result = (1.0f - t) * result + t * ramp[i + 1]; + } + + return result; +} + +ccl_device float float_ramp_lookup( + const float *ramp, float f, bool interpolate, bool extrapolate, int table_size) +{ + if ((f < 0.0f || f > 1.0f) && extrapolate) { + float t0, dy; + if (f < 0.0f) { + t0 = ramp[0]; + dy = t0 - ramp[1], f = -f; + } + else { + t0 = ramp[table_size - 1]; + dy = t0 - ramp[table_size - 2]; + f = f - 1.0f; + } + return t0 + dy * f * (table_size - 1); + } + + f = clamp(f, 0.0f, 1.0f) * (table_size - 1); + + /* clamp int as well in case of NaN */ + int i = clamp(float_to_int(f), 0, table_size - 1); + float t = f - (float)i; + + float result = ramp[i]; + + if (interpolate && t > 0.0f) { + result = (1.0f - t) * result + t * ramp[i + 1]; + } + + return result; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/sepcomb_hsv.h b/intern/cycles/kernel/svm/sepcomb_hsv.h new file mode 100644 index 00000000000..941a83e85b3 --- /dev/null +++ b/intern/cycles/kernel/svm/sepcomb_hsv.h @@ -0,0 +1,69 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline int svm_node_combine_hsv(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint hue_in, + uint saturation_in, + uint value_in, + int offset) +{ + uint4 node1 = read_node(kg, &offset); + uint color_out = node1.y; + + float hue = stack_load_float(stack, hue_in); + float saturation = stack_load_float(stack, saturation_in); + float value = stack_load_float(stack, value_in); + + /* Combine, and convert back to RGB */ + float3 color = hsv_to_rgb(make_float3(hue, saturation, value)); + + if (stack_valid(color_out)) + stack_store_float3(stack, color_out, color); + return offset; +} + +ccl_device_noinline int svm_node_separate_hsv(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint color_in, + uint hue_out, + uint saturation_out, + int offset) +{ + uint4 node1 = read_node(kg, &offset); + uint value_out = node1.y; + + float3 color = stack_load_float3(stack, color_in); + + /* Convert to HSV */ + color = rgb_to_hsv(color); + + if (stack_valid(hue_out)) + stack_store_float(stack, hue_out, color.x); + if (stack_valid(saturation_out)) + stack_store_float(stack, saturation_out, color.y); + if (stack_valid(value_out)) + stack_store_float(stack, value_out, color.z); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/sepcomb_vector.h b/intern/cycles/kernel/svm/sepcomb_vector.h new file mode 100644 index 00000000000..acdea741aed --- /dev/null +++ b/intern/cycles/kernel/svm/sepcomb_vector.h @@ -0,0 +1,53 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Vector combine / separate, used for the RGB and XYZ nodes */ + +ccl_device void svm_node_combine_vector(ccl_private ShaderData *sd, + ccl_private float *stack, + uint in_offset, + uint vector_index, + uint out_offset) +{ + float vector = stack_load_float(stack, in_offset); + + if (stack_valid(out_offset)) + stack_store_float(stack, out_offset + vector_index, vector); +} + +ccl_device void svm_node_separate_vector(ccl_private ShaderData *sd, + ccl_private float *stack, + uint ivector_offset, + uint vector_index, + uint out_offset) +{ + float3 vector = stack_load_float3(stack, ivector_offset); + + if (stack_valid(out_offset)) { + if (vector_index == 0) + stack_store_float(stack, out_offset, vector.x); + else if (vector_index == 1) + stack_store_float(stack, out_offset, vector.y); + else + stack_store_float(stack, out_offset, vector.z); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/sky.h b/intern/cycles/kernel/svm/sky.h new file mode 100644 index 00000000000..867fdfc2a3f --- /dev/null +++ b/intern/cycles/kernel/svm/sky.h @@ -0,0 +1,335 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Sky texture */ + +ccl_device float sky_angle_between(float thetav, float phiv, float theta, float phi) +{ + float cospsi = sinf(thetav) * sinf(theta) * cosf(phi - phiv) + cosf(thetav) * cosf(theta); + return safe_acosf(cospsi); +} + +/* + * "A Practical Analytic Model for Daylight" + * A. J. Preetham, Peter Shirley, Brian Smits + */ +ccl_device float sky_perez_function(ccl_private float *lam, float theta, float gamma) +{ + float ctheta = cosf(theta); + float cgamma = cosf(gamma); + + return (1.0f + lam[0] * expf(lam[1] / ctheta)) * + (1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma); +} + +ccl_device float3 sky_radiance_preetham(KernelGlobals kg, + float3 dir, + float sunphi, + float suntheta, + float radiance_x, + float radiance_y, + float radiance_z, + ccl_private float *config_x, + ccl_private float *config_y, + ccl_private float *config_z) +{ + /* convert vector to spherical coordinates */ + float2 spherical = direction_to_spherical(dir); + float theta = spherical.x; + float phi = spherical.y; + + /* angle between sun direction and dir */ + float gamma = sky_angle_between(theta, phi, suntheta, sunphi); + + /* clamp theta to horizon */ + theta = min(theta, M_PI_2_F - 0.001f); + + /* compute xyY color space values */ + float x = radiance_y * sky_perez_function(config_y, theta, gamma); + float y = radiance_z * sky_perez_function(config_z, theta, gamma); + float Y = radiance_x * sky_perez_function(config_x, theta, gamma); + + /* convert to RGB */ + float3 xyz = xyY_to_xyz(x, y, Y); + return xyz_to_rgb(kg, xyz); +} + +/* + * "An Analytic Model for Full Spectral Sky-Dome Radiance" + * Lukas Hosek, Alexander Wilkie + */ +ccl_device float sky_radiance_internal(ccl_private float *configuration, float theta, float gamma) +{ + float ctheta = cosf(theta); + float cgamma = cosf(gamma); + + float expM = expf(configuration[4] * gamma); + float rayM = cgamma * cgamma; + float mieM = (1.0f + rayM) / powf((1.0f + configuration[8] * configuration[8] - + 2.0f * configuration[8] * cgamma), + 1.5f); + float zenith = sqrtf(ctheta); + + return (1.0f + configuration[0] * expf(configuration[1] / (ctheta + 0.01f))) * + (configuration[2] + configuration[3] * expM + configuration[5] * rayM + + configuration[6] * mieM + configuration[7] * zenith); +} + +ccl_device float3 sky_radiance_hosek(KernelGlobals kg, + float3 dir, + float sunphi, + float suntheta, + float radiance_x, + float radiance_y, + float radiance_z, + ccl_private float *config_x, + ccl_private float *config_y, + ccl_private float *config_z) +{ + /* convert vector to spherical coordinates */ + float2 spherical = direction_to_spherical(dir); + float theta = spherical.x; + float phi = spherical.y; + + /* angle between sun direction and dir */ + float gamma = sky_angle_between(theta, phi, suntheta, sunphi); + + /* clamp theta to horizon */ + theta = min(theta, M_PI_2_F - 0.001f); + + /* compute xyz color space values */ + float x = sky_radiance_internal(config_x, theta, gamma) * radiance_x; + float y = sky_radiance_internal(config_y, theta, gamma) * radiance_y; + float z = 
sky_radiance_internal(config_z, theta, gamma) * radiance_z; + + /* convert to RGB and adjust strength */ + return xyz_to_rgb(kg, make_float3(x, y, z)) * (M_2PI_F / 683); +} + +/* Nishita improved sky model */ +ccl_device float3 geographical_to_direction(float lat, float lon) +{ + return make_float3(cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat)); +} + +ccl_device float3 sky_radiance_nishita(KernelGlobals kg, + float3 dir, + ccl_private float *nishita_data, + uint texture_id) +{ + /* definitions */ + float sun_elevation = nishita_data[6]; + float sun_rotation = nishita_data[7]; + float angular_diameter = nishita_data[8]; + float sun_intensity = nishita_data[9]; + bool sun_disc = (angular_diameter >= 0.0f); + float3 xyz; + /* convert dir to spherical coordinates */ + float2 direction = direction_to_spherical(dir); + + /* render above the horizon */ + if (dir.z >= 0.0f) { + /* definitions */ + float3 sun_dir = geographical_to_direction(sun_elevation, sun_rotation + M_PI_2_F); + float sun_dir_angle = precise_angle(dir, sun_dir); + float half_angular = angular_diameter / 2.0f; + float dir_elevation = M_PI_2_F - direction.x; + + /* if ray inside sun disc render it, otherwise render sky */ + if (sun_disc && sun_dir_angle < half_angular) { + /* get 2 pixels data */ + float3 pixel_bottom = make_float3(nishita_data[0], nishita_data[1], nishita_data[2]); + float3 pixel_top = make_float3(nishita_data[3], nishita_data[4], nishita_data[5]); + float y; + + /* sun interpolation */ + if (sun_elevation - half_angular > 0.0f) { + if (sun_elevation + half_angular > 0.0f) { + y = ((dir_elevation - sun_elevation) / angular_diameter) + 0.5f; + xyz = interp(pixel_bottom, pixel_top, y) * sun_intensity; + } + } + else { + if (sun_elevation + half_angular > 0.0f) { + y = dir_elevation / (sun_elevation + half_angular); + xyz = interp(pixel_bottom, pixel_top, y) * sun_intensity; + } + } + /* limb darkening, coefficient is 0.6f */ + float limb_darkening = (1.0f - + 0.6f * (1.0f - sqrtf(1.0f - sqr(sun_dir_angle / half_angular)))); + xyz *= limb_darkening; + } + /* sky */ + else { + /* sky interpolation */ + float x = (direction.y + M_PI_F + sun_rotation) / M_2PI_F; + /* more pixels toward horizon compensation */ + float y = safe_sqrtf(dir_elevation / M_PI_2_F); + if (x > 1.0f) { + x -= 1.0f; + } + xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, y)); + } + } + /* ground */ + else { + if (dir.z < -0.4f) { + xyz = make_float3(0.0f, 0.0f, 0.0f); + } + else { + /* black ground fade */ + float fade = 1.0f + dir.z * 2.5f; + fade = sqr(fade) * fade; + /* interpolation */ + float x = (direction.y + M_PI_F + sun_rotation) / M_2PI_F; + if (x > 1.0f) { + x -= 1.0f; + } + xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, -0.5)) * fade; + } + } + + /* convert to RGB */ + return xyz_to_rgb(kg, xyz); +} + +ccl_device_noinline int svm_node_tex_sky( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) +{ + /* Load data */ + uint dir_offset = node.y; + uint out_offset = node.z; + int sky_model = node.w; + + float3 dir = stack_load_float3(stack, dir_offset); + float3 f; + + /* Preetham and Hosek share the same data */ + if (sky_model == 0 || sky_model == 1) { + /* Define variables */ + float sunphi, suntheta, radiance_x, radiance_y, radiance_z; + float config_x[9], config_y[9], config_z[9]; + + float4 data = read_node_float(kg, &offset); + sunphi = data.x; + suntheta = data.y; + radiance_x = data.z; + radiance_y = data.w; + + data = read_node_float(kg, 
&offset); + radiance_z = data.x; + config_x[0] = data.y; + config_x[1] = data.z; + config_x[2] = data.w; + + data = read_node_float(kg, &offset); + config_x[3] = data.x; + config_x[4] = data.y; + config_x[5] = data.z; + config_x[6] = data.w; + + data = read_node_float(kg, &offset); + config_x[7] = data.x; + config_x[8] = data.y; + config_y[0] = data.z; + config_y[1] = data.w; + + data = read_node_float(kg, &offset); + config_y[2] = data.x; + config_y[3] = data.y; + config_y[4] = data.z; + config_y[5] = data.w; + + data = read_node_float(kg, &offset); + config_y[6] = data.x; + config_y[7] = data.y; + config_y[8] = data.z; + config_z[0] = data.w; + + data = read_node_float(kg, &offset); + config_z[1] = data.x; + config_z[2] = data.y; + config_z[3] = data.z; + config_z[4] = data.w; + + data = read_node_float(kg, &offset); + config_z[5] = data.x; + config_z[6] = data.y; + config_z[7] = data.z; + config_z[8] = data.w; + + /* Compute Sky */ + if (sky_model == 0) { + f = sky_radiance_preetham(kg, + dir, + sunphi, + suntheta, + radiance_x, + radiance_y, + radiance_z, + config_x, + config_y, + config_z); + } + else { + f = sky_radiance_hosek(kg, + dir, + sunphi, + suntheta, + radiance_x, + radiance_y, + radiance_z, + config_x, + config_y, + config_z); + } + } + /* Nishita */ + else { + /* Define variables */ + float nishita_data[10]; + + float4 data = read_node_float(kg, &offset); + nishita_data[0] = data.x; + nishita_data[1] = data.y; + nishita_data[2] = data.z; + nishita_data[3] = data.w; + + data = read_node_float(kg, &offset); + nishita_data[4] = data.x; + nishita_data[5] = data.y; + nishita_data[6] = data.z; + nishita_data[7] = data.w; + + data = read_node_float(kg, &offset); + nishita_data[8] = data.x; + nishita_data[9] = data.y; + uint texture_id = __float_as_uint(data.z); + + /* Compute Sky */ + f = sky_radiance_nishita(kg, dir, nishita_data, texture_id); + } + + stack_store_float3(stack, out_offset, f); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 472f3517839..62ba5bf04e3 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -14,8 +14,7 @@ * limitations under the License. */ -#ifndef __SVM_H__ -#define __SVM_H__ +#pragma once /* Shader Virtual Machine * @@ -38,7 +37,7 @@ * mostly taken care of in the SVM compiler. 
*/ -#include "kernel/svm/svm_types.h" +#include "kernel/svm/types.h" CCL_NAMESPACE_BEGIN @@ -165,61 +164,54 @@ CCL_NAMESPACE_END /* Nodes */ -#include "kernel/svm/svm_noise.h" -#include "svm_fractal_noise.h" - -#include "kernel/svm/svm_color_util.h" -#include "kernel/svm/svm_mapping_util.h" -#include "kernel/svm/svm_math_util.h" - -#include "kernel/svm/svm_aov.h" -#include "kernel/svm/svm_attribute.h" -#include "kernel/svm/svm_blackbody.h" -#include "kernel/svm/svm_brick.h" -#include "kernel/svm/svm_brightness.h" -#include "kernel/svm/svm_bump.h" -#include "kernel/svm/svm_camera.h" -#include "kernel/svm/svm_checker.h" -#include "kernel/svm/svm_clamp.h" -#include "kernel/svm/svm_closure.h" -#include "kernel/svm/svm_convert.h" -#include "kernel/svm/svm_displace.h" -#include "kernel/svm/svm_fresnel.h" -#include "kernel/svm/svm_gamma.h" -#include "kernel/svm/svm_geometry.h" -#include "kernel/svm/svm_gradient.h" -#include "kernel/svm/svm_hsv.h" -#include "kernel/svm/svm_ies.h" -#include "kernel/svm/svm_image.h" -#include "kernel/svm/svm_invert.h" -#include "kernel/svm/svm_light_path.h" -#include "kernel/svm/svm_magic.h" -#include "kernel/svm/svm_map_range.h" -#include "kernel/svm/svm_mapping.h" -#include "kernel/svm/svm_math.h" -#include "kernel/svm/svm_mix.h" -#include "kernel/svm/svm_musgrave.h" -#include "kernel/svm/svm_noisetex.h" -#include "kernel/svm/svm_normal.h" -#include "kernel/svm/svm_ramp.h" -#include "kernel/svm/svm_sepcomb_hsv.h" -#include "kernel/svm/svm_sepcomb_vector.h" -#include "kernel/svm/svm_sky.h" -#include "kernel/svm/svm_tex_coord.h" -#include "kernel/svm/svm_value.h" -#include "kernel/svm/svm_vector_rotate.h" -#include "kernel/svm/svm_vector_transform.h" -#include "kernel/svm/svm_vertex_color.h" -#include "kernel/svm/svm_voronoi.h" -#include "kernel/svm/svm_voxel.h" -#include "kernel/svm/svm_wave.h" -#include "kernel/svm/svm_wavelength.h" -#include "kernel/svm/svm_white_noise.h" -#include "kernel/svm/svm_wireframe.h" +#include "kernel/svm/aov.h" +#include "kernel/svm/attribute.h" +#include "kernel/svm/blackbody.h" +#include "kernel/svm/brick.h" +#include "kernel/svm/brightness.h" +#include "kernel/svm/bump.h" +#include "kernel/svm/camera.h" +#include "kernel/svm/checker.h" +#include "kernel/svm/clamp.h" +#include "kernel/svm/closure.h" +#include "kernel/svm/convert.h" +#include "kernel/svm/displace.h" +#include "kernel/svm/fresnel.h" +#include "kernel/svm/gamma.h" +#include "kernel/svm/geometry.h" +#include "kernel/svm/gradient.h" +#include "kernel/svm/hsv.h" +#include "kernel/svm/ies.h" +#include "kernel/svm/image.h" +#include "kernel/svm/invert.h" +#include "kernel/svm/light_path.h" +#include "kernel/svm/magic.h" +#include "kernel/svm/map_range.h" +#include "kernel/svm/mapping.h" +#include "kernel/svm/math.h" +#include "kernel/svm/mix.h" +#include "kernel/svm/musgrave.h" +#include "kernel/svm/noisetex.h" +#include "kernel/svm/normal.h" +#include "kernel/svm/ramp.h" +#include "kernel/svm/sepcomb_hsv.h" +#include "kernel/svm/sepcomb_vector.h" +#include "kernel/svm/sky.h" +#include "kernel/svm/tex_coord.h" +#include "kernel/svm/value.h" +#include "kernel/svm/vector_rotate.h" +#include "kernel/svm/vector_transform.h" +#include "kernel/svm/vertex_color.h" +#include "kernel/svm/voronoi.h" +#include "kernel/svm/voxel.h" +#include "kernel/svm/wave.h" +#include "kernel/svm/wavelength.h" +#include "kernel/svm/white_noise.h" +#include "kernel/svm/wireframe.h" #ifdef __SHADER_RAYTRACE__ -# include "kernel/svm/svm_ao.h" -# include "kernel/svm/svm_bevel.h" +# include 
"kernel/svm/ao.h" +# include "kernel/svm/bevel.h" #endif CCL_NAMESPACE_BEGIN @@ -607,5 +599,3 @@ ccl_device void svm_eval_nodes(KernelGlobals kg, } CCL_NAMESPACE_END - -#endif /* __SVM_H__ */ diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h deleted file mode 100644 index 4cfef7bc204..00000000000 --- a/intern/cycles/kernel/svm/svm_ao.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/bvh/bvh.h" - -CCL_NAMESPACE_BEGIN - -#ifdef __SHADER_RAYTRACE__ - -# ifdef __KERNEL_OPTIX__ -extern "C" __device__ float __direct_callable__svm_node_ao( -# else -ccl_device float svm_ao( -# endif - KernelGlobals kg, - ConstIntegratorState state, - ccl_private ShaderData *sd, - float3 N, - float max_dist, - int num_samples, - int flags) -{ - if (flags & NODE_AO_GLOBAL_RADIUS) { - max_dist = kernel_data.integrator.ao_bounces_distance; - } - - /* Early out if no sampling needed. */ - if (max_dist <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) { - return 1.0f; - } - - /* Can't raytrace from shaders like displacement, before BVH exists. */ - if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { - return 1.0f; - } - - if (flags & NODE_AO_INSIDE) { - N = -N; - } - - float3 T, B; - make_orthonormals(N, &T, &B); - - /* TODO: support ray-tracing in shadow shader evaluation? */ - RNGState rng_state; - path_state_rng_load(state, &rng_state); - - int unoccluded = 0; - for (int sample = 0; sample < num_samples; sample++) { - float disk_u, disk_v; - path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); - - float2 d = concentric_sample_disk(disk_u, disk_v); - float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d))); - - /* Create ray. */ - Ray ray; - ray.P = ray_offset(sd->P, N); - ray.D = D.x * T + D.y * B + D.z * N; - ray.t = max_dist; - ray.time = sd->time; - ray.dP = differential_zero_compact(); - ray.dD = differential_zero_compact(); - - if (flags & NODE_AO_ONLY_LOCAL) { - if (!scene_intersect_local(kg, &ray, NULL, sd->object, NULL, 0)) { - unoccluded++; - } - } - else { - Intersection isect; - if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) { - unoccluded++; - } - } - } - - return ((float)unoccluded) / num_samples; -} - -template -# if defined(__KERNEL_OPTIX__) -ccl_device_inline -# else -ccl_device_noinline -# endif - void - svm_node_ao(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint flags, dist_offset, normal_offset, out_ao_offset; - svm_unpack_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset); - - uint color_offset, out_color_offset, samples; - svm_unpack_node_uchar3(node.z, &color_offset, &out_color_offset, &samples); - - float ao = 1.0f; - - IF_KERNEL_NODES_FEATURE(RAYTRACE) - { - float dist = stack_load_float_default(stack, dist_offset, node.w); - float3 normal = stack_valid(normal_offset) ? 
stack_load_float3(stack, normal_offset) : sd->N; - -# ifdef __KERNEL_OPTIX__ - ao = optixDirectCall(0, kg, state, sd, normal, dist, samples, flags); -# else - ao = svm_ao(kg, state, sd, normal, dist, samples, flags); -# endif - } - - if (stack_valid(out_ao_offset)) { - stack_store_float(stack, out_ao_offset, ao); - } - - if (stack_valid(out_color_offset)) { - float3 color = stack_load_float3(stack, color_offset); - stack_store_float3(stack, out_color_offset, ao * color); - } -} - -#endif /* __SHADER_RAYTRACE__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_aov.h b/intern/cycles/kernel/svm/svm_aov.h deleted file mode 100644 index 92be7fb6906..00000000000 --- a/intern/cycles/kernel/svm/svm_aov.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/film/film_write_passes.h" - -CCL_NAMESPACE_BEGIN - -ccl_device_inline bool svm_node_aov_check(const uint32_t path_flag, - ccl_global float *render_buffer) -{ - bool is_primary = (path_flag & PATH_RAY_CAMERA) && (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)); - - return ((render_buffer != NULL) && is_primary); -} - -template -ccl_device void svm_node_aov_color(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - ccl_global float *render_buffer) -{ - IF_KERNEL_NODES_FEATURE(AOV) - { - const float3 val = stack_load_float3(stack, node.y); - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset + - (kernel_data.film.pass_aov_color + node.z); - kernel_write_pass_float3(buffer, make_float3(val.x, val.y, val.z)); - } -} - -template -ccl_device void svm_node_aov_value(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - ccl_global float *render_buffer) -{ - IF_KERNEL_NODES_FEATURE(AOV) - { - const float val = stack_load_float(stack, node.y); - const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); - const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * - kernel_data.film.pass_stride; - ccl_global float *buffer = render_buffer + render_buffer_offset + - (kernel_data.film.pass_aov_value + node.z); - kernel_write_pass_float(buffer, val); - } -} -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h deleted file mode 100644 index b3c66d29f5c..00000000000 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Attribute Node */ - -ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals kg, - ccl_private ShaderData *sd, - uint4 node, - ccl_private NodeAttributeOutputType *type, - ccl_private uint *out_offset) -{ - *out_offset = node.z; - *type = (NodeAttributeOutputType)node.w; - - AttributeDescriptor desc; - - if (sd->object != OBJECT_NONE) { - desc = find_attribute(kg, sd, node.y); - if (desc.offset == ATTR_STD_NOT_FOUND) { - desc = attribute_not_found(); - desc.offset = 0; - desc.type = (NodeAttributeType)node.w; - } - } - else { - /* background */ - desc = attribute_not_found(); - desc.offset = 0; - desc.type = (NodeAttributeType)node.w; - } - - return desc; -} - -template -ccl_device_noinline void svm_node_attr(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT; - uint out_offset = 0; - AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); - -#ifdef __VOLUME__ - IF_KERNEL_NODES_FEATURE(VOLUME) - { - /* Volumes - * NOTE: moving this into its own node type might help improve performance. */ - if (primitive_is_volume_attribute(sd, desc)) { - const float4 value = volume_attribute_float4(kg, sd, desc); - - if (type == NODE_ATTR_OUTPUT_FLOAT) { - const float f = volume_attribute_value_to_float(value); - stack_store_float(stack, out_offset, f); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - const float3 f = volume_attribute_value_to_float3(value); - stack_store_float3(stack, out_offset, f); - } - else { - const float f = volume_attribute_value_to_alpha(value); - stack_store_float(stack, out_offset, f); - } - return; - } - } -#endif - - if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { - /* No generated attribute, fall back to object coordinates. */ - float3 f = sd->P; - object_inverse_position_transform(kg, sd, &f); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, average(f)); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, f); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - return; - } - - /* Surface. 
*/ - if (desc.type == NODE_ATTR_FLOAT) { - float f = primitive_surface_attribute_float(kg, sd, desc, NULL, NULL); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, f); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, make_float3(f, f, f)); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - } - else if (desc.type == NODE_ATTR_FLOAT2) { - float2 f = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, f.x); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, make_float3(f.x, f.y, 0.0f)); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - } - else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) { - float4 f = primitive_surface_attribute_float4(kg, sd, desc, NULL, NULL); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, average(float4_to_float3(f))); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, float4_to_float3(f)); - } - else { - stack_store_float(stack, out_offset, f.w); - } - } - else { - float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, average(f)); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, f); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - } -} - -ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT; - uint out_offset = 0; - AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); - -#ifdef __VOLUME__ - /* Volume */ - if (primitive_is_volume_attribute(sd, desc)) { - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, 0.0f); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, make_float3(0.0f, 0.0f, 0.0f)); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - return; - } -#endif - - if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { - /* No generated attribute, fall back to object coordinates. 
*/ - float3 f = sd->P + sd->dP.dx; - object_inverse_position_transform(kg, sd, &f); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, average(f)); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, f); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - return; - } - - /* Surface */ - if (desc.type == NODE_ATTR_FLOAT) { - float dx; - float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, f + dx); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, make_float3(f + dx, f + dx, f + dx)); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - } - else if (desc.type == NODE_ATTR_FLOAT2) { - float2 dx; - float2 f = primitive_surface_attribute_float2(kg, sd, desc, &dx, NULL); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, f.x + dx.x); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, make_float3(f.x + dx.x, f.y + dx.y, 0.0f)); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - } - else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) { - float4 dx; - float4 f = primitive_surface_attribute_float4(kg, sd, desc, &dx, NULL); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, average(float4_to_float3(f + dx))); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, float4_to_float3(f + dx)); - } - else { - stack_store_float(stack, out_offset, f.w + dx.w); - } - } - else { - float3 dx; - float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, average(f + dx)); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, f + dx); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - } -} - -ccl_device_noinline void svm_node_attr_bump_dy(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - NodeAttributeOutputType type = NODE_ATTR_OUTPUT_FLOAT; - uint out_offset = 0; - AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); - -#ifdef __VOLUME__ - /* Volume */ - if (primitive_is_volume_attribute(sd, desc)) { - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, 0.0f); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, make_float3(0.0f, 0.0f, 0.0f)); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - return; - } -#endif - - if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) { - /* No generated attribute, fall back to object coordinates. 
*/ - float3 f = sd->P + sd->dP.dy; - object_inverse_position_transform(kg, sd, &f); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, average(f)); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, f); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - return; - } - - /* Surface */ - if (desc.type == NODE_ATTR_FLOAT) { - float dy; - float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, f + dy); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, make_float3(f + dy, f + dy, f + dy)); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - } - else if (desc.type == NODE_ATTR_FLOAT2) { - float2 dy; - float2 f = primitive_surface_attribute_float2(kg, sd, desc, NULL, &dy); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, f.x + dy.x); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, make_float3(f.x + dy.x, f.y + dy.y, 0.0f)); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - } - else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) { - float4 dy; - float4 f = primitive_surface_attribute_float4(kg, sd, desc, NULL, &dy); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, average(float4_to_float3(f + dy))); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, float4_to_float3(f + dy)); - } - else { - stack_store_float(stack, out_offset, f.w + dy.w); - } - } - else { - float3 dy; - float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy); - if (type == NODE_ATTR_OUTPUT_FLOAT) { - stack_store_float(stack, out_offset, average(f + dy)); - } - else if (type == NODE_ATTR_OUTPUT_FLOAT3) { - stack_store_float3(stack, out_offset, f + dy); - } - else { - stack_store_float(stack, out_offset, 1.0f); - } - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h deleted file mode 100644 index 0a30822aa68..00000000000 --- a/intern/cycles/kernel/svm/svm_bevel.h +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/bvh/bvh.h" -#include "kernel/sample/sample_mapping.h" -#include "kernel/sample/sample_pattern.h" - -CCL_NAMESPACE_BEGIN - -#ifdef __SHADER_RAYTRACE__ - -/* Planar Cubic BSSRDF falloff, reused for bevel. - * - * This is basically (Rm - x)^3, with some factors to normalize it. For sampling - * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as - * far as I can tell has no closed form solution. So we get an iterative solution - * instead with newton-raphson. 
*/ - -ccl_device float svm_bevel_cubic_eval(const float radius, float r) -{ - const float Rm = radius; - - if (r >= Rm) - return 0.0f; - - /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */ - const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm; - const float f = Rm - r; - const float num = f * f * f; - - return (10.0f * num) / (Rm5 * M_PI_F); -} - -ccl_device float svm_bevel_cubic_pdf(const float radius, float r) -{ - return svm_bevel_cubic_eval(radius, r); -} - -/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */ -ccl_device_forceinline float svm_bevel_cubic_quintic_root_find(float xi) -{ - /* newton-raphson iteration, usually succeeds in 2-4 iterations, except - * outside 0.02 ... 0.98 where it can go up to 10, so overall performance - * should not be too bad */ - const float tolerance = 1e-6f; - const int max_iteration_count = 10; - float x = 0.25f; - int i; - - for (i = 0; i < max_iteration_count; i++) { - float x2 = x * x; - float x3 = x2 * x; - float nx = (1.0f - x); - - float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi; - float f_ = 20.0f * (x * nx) * (nx * nx); - - if (fabsf(f) < tolerance || f_ == 0.0f) - break; - - x = saturate(x - f / f_); - } - - return x; -} - -ccl_device void svm_bevel_cubic_sample(const float radius, - float xi, - ccl_private float *r, - ccl_private float *h) -{ - float Rm = radius; - float r_ = svm_bevel_cubic_quintic_root_find(xi); - - r_ *= Rm; - *r = r_; - - /* h^2 + r^2 = Rm^2 */ - *h = safe_sqrtf(Rm * Rm - r_ * r_); -} - -/* Bevel shader averaging normals from nearby surfaces. - * - * Sampling strategy from: BSSRDF Importance Sampling, SIGGRAPH 2013 - * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf - */ - -# ifdef __KERNEL_OPTIX__ -extern "C" __device__ float3 __direct_callable__svm_node_bevel( -# else -ccl_device float3 svm_bevel( -# endif - KernelGlobals kg, - ConstIntegratorState state, - ccl_private ShaderData *sd, - float radius, - int num_samples) -{ - /* Early out if no sampling needed. */ - if (radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) { - return sd->N; - } - - /* Can't raytrace from shaders like displacement, before BVH exists. */ - if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { - return sd->N; - } - - /* Don't bevel for blurry indirect rays. */ - if (INTEGRATOR_STATE(state, path, min_ray_pdf) < 8.0f) { - return sd->N; - } - - /* Setup for multi intersection. */ - LocalIntersection isect; - uint lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_hash), - INTEGRATOR_STATE(state, path, rng_offset), - INTEGRATOR_STATE(state, path, sample), - 0x64c6a40e); - - /* Sample normals from surrounding points on surface. */ - float3 sum_N = make_float3(0.0f, 0.0f, 0.0f); - - /* TODO: support ray-tracing in shadow shader evaluation? */ - RNGState rng_state; - path_state_rng_load(state, &rng_state); - - for (int sample = 0; sample < num_samples; sample++) { - float disk_u, disk_v; - path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); - - /* Pick random axis in local frame and point on disk. 
*/ - float3 disk_N, disk_T, disk_B; - float pick_pdf_N, pick_pdf_T, pick_pdf_B; - - disk_N = sd->Ng; - make_orthonormals(disk_N, &disk_T, &disk_B); - - float axisu = disk_u; - - if (axisu < 0.5f) { - pick_pdf_N = 0.5f; - pick_pdf_T = 0.25f; - pick_pdf_B = 0.25f; - disk_u *= 2.0f; - } - else if (axisu < 0.75f) { - float3 tmp = disk_N; - disk_N = disk_T; - disk_T = tmp; - pick_pdf_N = 0.25f; - pick_pdf_T = 0.5f; - pick_pdf_B = 0.25f; - disk_u = (disk_u - 0.5f) * 4.0f; - } - else { - float3 tmp = disk_N; - disk_N = disk_B; - disk_B = tmp; - pick_pdf_N = 0.25f; - pick_pdf_T = 0.25f; - pick_pdf_B = 0.5f; - disk_u = (disk_u - 0.75f) * 4.0f; - } - - /* Sample point on disk. */ - float phi = M_2PI_F * disk_u; - float disk_r = disk_v; - float disk_height; - - /* Perhaps find something better than Cubic BSSRDF, but happens to work well. */ - svm_bevel_cubic_sample(radius, disk_r, &disk_r, &disk_height); - - float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B; - - /* Create ray. */ - Ray ray ccl_optional_struct_init; - ray.P = sd->P + disk_N * disk_height + disk_P; - ray.D = -disk_N; - ray.t = 2.0f * disk_height; - ray.dP = differential_zero_compact(); - ray.dD = differential_zero_compact(); - ray.time = sd->time; - - /* Intersect with the same object. if multiple intersections are found it - * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */ - scene_intersect_local(kg, &ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS); - - int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS); - - for (int hit = 0; hit < num_eval_hits; hit++) { - /* Quickly retrieve P and Ng without setting up ShaderData. */ - float3 hit_P; - if (sd->type & PRIMITIVE_TRIANGLE) { - hit_P = triangle_refine_local( - kg, sd, ray.P, ray.D, ray.t, isect.hits[hit].object, isect.hits[hit].prim); - } -# ifdef __OBJECT_MOTION__ - else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { - float3 verts[3]; - motion_triangle_vertices(kg, sd->object, isect.hits[hit].prim, sd->time, verts); - hit_P = motion_triangle_refine_local( - kg, sd, ray.P, ray.D, ray.t, isect.hits[hit].object, isect.hits[hit].prim, verts); - } -# endif /* __OBJECT_MOTION__ */ - - /* Get geometric normal. */ - float3 hit_Ng = isect.Ng[hit]; - int object = isect.hits[hit].object; - int object_flag = kernel_tex_fetch(__object_flag, object); - if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - hit_Ng = -hit_Ng; - } - - /* Compute smooth normal. */ - float3 N = hit_Ng; - int prim = isect.hits[hit].prim; - int shader = kernel_tex_fetch(__tri_shader, prim); - - if (shader & SHADER_SMOOTH_NORMAL) { - float u = isect.hits[hit].u; - float v = isect.hits[hit].v; - - if (sd->type & PRIMITIVE_TRIANGLE) { - N = triangle_smooth_normal(kg, N, prim, u, v); - } -# ifdef __OBJECT_MOTION__ - else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { - N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time); - } -# endif /* __OBJECT_MOTION__ */ - } - - /* Transform normals to world space. */ - if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_normal_transform(kg, sd, &N); - object_normal_transform(kg, sd, &hit_Ng); - } - - /* Probability densities for local frame axes. */ - float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng)); - float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng)); - float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng)); - - /* Multiple importance sample between 3 axes, power heuristic - * found to be slightly better than balance heuristic. pdf_N - * in the MIS weight and denominator canceled out. 
*/ - float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B)); - if (isect.num_hits > LOCAL_MAX_HITS) { - w *= isect.num_hits / (float)LOCAL_MAX_HITS; - } - - /* Real distance to sampled point. */ - float r = len(hit_P - sd->P); - - /* Compute weight. */ - float pdf = svm_bevel_cubic_pdf(radius, r); - float disk_pdf = svm_bevel_cubic_pdf(radius, disk_r); - - w *= pdf / disk_pdf; - - /* Sum normal and weight. */ - sum_N += w * N; - } - } - - /* Normalize. */ - float3 N = safe_normalize(sum_N); - return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N; -} - -template -# if defined(__KERNEL_OPTIX__) -ccl_device_inline -# else -ccl_device_noinline -# endif - void - svm_node_bevel(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint num_samples, radius_offset, normal_offset, out_offset; - svm_unpack_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset); - - float3 bevel_N = sd->N; - - IF_KERNEL_NODES_FEATURE(RAYTRACE) - { - float radius = stack_load_float(stack, radius_offset); - -# ifdef __KERNEL_OPTIX__ - bevel_N = optixDirectCall(1, kg, state, sd, radius, num_samples); -# else - bevel_N = svm_bevel(kg, state, sd, radius, num_samples); -# endif - - if (stack_valid(normal_offset)) { - /* Preserve input normal. */ - float3 ref_N = stack_load_float3(stack, normal_offset); - bevel_N = normalize(ref_N + (bevel_N - sd->N)); - } - } - - stack_store_float3(stack, out_offset, bevel_N); -} - -#endif /* __SHADER_RAYTRACE__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h deleted file mode 100644 index f1adb0e76af..00000000000 --- a/intern/cycles/kernel/svm/svm_blackbody.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2013, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
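As a side note on the bevel sampling above: the disk radius is drawn from the cubic falloff 10*(R - r)^3 / (pi * R^5) (svm_bevel_cubic_eval) by inverting its radial CDF, cdf(x) = 10x^2 - 20x^3 + 15x^4 - 4x^5 with x = r/R (the integral noted in the comment), using the Newton-Raphson loop in svm_bevel_cubic_quintic_root_find. The MIS weight w = pdf_N / (pdf_N^2 + pdf_T^2 + pdf_B^2) is the power heuristic pdf_N^2 / (pdf_N^2 + pdf_T^2 + pdf_B^2) with one factor of pdf_N cancelled against the estimator's division by the sampling pdf, as the comment notes. A minimal standalone sketch of the root-find, in plain C++ with illustrative names (kernel types and macros left out), that checks it really inverts the CDF:

#include <cmath>
#include <cstdio>

/* cdf(x) = 10x^2 - 20x^3 + 15x^4 - 4x^5, with x = r / radius. */
static float cdf(float x)
{
  const float x2 = x * x;
  const float x3 = x2 * x;
  return 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3;
}

/* Same iteration as svm_bevel_cubic_quintic_root_find above. */
static float quintic_root_find(float xi)
{
  float x = 0.25f;
  for (int i = 0; i < 10; i++) {
    const float nx = 1.0f - x;
    const float f = cdf(x) - xi;
    const float f_ = 20.0f * (x * nx) * (nx * nx); /* d/dx cdf(x) = 20x(1-x)^3 */
    if (std::fabs(f) < 1e-6f || f_ == 0.0f)
      break;
    x = std::fmin(std::fmax(x - f / f_, 0.0f), 1.0f); /* saturate() */
  }
  return x;
}

int main()
{
  for (float xi = 0.05f; xi < 1.0f; xi += 0.1f) {
    const float x = quintic_root_find(xi);
    std::printf("xi = %.2f  x = %.4f  cdf(x) = %.4f\n", xi, x, cdf(x));
  }
  return 0;
}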
- */ - -CCL_NAMESPACE_BEGIN - -/* Blackbody Node */ - -ccl_device_noinline void svm_node_blackbody(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint temperature_offset, - uint col_offset) -{ - /* Input */ - float temperature = stack_load_float(stack, temperature_offset); - - float3 color_rgb = svm_math_blackbody_color(temperature); - - stack_store_float3(stack, col_offset, color_rgb); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h deleted file mode 100644 index 9dc31ef37ec..00000000000 --- a/intern/cycles/kernel/svm/svm_brick.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Brick */ - -ccl_device_inline float brick_noise(uint n) /* fast integer noise */ -{ - uint nn; - n = (n + 1013) & 0x7fffffff; - n = (n >> 13) ^ n; - nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff; - return 0.5f * ((float)nn / 1073741824.0f); -} - -ccl_device_noinline_cpu float2 svm_brick(float3 p, - float mortar_size, - float mortar_smooth, - float bias, - float brick_width, - float row_height, - float offset_amount, - int offset_frequency, - float squash_amount, - int squash_frequency) -{ - int bricknum, rownum; - float offset = 0.0f; - float x, y; - - rownum = floor_to_int(p.y / row_height); - - if (offset_frequency && squash_frequency) { - brick_width *= (rownum % squash_frequency) ? 1.0f : squash_amount; /* squash */ - offset = (rownum % offset_frequency) ? 0.0f : (brick_width * offset_amount); /* offset */ - } - - bricknum = floor_to_int((p.x + offset) / brick_width); - - x = (p.x + offset) - brick_width * bricknum; - y = p.y - row_height * rownum; - - float tint = saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias)); - float min_dist = min(min(x, y), min(brick_width - x, row_height - y)); - - float mortar; - if (min_dist >= mortar_size) { - mortar = 0.0f; - } - else if (mortar_smooth == 0.0f) { - mortar = 1.0f; - } - else { - min_dist = 1.0f - min_dist / mortar_size; - mortar = (min_dist < mortar_smooth) ? 
smoothstepf(min_dist / mortar_smooth) : 1.0f; - } - - return make_float2(tint, mortar); -} - -ccl_device_noinline int svm_node_tex_brick( - KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) -{ - uint4 node2 = read_node(kg, &offset); - uint4 node3 = read_node(kg, &offset); - uint4 node4 = read_node(kg, &offset); - - /* Input and Output Sockets */ - uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset; - uint mortar_size_offset, bias_offset, brick_width_offset, row_height_offset; - uint color_offset, fac_offset, mortar_smooth_offset; - - /* RNA properties */ - uint offset_frequency, squash_frequency; - - svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset); - svm_unpack_node_uchar4( - node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset); - svm_unpack_node_uchar4( - node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset); - - svm_unpack_node_uchar2(node2.x, &offset_frequency, &squash_frequency); - - float3 co = stack_load_float3(stack, co_offset); - - float3 color1 = stack_load_float3(stack, color1_offset); - float3 color2 = stack_load_float3(stack, color2_offset); - float3 mortar = stack_load_float3(stack, mortar_offset); - - float scale = stack_load_float_default(stack, scale_offset, node2.y); - float mortar_size = stack_load_float_default(stack, mortar_size_offset, node2.z); - float mortar_smooth = stack_load_float_default(stack, mortar_smooth_offset, node4.x); - float bias = stack_load_float_default(stack, bias_offset, node2.w); - float brick_width = stack_load_float_default(stack, brick_width_offset, node3.x); - float row_height = stack_load_float_default(stack, row_height_offset, node3.y); - float offset_amount = __int_as_float(node3.z); - float squash_amount = __int_as_float(node3.w); - - float2 f2 = svm_brick(co * scale, - mortar_size, - mortar_smooth, - bias, - brick_width, - row_height, - offset_amount, - offset_frequency, - squash_amount, - squash_frequency); - - float tint = f2.x; - float f = f2.y; - - if (f != 1.0f) { - float facm = 1.0f - tint; - color1 = facm * color1 + tint * color2; - } - - if (stack_valid(color_offset)) - stack_store_float3(stack, color_offset, color1 * (1.0f - f) + mortar * f); - if (stack_valid(fac_offset)) - stack_store_float(stack, fac_offset, f); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h deleted file mode 100644 index 0a44ffe6359..00000000000 --- a/intern/cycles/kernel/svm/svm_brightness.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
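For reference, the per-brick tint above comes from a small integer hash: each (row, brick) cell index is scrambled by brick_noise() into a pseudo-random value, which is then offset by the bias input and clamped with saturate(). A minimal standalone sketch of that hash in plain C++ (outside the kernel, names kept the same for clarity):

#include <cstdint>
#include <cstdio>

/* Same integer scramble as brick_noise() in svm_brick above;
 * the result lands in [0, 1]. */
static float brick_noise(uint32_t n)
{
  n = (n + 1013) & 0x7fffffff;
  n = (n >> 13) ^ n;
  const uint32_t nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff;
  return 0.5f * ((float)nn / 1073741824.0f);
}

int main()
{
  /* One tint value per (row, brick) cell, packed the same way as in svm_brick. */
  for (int rownum = 0; rownum < 3; rownum++) {
    for (int bricknum = 0; bricknum < 4; bricknum++) {
      const uint32_t cell = (uint32_t)((rownum << 16) + (bricknum & 0xFFFF));
      std::printf("row %d brick %d -> %.3f\n", rownum, bricknum, brick_noise(cell));
    }
  }
  return 0;
}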
- */ - -CCL_NAMESPACE_BEGIN - -ccl_device_noinline void svm_node_brightness( - ccl_private ShaderData *sd, ccl_private float *stack, uint in_color, uint out_color, uint node) -{ - uint bright_offset, contrast_offset; - float3 color = stack_load_float3(stack, in_color); - - svm_unpack_node_uchar2(node, &bright_offset, &contrast_offset); - float brightness = stack_load_float(stack, bright_offset); - float contrast = stack_load_float(stack, contrast_offset); - - color = svm_brightness_contrast(color, brightness, contrast); - - if (stack_valid(out_color)) - stack_store_float3(stack, out_color, color); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h deleted file mode 100644 index 66e5b665532..00000000000 --- a/intern/cycles/kernel/svm/svm_bump.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Bump Eval Nodes */ - -ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint offset) -{ - /* save state */ - stack_store_float3(stack, offset + 0, sd->P); - stack_store_float3(stack, offset + 3, sd->dP.dx); - stack_store_float3(stack, offset + 6, sd->dP.dy); - - /* set state as if undisplaced */ - const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED); - - if (desc.offset != ATTR_STD_NOT_FOUND) { - float3 P, dPdx, dPdy; - P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy); - - object_position_transform(kg, sd, &P); - object_dir_transform(kg, sd, &dPdx); - object_dir_transform(kg, sd, &dPdy); - - sd->P = P; - sd->dP.dx = dPdx; - sd->dP.dy = dPdy; - } -} - -ccl_device_noinline void svm_node_leave_bump_eval(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint offset) -{ - /* restore state */ - sd->P = stack_load_float3(stack, offset + 0); - sd->dP.dx = stack_load_float3(stack, offset + 3); - sd->dP.dy = stack_load_float3(stack, offset + 6); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h deleted file mode 100644 index 787f11f38b5..00000000000 --- a/intern/cycles/kernel/svm/svm_camera.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -CCL_NAMESPACE_BEGIN - -ccl_device_noinline void svm_node_camera(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint out_vector, - uint out_zdepth, - uint out_distance) -{ - float distance; - float zdepth; - float3 vector; - - Transform tfm = kernel_data.cam.worldtocamera; - vector = transform_point(&tfm, sd->P); - zdepth = vector.z; - distance = len(vector); - - if (stack_valid(out_vector)) - stack_store_float3(stack, out_vector, normalize(vector)); - - if (stack_valid(out_zdepth)) - stack_store_float(stack, out_zdepth, zdepth); - - if (stack_valid(out_distance)) - stack_store_float(stack, out_distance, distance); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h deleted file mode 100644 index 9251d90c0e1..00000000000 --- a/intern/cycles/kernel/svm/svm_checker.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Checker */ - -ccl_device float svm_checker(float3 p) -{ - /* avoid precision issues on unit coordinates */ - p.x = (p.x + 0.000001f) * 0.999999f; - p.y = (p.y + 0.000001f) * 0.999999f; - p.z = (p.z + 0.000001f) * 0.999999f; - - int xi = abs(float_to_int(floorf(p.x))); - int yi = abs(float_to_int(floorf(p.y))); - int zi = abs(float_to_int(floorf(p.z))); - - return ((xi % 2 == yi % 2) == (zi % 2)) ? 1.0f : 0.0f; -} - -ccl_device_noinline void svm_node_tex_checker(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint co_offset, color1_offset, color2_offset, scale_offset; - uint color_offset, fac_offset; - - svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset); - svm_unpack_node_uchar2(node.z, &color_offset, &fac_offset); - - float3 co = stack_load_float3(stack, co_offset); - float3 color1 = stack_load_float3(stack, color1_offset); - float3 color2 = stack_load_float3(stack, color2_offset); - float scale = stack_load_float_default(stack, scale_offset, node.w); - - float f = svm_checker(co * scale); - - if (stack_valid(color_offset)) - stack_store_float3(stack, color_offset, (f == 1.0f) ? color1 : color2); - if (stack_valid(fac_offset)) - stack_store_float(stack, fac_offset, f); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_clamp.h b/intern/cycles/kernel/svm/svm_clamp.h deleted file mode 100644 index 5b5ea784f4a..00000000000 --- a/intern/cycles/kernel/svm/svm_clamp.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
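As a side note on the checker texture above: after the small epsilon nudge that keeps unit coordinates off the cell boundaries, the expression ((xi % 2 == yi % 2) == (zi % 2)) is a parity test, returning 1.0f exactly when xi + yi + zi is odd, so the two colors alternate between neighbouring cells along every axis. A minimal standalone sketch in plain C++ (outside the kernel, illustrative names):

#include <cmath>
#include <cstdio>
#include <cstdlib>

/* Same cell parity test as svm_checker above. */
static float checker(float x, float y, float z)
{
  /* Avoid precision issues on unit coordinates. */
  x = (x + 0.000001f) * 0.999999f;
  y = (y + 0.000001f) * 0.999999f;
  z = (z + 0.000001f) * 0.999999f;

  const int xi = std::abs((int)std::floor(x));
  const int yi = std::abs((int)std::floor(y));
  const int zi = std::abs((int)std::floor(z));

  return ((xi % 2 == yi % 2) == (zi % 2)) ? 1.0f : 0.0f;
}

int main()
{
  /* Moving one cell along any single axis flips the result. */
  std::printf("%g %g %g %g\n",
              checker(0.5f, 0.5f, 0.5f),  /* 0 */
              checker(1.5f, 0.5f, 0.5f),  /* 1 */
              checker(1.5f, 1.5f, 0.5f),  /* 0 */
              checker(1.5f, 1.5f, 1.5f)); /* 1 */
  return 0;
}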
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Clamp Node */ - -ccl_device_noinline int svm_node_clamp(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint value_stack_offset, - uint parameters_stack_offsets, - uint result_stack_offset, - int offset) -{ - uint min_stack_offset, max_stack_offset, type; - svm_unpack_node_uchar3(parameters_stack_offsets, &min_stack_offset, &max_stack_offset, &type); - - uint4 defaults = read_node(kg, &offset); - - float value = stack_load_float(stack, value_stack_offset); - float min = stack_load_float_default(stack, min_stack_offset, defaults.x); - float max = stack_load_float_default(stack, max_stack_offset, defaults.y); - - if (type == NODE_CLAMP_RANGE && (min > max)) { - stack_store_float(stack, result_stack_offset, clamp(value, max, min)); - } - else { - stack_store_float(stack, result_stack_offset, clamp(value, min, max)); - } - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h deleted file mode 100644 index 3378832c233..00000000000 --- a/intern/cycles/kernel/svm/svm_closure.h +++ /dev/null @@ -1,1258 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Closure Nodes */ - -ccl_device void svm_node_glass_setup(ccl_private ShaderData *sd, - ccl_private MicrofacetBsdf *bsdf, - int type, - float eta, - float roughness, - bool refract) -{ - if (type == CLOSURE_BSDF_SHARP_GLASS_ID) { - if (refract) { - bsdf->alpha_y = 0.0f; - bsdf->alpha_x = 0.0f; - bsdf->ior = eta; - sd->flag |= bsdf_refraction_setup(bsdf); - } - else { - bsdf->alpha_y = 0.0f; - bsdf->alpha_x = 0.0f; - bsdf->ior = 0.0f; - sd->flag |= bsdf_reflection_setup(bsdf); - } - } - else if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) { - bsdf->alpha_x = roughness; - bsdf->alpha_y = roughness; - bsdf->ior = eta; - - if (refract) - sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); - else - sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); - } - else { - bsdf->alpha_x = roughness; - bsdf->alpha_y = roughness; - bsdf->ior = eta; - - if (refract) - sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); - else - sd->flag |= bsdf_microfacet_ggx_setup(bsdf); - } -} - -ccl_device_inline int svm_node_closure_bsdf_skip(KernelGlobals kg, int offset, uint type) -{ - if (type == CLOSURE_BSDF_PRINCIPLED_ID) { - /* Read all principled BSDF extra data to get the right offset. 
*/ - read_node(kg, &offset); - read_node(kg, &offset); - read_node(kg, &offset); - read_node(kg, &offset); - } - - return offset; -} - -template -ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - uint32_t path_flag, - int offset) -{ - uint type, param1_offset, param2_offset; - - uint mix_weight_offset; - svm_unpack_node_uchar4(node.y, &type, ¶m1_offset, ¶m2_offset, &mix_weight_offset); - float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) : - 1.0f); - - /* note we read this extra node before weight check, so offset is added */ - uint4 data_node = read_node(kg, &offset); - - /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */ - IF_KERNEL_NODES_FEATURE(BSDF) - { - if ((shader_type != SHADER_TYPE_SURFACE) || mix_weight == 0.0f) { - return svm_node_closure_bsdf_skip(kg, offset, type); - } - } - else - { - return svm_node_closure_bsdf_skip(kg, offset, type); - } - - float3 N = stack_valid(data_node.x) ? stack_load_float3(stack, data_node.x) : sd->N; - if (!(sd->type & PRIMITIVE_ALL_CURVE)) { - N = ensure_valid_reflection(sd->Ng, sd->I, N); - } - - float param1 = (stack_valid(param1_offset)) ? stack_load_float(stack, param1_offset) : - __uint_as_float(node.z); - float param2 = (stack_valid(param2_offset)) ? stack_load_float(stack, param2_offset) : - __uint_as_float(node.w); - - switch (type) { -#ifdef __PRINCIPLED__ - case CLOSURE_BSDF_PRINCIPLED_ID: { - uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, - sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, - eta_offset, transmission_offset, anisotropic_rotation_offset, - transmission_roughness_offset; - uint4 data_node2 = read_node(kg, &offset); - - float3 T = stack_load_float3(stack, data_node.y); - svm_unpack_node_uchar4(data_node.z, - &specular_offset, - &roughness_offset, - &specular_tint_offset, - &anisotropic_offset); - svm_unpack_node_uchar4(data_node.w, - &sheen_offset, - &sheen_tint_offset, - &clearcoat_offset, - &clearcoat_roughness_offset); - svm_unpack_node_uchar4(data_node2.x, - &eta_offset, - &transmission_offset, - &anisotropic_rotation_offset, - &transmission_roughness_offset); - - // get Disney principled parameters - float metallic = param1; - float subsurface = param2; - float specular = stack_load_float(stack, specular_offset); - float roughness = stack_load_float(stack, roughness_offset); - float specular_tint = stack_load_float(stack, specular_tint_offset); - float anisotropic = stack_load_float(stack, anisotropic_offset); - float sheen = stack_load_float(stack, sheen_offset); - float sheen_tint = stack_load_float(stack, sheen_tint_offset); - float clearcoat = stack_load_float(stack, clearcoat_offset); - float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset); - float transmission = stack_load_float(stack, transmission_offset); - float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset); - float transmission_roughness = stack_load_float(stack, transmission_roughness_offset); - float eta = fmaxf(stack_load_float(stack, eta_offset), 1e-5f); - - ClosureType distribution = (ClosureType)data_node2.y; - ClosureType subsurface_method = (ClosureType)data_node2.z; - - /* rotate tangent */ - if (anisotropic_rotation != 0.0f) - T = rotate_around_axis(T, N, anisotropic_rotation * M_2PI_F); - - /* calculate ior */ - float ior = (sd->flag & SD_BACKFACING) ? 
1.0f / eta : eta; - - // calculate fresnel for refraction - float cosNO = dot(N, sd->I); - float fresnel = fresnel_dielectric_cos(cosNO, ior); - - // calculate weights of the diffuse and specular part - float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transmission)); - - float final_transmission = saturate(transmission) * (1.0f - saturate(metallic)); - float specular_weight = (1.0f - final_transmission); - - // get the base color - uint4 data_base_color = read_node(kg, &offset); - float3 base_color = stack_valid(data_base_color.x) ? - stack_load_float3(stack, data_base_color.x) : - make_float3(__uint_as_float(data_base_color.y), - __uint_as_float(data_base_color.z), - __uint_as_float(data_base_color.w)); - - // get the additional clearcoat normal and subsurface scattering radius - uint4 data_cn_ssr = read_node(kg, &offset); - float3 clearcoat_normal = stack_valid(data_cn_ssr.x) ? - stack_load_float3(stack, data_cn_ssr.x) : - sd->N; - if (!(sd->type & PRIMITIVE_ALL_CURVE)) { - clearcoat_normal = ensure_valid_reflection(sd->Ng, sd->I, clearcoat_normal); - } - float3 subsurface_radius = stack_valid(data_cn_ssr.y) ? - stack_load_float3(stack, data_cn_ssr.y) : - make_float3(1.0f, 1.0f, 1.0f); - float subsurface_ior = stack_valid(data_cn_ssr.z) ? stack_load_float(stack, data_cn_ssr.z) : - 1.4f; - float subsurface_anisotropy = stack_valid(data_cn_ssr.w) ? - stack_load_float(stack, data_cn_ssr.w) : - 0.0f; - - // get the subsurface color - uint4 data_subsurface_color = read_node(kg, &offset); - float3 subsurface_color = stack_valid(data_subsurface_color.x) ? - stack_load_float3(stack, data_subsurface_color.x) : - make_float3(__uint_as_float(data_subsurface_color.y), - __uint_as_float(data_subsurface_color.z), - __uint_as_float(data_subsurface_color.w)); - - float3 weight = sd->svm_closure_weight * mix_weight; - -# ifdef __SUBSURFACE__ - float3 mixed_ss_base_color = subsurface_color * subsurface + - base_color * (1.0f - subsurface); - float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight; - - /* disable in case of diffuse ancestor, can't see it well then and - * adds considerably noise due to probabilities of continuing path - * getting lower and lower */ - if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) { - subsurface = 0.0f; - - /* need to set the base color in this case such that the - * rays get the correctly mixed color after transmitting - * the object */ - base_color = mixed_ss_base_color; - } - - /* diffuse */ - if (fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) { - if (subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { - float3 diff_weight = weight * base_color * diffuse_weight; - - ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *) - bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight); - - if (bsdf) { - bsdf->N = N; - bsdf->roughness = roughness; - - /* setup bsdf */ - sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_FULL); - } - } - else if (subsurface > CLOSURE_WEIGHT_CUTOFF) { - ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight); - - if (bssrdf) { - bssrdf->radius = subsurface_radius * subsurface; - bssrdf->albedo = mixed_ss_base_color; - bssrdf->N = N; - bssrdf->roughness = roughness; - - /* Clamps protecting against bad/extreme and non physical values. 
*/ - subsurface_ior = clamp(subsurface_ior, 1.01f, 3.8f); - bssrdf->anisotropy = clamp(subsurface_anisotropy, 0.0f, 0.9f); - - /* setup bsdf */ - sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method, subsurface_ior); - } - } - } -# else - /* diffuse */ - if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { - float3 diff_weight = weight * base_color * diffuse_weight; - - ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc( - sd, sizeof(PrincipledDiffuseBsdf), diff_weight); - - if (bsdf) { - bsdf->N = N; - bsdf->roughness = roughness; - - /* setup bsdf */ - sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_FULL); - } - } -# endif - - /* sheen */ - if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF && sheen > CLOSURE_WEIGHT_CUTOFF) { - float m_cdlum = linear_rgb_to_gray(kg, base_color); - float3 m_ctint = m_cdlum > 0.0f ? - base_color / m_cdlum : - make_float3(1.0f, 1.0f, 1.0f); // normalize lum. to isolate hue+sat - - /* color of the sheen component */ - float3 sheen_color = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - sheen_tint) + - m_ctint * sheen_tint; - - float3 sheen_weight = weight * sheen * sheen_color * diffuse_weight; - - ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)bsdf_alloc( - sd, sizeof(PrincipledSheenBsdf), sheen_weight); - - if (bsdf) { - bsdf->N = N; - - /* setup bsdf */ - sd->flag |= bsdf_principled_sheen_setup(sd, bsdf); - } - } - - /* specular reflection */ -# ifdef __CAUSTICS_TRICKS__ - if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) { -# endif - if (specular_weight > CLOSURE_WEIGHT_CUTOFF && - (specular > CLOSURE_WEIGHT_CUTOFF || metallic > CLOSURE_WEIGHT_CUTOFF)) { - float3 spec_weight = weight * specular_weight; - - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), spec_weight); - ccl_private MicrofacetExtra *extra = - (bsdf != NULL) ? - (ccl_private MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) : - NULL; - - if (bsdf && extra) { - bsdf->N = N; - bsdf->ior = (2.0f / (1.0f - safe_sqrtf(0.08f * specular))) - 1.0f; - bsdf->T = T; - bsdf->extra = extra; - - float aspect = safe_sqrtf(1.0f - anisotropic * 0.9f); - float r2 = roughness * roughness; - - bsdf->alpha_x = r2 / aspect; - bsdf->alpha_y = r2 * aspect; - - float m_cdlum = 0.3f * base_color.x + 0.6f * base_color.y + - 0.1f * base_color.z; // luminance approx. - float3 m_ctint = m_cdlum > 0.0f ? - base_color / m_cdlum : - make_float3( - 1.0f, 1.0f, 1.0f); // normalize lum. 
to isolate hue+sat - float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint) + - m_ctint * specular_tint; - - bsdf->extra->cspec0 = (specular * 0.08f * tmp_col) * (1.0f - metallic) + - base_color * metallic; - bsdf->extra->color = base_color; - bsdf->extra->clearcoat = 0.0f; - - /* setup bsdf */ - if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || - roughness <= 0.075f) /* use single-scatter GGX */ - sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); - else /* use multi-scatter GGX */ - sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); - } - } -# ifdef __CAUSTICS_TRICKS__ - } -# endif - - /* BSDF */ -# ifdef __CAUSTICS_TRICKS__ - if (kernel_data.integrator.caustics_reflective || - kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) { -# endif - if (final_transmission > CLOSURE_WEIGHT_CUTOFF) { - float3 glass_weight = weight * final_transmission; - float3 cspec0 = base_color * specular_tint + - make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint); - - if (roughness <= 5e-2f || - distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) { /* use single-scatter GGX */ - float refl_roughness = roughness; - - /* reflection */ -# ifdef __CAUSTICS_TRICKS__ - if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) -# endif - { - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), glass_weight * fresnel); - ccl_private MicrofacetExtra *extra = - (bsdf != NULL) ? (ccl_private MicrofacetExtra *)closure_alloc_extra( - sd, sizeof(MicrofacetExtra)) : - NULL; - - if (bsdf && extra) { - bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = extra; - - bsdf->alpha_x = refl_roughness * refl_roughness; - bsdf->alpha_y = refl_roughness * refl_roughness; - bsdf->ior = ior; - - bsdf->extra->color = base_color; - bsdf->extra->cspec0 = cspec0; - bsdf->extra->clearcoat = 0.0f; - - /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); - } - } - - /* refraction */ -# ifdef __CAUSTICS_TRICKS__ - if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) -# endif - { - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), base_color * glass_weight * (1.0f - fresnel)); - if (bsdf) { - bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = NULL; - - if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) - transmission_roughness = 1.0f - (1.0f - refl_roughness) * - (1.0f - transmission_roughness); - else - transmission_roughness = refl_roughness; - - bsdf->alpha_x = transmission_roughness * transmission_roughness; - bsdf->alpha_y = transmission_roughness * transmission_roughness; - bsdf->ior = ior; - - /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); - } - } - } - else { /* use multi-scatter GGX */ - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), glass_weight); - ccl_private MicrofacetExtra *extra = - (bsdf != NULL) ? 
(ccl_private MicrofacetExtra *)closure_alloc_extra( - sd, sizeof(MicrofacetExtra)) : - NULL; - - if (bsdf && extra) { - bsdf->N = N; - bsdf->extra = extra; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - - bsdf->alpha_x = roughness * roughness; - bsdf->alpha_y = roughness * roughness; - bsdf->ior = ior; - - bsdf->extra->color = base_color; - bsdf->extra->cspec0 = cspec0; - bsdf->extra->clearcoat = 0.0f; - - /* setup bsdf */ - sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); - } - } - } -# ifdef __CAUSTICS_TRICKS__ - } -# endif - - /* clearcoat */ -# ifdef __CAUSTICS_TRICKS__ - if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) { -# endif - if (clearcoat > CLOSURE_WEIGHT_CUTOFF) { - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), weight); - ccl_private MicrofacetExtra *extra = - (bsdf != NULL) ? - (ccl_private MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) : - NULL; - - if (bsdf && extra) { - bsdf->N = clearcoat_normal; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->ior = 1.5f; - bsdf->extra = extra; - - bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness; - bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness; - - bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); - bsdf->extra->clearcoat = clearcoat; - - /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); - } - } -# ifdef __CAUSTICS_TRICKS__ - } -# endif - - break; - } -#endif /* __PRINCIPLED__ */ - case CLOSURE_BSDF_DIFFUSE_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - ccl_private OrenNayarBsdf *bsdf = (ccl_private OrenNayarBsdf *)bsdf_alloc( - sd, sizeof(OrenNayarBsdf), weight); - - if (bsdf) { - bsdf->N = N; - - float roughness = param1; - - if (roughness == 0.0f) { - sd->flag |= bsdf_diffuse_setup((ccl_private DiffuseBsdf *)bsdf); - } - else { - bsdf->roughness = roughness; - sd->flag |= bsdf_oren_nayar_setup(bsdf); - } - } - break; - } - case CLOSURE_BSDF_TRANSLUCENT_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc( - sd, sizeof(DiffuseBsdf), weight); - - if (bsdf) { - bsdf->N = N; - sd->flag |= bsdf_translucent_setup(bsdf); - } - break; - } - case CLOSURE_BSDF_TRANSPARENT_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - bsdf_transparent_setup(sd, weight, path_flag); - break; - } - case CLOSURE_BSDF_REFLECTION_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: { -#ifdef __CAUSTICS_TRICKS__ - if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) - break; -#endif - float3 weight = sd->svm_closure_weight * mix_weight; - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), weight); - - if (!bsdf) { - break; - } - - float roughness = sqr(param1); - - bsdf->N = N; - bsdf->ior = 0.0f; - bsdf->extra = NULL; - - if (data_node.y == SVM_STACK_INVALID) { - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_x = roughness; - bsdf->alpha_y = roughness; - } - else { - bsdf->T = stack_load_float3(stack, data_node.y); - - /* rotate tangent */ - float rotation = stack_load_float(stack, data_node.z); - if (rotation != 0.0f) - bsdf->T = rotate_around_axis(bsdf->T, bsdf->N, rotation * M_2PI_F); - - /* compute 
roughness */ - float anisotropy = clamp(param2, -0.99f, 0.99f); - if (anisotropy < 0.0f) { - bsdf->alpha_x = roughness / (1.0f + anisotropy); - bsdf->alpha_y = roughness * (1.0f + anisotropy); - } - else { - bsdf->alpha_x = roughness * (1.0f - anisotropy); - bsdf->alpha_y = roughness / (1.0f - anisotropy); - } - } - - /* setup bsdf */ - if (type == CLOSURE_BSDF_REFLECTION_ID) - sd->flag |= bsdf_reflection_setup(bsdf); - else if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID) - sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); - else if (type == CLOSURE_BSDF_MICROFACET_GGX_ID) - sd->flag |= bsdf_microfacet_ggx_setup(bsdf); - else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) { - kernel_assert(stack_valid(data_node.w)); - bsdf->extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(sd, - sizeof(MicrofacetExtra)); - if (bsdf->extra) { - bsdf->extra->color = stack_load_float3(stack, data_node.w); - bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->clearcoat = 0.0f; - sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); - } - } - else { - sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf); - } - - break; - } - case CLOSURE_BSDF_REFRACTION_ID: - case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: { -#ifdef __CAUSTICS_TRICKS__ - if (!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) - break; -#endif - float3 weight = sd->svm_closure_weight * mix_weight; - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), weight); - - if (bsdf) { - bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = NULL; - - float eta = fmaxf(param2, 1e-5f); - eta = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta; - - /* setup bsdf */ - if (type == CLOSURE_BSDF_REFRACTION_ID) { - bsdf->alpha_x = 0.0f; - bsdf->alpha_y = 0.0f; - bsdf->ior = eta; - - sd->flag |= bsdf_refraction_setup(bsdf); - } - else { - float roughness = sqr(param1); - bsdf->alpha_x = roughness; - bsdf->alpha_y = roughness; - bsdf->ior = eta; - - if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) - sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); - else - sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); - } - } - - break; - } - case CLOSURE_BSDF_SHARP_GLASS_ID: - case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: { -#ifdef __CAUSTICS_TRICKS__ - if (!kernel_data.integrator.caustics_reflective && - !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) { - break; - } -#endif - float3 weight = sd->svm_closure_weight * mix_weight; - - /* index of refraction */ - float eta = fmaxf(param2, 1e-5f); - eta = (sd->flag & SD_BACKFACING) ? 
1.0f / eta : eta; - - /* fresnel */ - float cosNO = dot(N, sd->I); - float fresnel = fresnel_dielectric_cos(cosNO, eta); - float roughness = sqr(param1); - - /* reflection */ -#ifdef __CAUSTICS_TRICKS__ - if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) -#endif - { - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), weight * fresnel); - - if (bsdf) { - bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = NULL; - svm_node_glass_setup(sd, bsdf, type, eta, roughness, false); - } - } - - /* refraction */ -#ifdef __CAUSTICS_TRICKS__ - if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) -#endif - { - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), weight * (1.0f - fresnel)); - - if (bsdf) { - bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = NULL; - svm_node_glass_setup(sd, bsdf, type, eta, roughness, true); - } - } - - break; - } - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: { -#ifdef __CAUSTICS_TRICKS__ - if (!kernel_data.integrator.caustics_reflective && - !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) - break; -#endif - float3 weight = sd->svm_closure_weight * mix_weight; - ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc( - sd, sizeof(MicrofacetBsdf), weight); - if (!bsdf) { - break; - } - - ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra( - sd, sizeof(MicrofacetExtra)); - if (!extra) { - break; - } - - bsdf->N = N; - bsdf->extra = extra; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - - float roughness = sqr(param1); - bsdf->alpha_x = roughness; - bsdf->alpha_y = roughness; - float eta = fmaxf(param2, 1e-5f); - bsdf->ior = (sd->flag & SD_BACKFACING) ? 
1.0f / eta : eta; - - kernel_assert(stack_valid(data_node.z)); - bsdf->extra->color = stack_load_float3(stack, data_node.z); - bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->clearcoat = 0.0f; - - /* setup bsdf */ - sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); - break; - } - case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - ccl_private VelvetBsdf *bsdf = (ccl_private VelvetBsdf *)bsdf_alloc( - sd, sizeof(VelvetBsdf), weight); - - if (bsdf) { - bsdf->N = N; - - bsdf->sigma = saturate(param1); - sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf); - } - break; - } - case CLOSURE_BSDF_GLOSSY_TOON_ID: -#ifdef __CAUSTICS_TRICKS__ - if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) - break; - ATTR_FALLTHROUGH; -#endif - case CLOSURE_BSDF_DIFFUSE_TOON_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - ccl_private ToonBsdf *bsdf = (ccl_private ToonBsdf *)bsdf_alloc( - sd, sizeof(ToonBsdf), weight); - - if (bsdf) { - bsdf->N = N; - bsdf->size = param1; - bsdf->smooth = param2; - - if (type == CLOSURE_BSDF_DIFFUSE_TOON_ID) - sd->flag |= bsdf_diffuse_toon_setup(bsdf); - else - sd->flag |= bsdf_glossy_toon_setup(bsdf); - } - break; - } -#ifdef __HAIR__ - case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: { - uint4 data_node2 = read_node(kg, &offset); - uint4 data_node3 = read_node(kg, &offset); - uint4 data_node4 = read_node(kg, &offset); - - float3 weight = sd->svm_closure_weight * mix_weight; - - uint offset_ofs, ior_ofs, color_ofs, parametrization; - svm_unpack_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, ¶metrization); - float alpha = stack_load_float_default(stack, offset_ofs, data_node.z); - float ior = stack_load_float_default(stack, ior_ofs, data_node.w); - - uint coat_ofs, melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs; - svm_unpack_node_uchar4(data_node2.x, - &coat_ofs, - &melanin_ofs, - &melanin_redness_ofs, - &absorption_coefficient_ofs); - - uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs; - svm_unpack_node_uchar4( - data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs); - - const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y); - float random = 0.0f; - if (attr_descr_random.offset != ATTR_STD_NOT_FOUND) { - random = primitive_surface_attribute_float(kg, sd, attr_descr_random, NULL, NULL); - } - else { - random = stack_load_float_default(stack, random_ofs, data_node3.y); - } - - ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)bsdf_alloc( - sd, sizeof(PrincipledHairBSDF), weight); - if (bsdf) { - ccl_private PrincipledHairExtra *extra = (ccl_private PrincipledHairExtra *) - closure_alloc_extra(sd, sizeof(PrincipledHairExtra)); - - if (!extra) - break; - - /* Random factors range: [-randomization/2, +randomization/2]. */ - float random_roughness = stack_load_float_default( - stack, random_roughness_ofs, data_node3.w); - float factor_random_roughness = 1.0f + 2.0f * (random - 0.5f) * random_roughness; - float roughness = param1 * factor_random_roughness; - float radial_roughness = param2 * factor_random_roughness; - - /* Remap Coat value to [0, 100]% of Roughness. 
*/ - float coat = stack_load_float_default(stack, coat_ofs, data_node2.y); - float m0_roughness = 1.0f - clamp(coat, 0.0f, 1.0f); - - bsdf->N = N; - bsdf->v = roughness; - bsdf->s = radial_roughness; - bsdf->m0_roughness = m0_roughness; - bsdf->alpha = alpha; - bsdf->eta = ior; - bsdf->extra = extra; - - switch (parametrization) { - case NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION: { - float3 absorption_coefficient = stack_load_float3(stack, absorption_coefficient_ofs); - bsdf->sigma = absorption_coefficient; - break; - } - case NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION: { - float melanin = stack_load_float_default(stack, melanin_ofs, data_node2.z); - float melanin_redness = stack_load_float_default( - stack, melanin_redness_ofs, data_node2.w); - - /* Randomize melanin. */ - float random_color = stack_load_float_default(stack, random_color_ofs, data_node3.z); - random_color = clamp(random_color, 0.0f, 1.0f); - float factor_random_color = 1.0f + 2.0f * (random - 0.5f) * random_color; - melanin *= factor_random_color; - - /* Map melanin 0..inf from more perceptually linear 0..1. */ - melanin = -logf(fmaxf(1.0f - melanin, 0.0001f)); - - /* Benedikt Bitterli's melanin ratio remapping. */ - float eumelanin = melanin * (1.0f - melanin_redness); - float pheomelanin = melanin * melanin_redness; - float3 melanin_sigma = bsdf_principled_hair_sigma_from_concentration(eumelanin, - pheomelanin); - - /* Optional tint. */ - float3 tint = stack_load_float3(stack, tint_ofs); - float3 tint_sigma = bsdf_principled_hair_sigma_from_reflectance(tint, - radial_roughness); - - bsdf->sigma = melanin_sigma + tint_sigma; - break; - } - case NODE_PRINCIPLED_HAIR_REFLECTANCE: { - float3 color = stack_load_float3(stack, color_ofs); - bsdf->sigma = bsdf_principled_hair_sigma_from_reflectance(color, radial_roughness); - break; - } - default: { - /* Fallback to brownish hair, same as defaults for melanin. 
*/ - kernel_assert(!"Invalid Principled Hair parametrization!"); - bsdf->sigma = bsdf_principled_hair_sigma_from_concentration(0.0f, 0.8054375f); - break; - } - } - - sd->flag |= bsdf_principled_hair_setup(sd, bsdf); - } - break; - } - case CLOSURE_BSDF_HAIR_REFLECTION_ID: - case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - - ccl_private HairBsdf *bsdf = (ccl_private HairBsdf *)bsdf_alloc( - sd, sizeof(HairBsdf), weight); - - if (bsdf) { - bsdf->N = N; - bsdf->roughness1 = param1; - bsdf->roughness2 = param2; - bsdf->offset = -stack_load_float(stack, data_node.z); - - if (stack_valid(data_node.y)) { - bsdf->T = normalize(stack_load_float3(stack, data_node.y)); - } - else if (!(sd->type & PRIMITIVE_ALL_CURVE)) { - bsdf->T = normalize(sd->dPdv); - bsdf->offset = 0.0f; - } - else - bsdf->T = normalize(sd->dPdu); - - if (type == CLOSURE_BSDF_HAIR_REFLECTION_ID) { - sd->flag |= bsdf_hair_reflection_setup(bsdf); - } - else { - sd->flag |= bsdf_hair_transmission_setup(bsdf); - } - } - - break; - } -#endif /* __HAIR__ */ - -#ifdef __SUBSURFACE__ - case CLOSURE_BSSRDF_BURLEY_ID: - case CLOSURE_BSSRDF_RANDOM_WALK_ID: - case CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, weight); - - if (bssrdf) { - /* disable in case of diffuse ancestor, can't see it well then and - * adds considerably noise due to probabilities of continuing path - * getting lower and lower */ - if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) - param1 = 0.0f; - - bssrdf->radius = stack_load_float3(stack, data_node.z) * param1; - bssrdf->albedo = sd->svm_closure_weight; - bssrdf->N = N; - bssrdf->roughness = FLT_MAX; - - const float subsurface_ior = clamp(param2, 1.01f, 3.8f); - const float subsurface_anisotropy = stack_load_float(stack, data_node.w); - bssrdf->anisotropy = clamp(subsurface_anisotropy, 0.0f, 0.9f); - - sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type, subsurface_ior); - } - - break; - } -#endif - default: - break; - } - - return offset; -} - -template -ccl_device_noinline void svm_node_closure_volume(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ -#ifdef __VOLUME__ - /* Only sum extinction for volumes, variable is shared with surface transparency. */ - if (shader_type != SHADER_TYPE_VOLUME) { - return; - } - - uint type, density_offset, anisotropy_offset; - - uint mix_weight_offset; - svm_unpack_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset); - float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) : - 1.0f); - - if (mix_weight == 0.0f) { - return; - } - - float density = (stack_valid(density_offset)) ? stack_load_float(stack, density_offset) : - __uint_as_float(node.z); - density = mix_weight * fmaxf(density, 0.0f); - - /* Compute scattering coefficient. */ - float3 weight = sd->svm_closure_weight; - - if (type == CLOSURE_VOLUME_ABSORPTION_ID) { - weight = make_float3(1.0f, 1.0f, 1.0f) - weight; - } - - weight *= density; - - /* Add closure for volume scattering. */ - if (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { - ccl_private HenyeyGreensteinVolume *volume = (ccl_private HenyeyGreensteinVolume *)bsdf_alloc( - sd, sizeof(HenyeyGreensteinVolume), weight); - - if (volume) { - float anisotropy = (stack_valid(anisotropy_offset)) ? 
- stack_load_float(stack, anisotropy_offset) : - __uint_as_float(node.w); - volume->g = anisotropy; /* g */ - sd->flag |= volume_henyey_greenstein_setup(volume); - } - } - - /* Sum total extinction weight. */ - volume_extinction_setup(sd, weight); -#endif -} - -template -ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node, - uint32_t path_flag, - int offset) -{ -#ifdef __VOLUME__ - uint4 value_node = read_node(kg, &offset); - uint4 attr_node = read_node(kg, &offset); - - /* Only sum extinction for volumes, variable is shared with surface transparency. */ - if (shader_type != SHADER_TYPE_VOLUME) { - return offset; - } - - uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset; - svm_unpack_node_uchar4( - node.y, &density_offset, &anisotropy_offset, &absorption_color_offset, &mix_weight_offset); - float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) : - 1.0f); - - if (mix_weight == 0.0f) { - return offset; - } - - /* Compute density. */ - float primitive_density = 1.0f; - float density = (stack_valid(density_offset)) ? stack_load_float(stack, density_offset) : - __uint_as_float(value_node.x); - density = mix_weight * fmaxf(density, 0.0f); - - if (density > CLOSURE_WEIGHT_CUTOFF) { - /* Density and color attribute lookup if available. */ - const AttributeDescriptor attr_density = find_attribute(kg, sd, attr_node.x); - if (attr_density.offset != ATTR_STD_NOT_FOUND) { - primitive_density = primitive_volume_attribute_float(kg, sd, attr_density); - density = fmaxf(density * primitive_density, 0.0f); - } - } - - if (density > CLOSURE_WEIGHT_CUTOFF) { - /* Compute scattering color. */ - float3 color = sd->svm_closure_weight; - - const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y); - if (attr_color.offset != ATTR_STD_NOT_FOUND) { - color *= primitive_volume_attribute_float3(kg, sd, attr_color); - } - - /* Add closure for volume scattering. */ - ccl_private HenyeyGreensteinVolume *volume = (ccl_private HenyeyGreensteinVolume *)bsdf_alloc( - sd, sizeof(HenyeyGreensteinVolume), color * density); - if (volume) { - float anisotropy = (stack_valid(anisotropy_offset)) ? - stack_load_float(stack, anisotropy_offset) : - __uint_as_float(value_node.y); - volume->g = anisotropy; - sd->flag |= volume_henyey_greenstein_setup(volume); - } - - /* Add extinction weight. */ - float3 zero = make_float3(0.0f, 0.0f, 0.0f); - float3 one = make_float3(1.0f, 1.0f, 1.0f); - float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)), zero); - float3 absorption = max(one - color, zero) * max(one - absorption_color, zero); - volume_extinction_setup(sd, (color + absorption) * density); - } - - /* Compute emission. */ - if (path_flag & PATH_RAY_SHADOW) { - /* Don't need emission for shadows. */ - return offset; - } - - uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset; - svm_unpack_node_uchar4( - node.z, &emission_offset, &emission_color_offset, &blackbody_offset, &temperature_offset); - float emission = (stack_valid(emission_offset)) ? stack_load_float(stack, emission_offset) : - __uint_as_float(value_node.z); - float blackbody = (stack_valid(blackbody_offset)) ? 
stack_load_float(stack, blackbody_offset) : - __uint_as_float(value_node.w); - - if (emission > CLOSURE_WEIGHT_CUTOFF) { - float3 emission_color = stack_load_float3(stack, emission_color_offset); - emission_setup(sd, emission * emission_color); - } - - if (blackbody > CLOSURE_WEIGHT_CUTOFF) { - float T = stack_load_float(stack, temperature_offset); - - /* Add flame temperature from attribute if available. */ - const AttributeDescriptor attr_temperature = find_attribute(kg, sd, attr_node.z); - if (attr_temperature.offset != ATTR_STD_NOT_FOUND) { - float temperature = primitive_volume_attribute_float(kg, sd, attr_temperature); - T *= fmaxf(temperature, 0.0f); - } - - T = fmaxf(T, 0.0f); - - /* Stefan-Boltzmann law. */ - float T4 = sqr(sqr(T)); - float sigma = 5.670373e-8f * 1e-6f / M_PI_F; - float intensity = sigma * mix(1.0f, T4, blackbody); - - if (intensity > CLOSURE_WEIGHT_CUTOFF) { - float3 blackbody_tint = stack_load_float3(stack, node.w); - float3 bb = blackbody_tint * intensity * svm_math_blackbody_color(T); - emission_setup(sd, bb); - } - } -#endif - return offset; -} - -ccl_device_noinline void svm_node_closure_emission(ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint mix_weight_offset = node.y; - float3 weight = sd->svm_closure_weight; - - if (stack_valid(mix_weight_offset)) { - float mix_weight = stack_load_float(stack, mix_weight_offset); - - if (mix_weight == 0.0f) - return; - - weight *= mix_weight; - } - - emission_setup(sd, weight); -} - -ccl_device_noinline void svm_node_closure_background(ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint mix_weight_offset = node.y; - float3 weight = sd->svm_closure_weight; - - if (stack_valid(mix_weight_offset)) { - float mix_weight = stack_load_float(stack, mix_weight_offset); - - if (mix_weight == 0.0f) - return; - - weight *= mix_weight; - } - - background_setup(sd, weight); -} - -ccl_device_noinline void svm_node_closure_holdout(ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint mix_weight_offset = node.y; - - if (stack_valid(mix_weight_offset)) { - float mix_weight = stack_load_float(stack, mix_weight_offset); - - if (mix_weight == 0.0f) - return; - - closure_alloc( - sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight * mix_weight); - } - else - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight); - - sd->flag |= SD_HOLDOUT; -} - -/* Closure Nodes */ - -ccl_device_inline void svm_node_closure_store_weight(ccl_private ShaderData *sd, float3 weight) -{ - sd->svm_closure_weight = weight; -} - -ccl_device void svm_node_closure_set_weight(ccl_private ShaderData *sd, uint r, uint g, uint b) -{ - float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b)); - svm_node_closure_store_weight(sd, weight); -} - -ccl_device void svm_node_closure_weight(ccl_private ShaderData *sd, - ccl_private float *stack, - uint weight_offset) -{ - float3 weight = stack_load_float3(stack, weight_offset); - svm_node_closure_store_weight(sd, weight); -} - -ccl_device_noinline void svm_node_emission_weight(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint color_offset = node.y; - uint strength_offset = node.z; - - float strength = stack_load_float(stack, strength_offset); - float3 weight = stack_load_float3(stack, color_offset) * strength; - - svm_node_closure_store_weight(sd, weight); -} - -ccl_device_noinline void svm_node_mix_closure(ccl_private 
ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - /* fetch weight from blend input, previous mix closures, - * and write to stack to be used by closure nodes later */ - uint weight_offset, in_weight_offset, weight1_offset, weight2_offset; - svm_unpack_node_uchar4( - node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset); - - float weight = stack_load_float(stack, weight_offset); - weight = saturate(weight); - - float in_weight = (stack_valid(in_weight_offset)) ? stack_load_float(stack, in_weight_offset) : - 1.0f; - - if (stack_valid(weight1_offset)) - stack_store_float(stack, weight1_offset, in_weight * (1.0f - weight)); - if (stack_valid(weight2_offset)) - stack_store_float(stack, weight2_offset, in_weight * weight); -} - -/* (Bump) normal */ - -ccl_device void svm_node_set_normal(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint in_direction, - uint out_normal) -{ - float3 normal = stack_load_float3(stack, in_direction); - sd->N = normal; - stack_store_float3(stack, out_normal, normal); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_color_util.h b/intern/cycles/kernel/svm/svm_color_util.h deleted file mode 100644 index 1a0fa03305e..00000000000 --- a/intern/cycles/kernel/svm/svm_color_util.h +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
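As a side note on svm_node_mix_closure above: the Mix Shader node does not average closures directly, it splits a scalar weight between its two branches, and because in_weight can itself come from an enclosing mix, nested mixes keep multiplying the incoming weight down the closure tree before the closure nodes read it from the stack. A minimal standalone sketch of that weight propagation in plain C++ (illustrative names, not part of the kernel):

#include <algorithm>
#include <cstdio>

struct MixWeights {
  float branch1;
  float branch2;
};

/* Mirrors svm_node_mix_closure: split the incoming weight by the blend factor. */
static MixWeights mix_closure_weights(float in_weight, float fac)
{
  const float t = std::clamp(fac, 0.0f, 1.0f); /* saturate() in the kernel */
  return {in_weight * (1.0f - t), in_weight * t};
}

int main()
{
  /* Outer mix at fac 0.25 whose second branch feeds another mix at fac 0.5:
   * the three leaf closures get weights 0.75, 0.125 and 0.125. */
  const MixWeights outer = mix_closure_weights(1.0f, 0.25f);
  const MixWeights inner = mix_closure_weights(outer.branch2, 0.5f);
  std::printf("%.3f %.3f %.3f\n", outer.branch1, inner.branch1, inner.branch2);
  return 0;
}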
- */ - -CCL_NAMESPACE_BEGIN - -ccl_device float3 svm_mix_blend(float t, float3 col1, float3 col2) -{ - return interp(col1, col2, t); -} - -ccl_device float3 svm_mix_add(float t, float3 col1, float3 col2) -{ - return interp(col1, col1 + col2, t); -} - -ccl_device float3 svm_mix_mul(float t, float3 col1, float3 col2) -{ - return interp(col1, col1 * col2, t); -} - -ccl_device float3 svm_mix_screen(float t, float3 col1, float3 col2) -{ - float tm = 1.0f - t; - float3 one = make_float3(1.0f, 1.0f, 1.0f); - float3 tm3 = make_float3(tm, tm, tm); - - return one - (tm3 + t * (one - col2)) * (one - col1); -} - -ccl_device float3 svm_mix_overlay(float t, float3 col1, float3 col2) -{ - float tm = 1.0f - t; - - float3 outcol = col1; - - if (outcol.x < 0.5f) - outcol.x *= tm + 2.0f * t * col2.x; - else - outcol.x = 1.0f - (tm + 2.0f * t * (1.0f - col2.x)) * (1.0f - outcol.x); - - if (outcol.y < 0.5f) - outcol.y *= tm + 2.0f * t * col2.y; - else - outcol.y = 1.0f - (tm + 2.0f * t * (1.0f - col2.y)) * (1.0f - outcol.y); - - if (outcol.z < 0.5f) - outcol.z *= tm + 2.0f * t * col2.z; - else - outcol.z = 1.0f - (tm + 2.0f * t * (1.0f - col2.z)) * (1.0f - outcol.z); - - return outcol; -} - -ccl_device float3 svm_mix_sub(float t, float3 col1, float3 col2) -{ - return interp(col1, col1 - col2, t); -} - -ccl_device float3 svm_mix_div(float t, float3 col1, float3 col2) -{ - float tm = 1.0f - t; - - float3 outcol = col1; - - if (col2.x != 0.0f) - outcol.x = tm * outcol.x + t * outcol.x / col2.x; - if (col2.y != 0.0f) - outcol.y = tm * outcol.y + t * outcol.y / col2.y; - if (col2.z != 0.0f) - outcol.z = tm * outcol.z + t * outcol.z / col2.z; - - return outcol; -} - -ccl_device float3 svm_mix_diff(float t, float3 col1, float3 col2) -{ - return interp(col1, fabs(col1 - col2), t); -} - -ccl_device float3 svm_mix_dark(float t, float3 col1, float3 col2) -{ - return interp(col1, min(col1, col2), t); -} - -ccl_device float3 svm_mix_light(float t, float3 col1, float3 col2) -{ - return interp(col1, max(col1, col2), t); -} - -ccl_device float3 svm_mix_dodge(float t, float3 col1, float3 col2) -{ - float3 outcol = col1; - - if (outcol.x != 0.0f) { - float tmp = 1.0f - t * col2.x; - if (tmp <= 0.0f) - outcol.x = 1.0f; - else if ((tmp = outcol.x / tmp) > 1.0f) - outcol.x = 1.0f; - else - outcol.x = tmp; - } - if (outcol.y != 0.0f) { - float tmp = 1.0f - t * col2.y; - if (tmp <= 0.0f) - outcol.y = 1.0f; - else if ((tmp = outcol.y / tmp) > 1.0f) - outcol.y = 1.0f; - else - outcol.y = tmp; - } - if (outcol.z != 0.0f) { - float tmp = 1.0f - t * col2.z; - if (tmp <= 0.0f) - outcol.z = 1.0f; - else if ((tmp = outcol.z / tmp) > 1.0f) - outcol.z = 1.0f; - else - outcol.z = tmp; - } - - return outcol; -} - -ccl_device float3 svm_mix_burn(float t, float3 col1, float3 col2) -{ - float tmp, tm = 1.0f - t; - - float3 outcol = col1; - - tmp = tm + t * col2.x; - if (tmp <= 0.0f) - outcol.x = 0.0f; - else if ((tmp = (1.0f - (1.0f - outcol.x) / tmp)) < 0.0f) - outcol.x = 0.0f; - else if (tmp > 1.0f) - outcol.x = 1.0f; - else - outcol.x = tmp; - - tmp = tm + t * col2.y; - if (tmp <= 0.0f) - outcol.y = 0.0f; - else if ((tmp = (1.0f - (1.0f - outcol.y) / tmp)) < 0.0f) - outcol.y = 0.0f; - else if (tmp > 1.0f) - outcol.y = 1.0f; - else - outcol.y = tmp; - - tmp = tm + t * col2.z; - if (tmp <= 0.0f) - outcol.z = 0.0f; - else if ((tmp = (1.0f - (1.0f - outcol.z) / tmp)) < 0.0f) - outcol.z = 0.0f; - else if (tmp > 1.0f) - outcol.z = 1.0f; - else - outcol.z = tmp; - - return outcol; -} - -ccl_device float3 svm_mix_hue(float t, float3 col1, float3 col2) 
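/* [Editor's aside, illustrative only -- not part of this patch.]
 * The screen mode above is the usual 1 - (1 - a)(1 - b), applied per channel
 * and faded by t. Scalar sketch of one channel, algebraically equivalent to
 * the vector form above: */
static float mix_screen_channel(float t, float c1, float c2)
{
  return 1.0f - ((1.0f - t) + t * (1.0f - c2)) * (1.0f - c1);
}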
-{ - float3 outcol = col1; - - float3 hsv2 = rgb_to_hsv(col2); - - if (hsv2.y != 0.0f) { - float3 hsv = rgb_to_hsv(outcol); - hsv.x = hsv2.x; - float3 tmp = hsv_to_rgb(hsv); - - outcol = interp(outcol, tmp, t); - } - - return outcol; -} - -ccl_device float3 svm_mix_sat(float t, float3 col1, float3 col2) -{ - float tm = 1.0f - t; - - float3 outcol = col1; - - float3 hsv = rgb_to_hsv(outcol); - - if (hsv.y != 0.0f) { - float3 hsv2 = rgb_to_hsv(col2); - - hsv.y = tm * hsv.y + t * hsv2.y; - outcol = hsv_to_rgb(hsv); - } - - return outcol; -} - -ccl_device float3 svm_mix_val(float t, float3 col1, float3 col2) -{ - float tm = 1.0f - t; - - float3 hsv = rgb_to_hsv(col1); - float3 hsv2 = rgb_to_hsv(col2); - - hsv.z = tm * hsv.z + t * hsv2.z; - - return hsv_to_rgb(hsv); -} - -ccl_device float3 svm_mix_color(float t, float3 col1, float3 col2) -{ - float3 outcol = col1; - float3 hsv2 = rgb_to_hsv(col2); - - if (hsv2.y != 0.0f) { - float3 hsv = rgb_to_hsv(outcol); - hsv.x = hsv2.x; - hsv.y = hsv2.y; - float3 tmp = hsv_to_rgb(hsv); - - outcol = interp(outcol, tmp, t); - } - - return outcol; -} - -ccl_device float3 svm_mix_soft(float t, float3 col1, float3 col2) -{ - float tm = 1.0f - t; - - float3 one = make_float3(1.0f, 1.0f, 1.0f); - float3 scr = one - (one - col2) * (one - col1); - - return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr); -} - -ccl_device float3 svm_mix_linear(float t, float3 col1, float3 col2) -{ - return col1 + t * (2.0f * col2 + make_float3(-1.0f, -1.0f, -1.0f)); -} - -ccl_device float3 svm_mix_clamp(float3 col) -{ - return saturate3(col); -} - -ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2) -{ - float t = saturate(fac); - - switch (type) { - case NODE_MIX_BLEND: - return svm_mix_blend(t, c1, c2); - case NODE_MIX_ADD: - return svm_mix_add(t, c1, c2); - case NODE_MIX_MUL: - return svm_mix_mul(t, c1, c2); - case NODE_MIX_SCREEN: - return svm_mix_screen(t, c1, c2); - case NODE_MIX_OVERLAY: - return svm_mix_overlay(t, c1, c2); - case NODE_MIX_SUB: - return svm_mix_sub(t, c1, c2); - case NODE_MIX_DIV: - return svm_mix_div(t, c1, c2); - case NODE_MIX_DIFF: - return svm_mix_diff(t, c1, c2); - case NODE_MIX_DARK: - return svm_mix_dark(t, c1, c2); - case NODE_MIX_LIGHT: - return svm_mix_light(t, c1, c2); - case NODE_MIX_DODGE: - return svm_mix_dodge(t, c1, c2); - case NODE_MIX_BURN: - return svm_mix_burn(t, c1, c2); - case NODE_MIX_HUE: - return svm_mix_hue(t, c1, c2); - case NODE_MIX_SAT: - return svm_mix_sat(t, c1, c2); - case NODE_MIX_VAL: - return svm_mix_val(t, c1, c2); - case NODE_MIX_COLOR: - return svm_mix_color(t, c1, c2); - case NODE_MIX_SOFT: - return svm_mix_soft(t, c1, c2); - case NODE_MIX_LINEAR: - return svm_mix_linear(t, c1, c2); - case NODE_MIX_CLAMP: - return svm_mix_clamp(c1); - } - - return make_float3(0.0f, 0.0f, 0.0f); -} - -ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness, float contrast) -{ - float a = 1.0f + contrast; - float b = brightness - contrast * 0.5f; - - color.x = max(a * color.x + b, 0.0f); - color.y = max(a * color.y + b, 0.0f); - color.z = max(a * color.z + b, 0.0f); - - return color; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h deleted file mode 100644 index ec5745dc78a..00000000000 --- a/intern/cycles/kernel/svm/svm_convert.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this 
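/* [Editor's aside, illustrative only -- not part of this patch.]
 * The brightness/contrast helper above applies the same affine map to every
 * channel: out = max(a*c + b, 0) with a = 1 + contrast and
 * b = brightness - contrast/2, so (for brightness = 0) contrast pivots around
 * mid-grey 0.5. Scalar sketch for one channel: */
#include <algorithm>

static float brightness_contrast_channel(float c, float brightness, float contrast)
{
  const float a = 1.0f + contrast;
  const float b = brightness - contrast * 0.5f;
  return std::max(a * c + b, 0.0f);
}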
file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Conversion Nodes */ - -ccl_device_noinline void svm_node_convert(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint type, - uint from, - uint to) -{ - switch (type) { - case NODE_CONVERT_FI: { - float f = stack_load_float(stack, from); - stack_store_int(stack, to, float_to_int(f)); - break; - } - case NODE_CONVERT_FV: { - float f = stack_load_float(stack, from); - stack_store_float3(stack, to, make_float3(f, f, f)); - break; - } - case NODE_CONVERT_CF: { - float3 f = stack_load_float3(stack, from); - float g = linear_rgb_to_gray(kg, f); - stack_store_float(stack, to, g); - break; - } - case NODE_CONVERT_CI: { - float3 f = stack_load_float3(stack, from); - int i = (int)linear_rgb_to_gray(kg, f); - stack_store_int(stack, to, i); - break; - } - case NODE_CONVERT_VF: { - float3 f = stack_load_float3(stack, from); - float g = average(f); - stack_store_float(stack, to, g); - break; - } - case NODE_CONVERT_VI: { - float3 f = stack_load_float3(stack, from); - int i = (int)average(f); - stack_store_int(stack, to, i); - break; - } - case NODE_CONVERT_IF: { - float f = (float)stack_load_int(stack, from); - stack_store_float(stack, to, f); - break; - } - case NODE_CONVERT_IV: { - float f = (float)stack_load_int(stack, from); - stack_store_float3(stack, to, make_float3(f, f, f)); - break; - } - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h deleted file mode 100644 index 8429fe1d3e0..00000000000 --- a/intern/cycles/kernel/svm/svm_displace.h +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel/sample/sample_mapping.h" - -CCL_NAMESPACE_BEGIN - -/* Bump Node */ - -ccl_device_noinline void svm_node_set_bump(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ -#ifdef __RAY_DIFFERENTIALS__ - /* get normal input */ - uint normal_offset, scale_offset, invert, use_object_space; - svm_unpack_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space); - - float3 normal_in = stack_valid(normal_offset) ? 
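/* [Editor's aside, illustrative only -- not part of this patch.]
 * The conversion node above distinguishes color-to-float (luminance-weighted
 * via linear_rgb_to_gray, with coefficients supplied through the kg argument)
 * from vector-to-float (a plain channel average). Sketch, using Rec.709 luma
 * weights only as a stand-in for the kernel-supplied coefficients: */
static float color_to_gray_rec709(float r, float g, float b)
{
  return 0.2126f * r + 0.7152f * g + 0.0722f * b;
}

static float vector_to_float_average(float x, float y, float z)
{
  return (x + y + z) * (1.0f / 3.0f);
}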
stack_load_float3(stack, normal_offset) : sd->N; - - float3 dPdx = sd->dP.dx; - float3 dPdy = sd->dP.dy; - - if (use_object_space) { - object_inverse_normal_transform(kg, sd, &normal_in); - object_inverse_dir_transform(kg, sd, &dPdx); - object_inverse_dir_transform(kg, sd, &dPdy); - } - - /* get surface tangents from normal */ - float3 Rx = cross(dPdy, normal_in); - float3 Ry = cross(normal_in, dPdx); - - /* get bump values */ - uint c_offset, x_offset, y_offset, strength_offset; - svm_unpack_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset); - - float h_c = stack_load_float(stack, c_offset); - float h_x = stack_load_float(stack, x_offset); - float h_y = stack_load_float(stack, y_offset); - - /* compute surface gradient and determinant */ - float det = dot(dPdx, Rx); - float3 surfgrad = (h_x - h_c) * Rx + (h_y - h_c) * Ry; - - float absdet = fabsf(det); - - float strength = stack_load_float(stack, strength_offset); - float scale = stack_load_float(stack, scale_offset); - - if (invert) - scale *= -1.0f; - - strength = max(strength, 0.0f); - - /* compute and output perturbed normal */ - float3 normal_out = safe_normalize(absdet * normal_in - scale * signf(det) * surfgrad); - if (is_zero(normal_out)) { - normal_out = normal_in; - } - else { - normal_out = normalize(strength * normal_out + (1.0f - strength) * normal_in); - } - - if (use_object_space) { - object_normal_transform(kg, sd, &normal_out); - } - - normal_out = ensure_valid_reflection(sd->Ng, sd->I, normal_out); - - stack_store_float3(stack, node.w, normal_out); -#endif -} - -/* Displacement Node */ - -ccl_device void svm_node_set_displacement(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint fac_offset) -{ - float3 dP = stack_load_float3(stack, fac_offset); - sd->P += dP; -} - -ccl_device_noinline void svm_node_displacement(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint height_offset, midlevel_offset, scale_offset, normal_offset; - svm_unpack_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset); - - float height = stack_load_float(stack, height_offset); - float midlevel = stack_load_float(stack, midlevel_offset); - float scale = stack_load_float(stack, scale_offset); - float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N; - uint space = node.w; - - float3 dP = normal; - - if (space == NODE_NORMAL_MAP_OBJECT) { - /* Object space. */ - object_inverse_normal_transform(kg, sd, &dP); - dP *= (height - midlevel) * scale; - object_dir_transform(kg, sd, &dP); - } - else { - /* World space. */ - dP *= (height - midlevel) * scale; - } - - stack_store_float3(stack, node.z, dP); -} - -ccl_device_noinline int svm_node_vector_displacement( - KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) -{ - uint4 data_node = read_node(kg, &offset); - uint space = data_node.x; - - uint vector_offset, midlevel_offset, scale_offset, displacement_offset; - svm_unpack_node_uchar4( - node.y, &vector_offset, &midlevel_offset, &scale_offset, &displacement_offset); - - float3 vector = stack_load_float3(stack, vector_offset); - float midlevel = stack_load_float(stack, midlevel_offset); - float scale = stack_load_float(stack, scale_offset); - float3 dP = (vector - make_float3(midlevel, midlevel, midlevel)) * scale; - - if (space == NODE_NORMAL_MAP_TANGENT) { - /* Tangent space. 
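/* [Editor's aside, illustrative only -- not part of this patch.]
 * Minimal sketch of the bump perturbation above (surface-gradient style):
 * heights sampled at the shading point and at its x/y ray differentials give
 * a gradient in the tangent plane, which is subtracted from the normal.
 * Strength blending and object-space handling from the node are omitted;
 * the tiny vector type and names are illustrative. */
#include <cmath>

struct Vec3 {
  float x, y, z;
};
static Vec3 operator+(Vec3 a, Vec3 b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; }
static Vec3 operator-(Vec3 a, Vec3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static Vec3 operator*(float s, Vec3 a) { return {s * a.x, s * a.y, s * a.z}; }
static float dot(Vec3 a, Vec3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
static Vec3 cross(Vec3 a, Vec3 b)
{
  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}
static Vec3 normalize(Vec3 a) /* assumes non-zero input; the kernel uses safe_normalize */
{
  const float len = std::sqrt(dot(a, a));
  return {a.x / len, a.y / len, a.z / len};
}

/* h_c: height at P; h_x, h_y: heights at P + dPdx and P + dPdy. */
static Vec3 bump_normal(Vec3 N, Vec3 dPdx, Vec3 dPdy, float h_c, float h_x, float h_y, float scale)
{
  const Vec3 Rx = cross(dPdy, N);
  const Vec3 Ry = cross(N, dPdx);
  const float det = dot(dPdx, Rx);
  const Vec3 surfgrad = (h_x - h_c) * Rx + (h_y - h_c) * Ry;
  const float sign_det = (det >= 0.0f) ? 1.0f : -1.0f;
  return normalize(std::fabs(det) * N - (scale * sign_det) * surfgrad);
}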
*/ - float3 normal = sd->N; - object_inverse_normal_transform(kg, sd, &normal); - - const AttributeDescriptor attr = find_attribute(kg, sd, node.z); - float3 tangent; - if (attr.offset != ATTR_STD_NOT_FOUND) { - tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL); - } - else { - tangent = normalize(sd->dPdu); - } - - float3 bitangent = normalize(cross(normal, tangent)); - const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); - if (attr_sign.offset != ATTR_STD_NOT_FOUND) { - float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL); - bitangent *= sign; - } - - dP = tangent * dP.x + normal * dP.y + bitangent * dP.z; - } - - if (space != NODE_NORMAL_MAP_WORLD) { - /* Tangent or object space. */ - object_dir_transform(kg, sd, &dP); - } - - stack_store_float3(stack, displacement_offset, dP); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_fractal_noise.h b/intern/cycles/kernel/svm/svm_fractal_noise.h deleted file mode 100644 index 57fa8c690ac..00000000000 --- a/intern/cycles/kernel/svm/svm_fractal_noise.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. */ -ccl_device_noinline float fractal_noise_1d(float p, float octaves, float roughness) -{ - float fscale = 1.0f; - float amp = 1.0f; - float maxamp = 0.0f; - float sum = 0.0f; - octaves = clamp(octaves, 0.0f, 16.0f); - int n = float_to_int(octaves); - for (int i = 0; i <= n; i++) { - float t = noise_1d(fscale * p); - sum += t * amp; - maxamp += amp; - amp *= clamp(roughness, 0.0f, 1.0f); - fscale *= 2.0f; - } - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - float t = noise_1d(fscale * p); - float sum2 = sum + t * amp; - sum /= maxamp; - sum2 /= maxamp + amp; - return (1.0f - rmd) * sum + rmd * sum2; - } - else { - return sum / maxamp; - } -} - -/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. */ -ccl_device_noinline float fractal_noise_2d(float2 p, float octaves, float roughness) -{ - float fscale = 1.0f; - float amp = 1.0f; - float maxamp = 0.0f; - float sum = 0.0f; - octaves = clamp(octaves, 0.0f, 16.0f); - int n = float_to_int(octaves); - for (int i = 0; i <= n; i++) { - float t = noise_2d(fscale * p); - sum += t * amp; - maxamp += amp; - amp *= clamp(roughness, 0.0f, 1.0f); - fscale *= 2.0f; - } - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - float t = noise_2d(fscale * p); - float sum2 = sum + t * amp; - sum /= maxamp; - sum2 /= maxamp + amp; - return (1.0f - rmd) * sum + rmd * sum2; - } - else { - return sum / maxamp; - } -} - -/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. 
*/ -ccl_device_noinline float fractal_noise_3d(float3 p, float octaves, float roughness) -{ - float fscale = 1.0f; - float amp = 1.0f; - float maxamp = 0.0f; - float sum = 0.0f; - octaves = clamp(octaves, 0.0f, 16.0f); - int n = float_to_int(octaves); - for (int i = 0; i <= n; i++) { - float t = noise_3d(fscale * p); - sum += t * amp; - maxamp += amp; - amp *= clamp(roughness, 0.0f, 1.0f); - fscale *= 2.0f; - } - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - float t = noise_3d(fscale * p); - float sum2 = sum + t * amp; - sum /= maxamp; - sum2 /= maxamp + amp; - return (1.0f - rmd) * sum + rmd * sum2; - } - else { - return sum / maxamp; - } -} - -/* The fractal_noise_[1-4] functions are all exactly the same except for the input type. */ -ccl_device_noinline float fractal_noise_4d(float4 p, float octaves, float roughness) -{ - float fscale = 1.0f; - float amp = 1.0f; - float maxamp = 0.0f; - float sum = 0.0f; - octaves = clamp(octaves, 0.0f, 16.0f); - int n = float_to_int(octaves); - for (int i = 0; i <= n; i++) { - float t = noise_4d(fscale * p); - sum += t * amp; - maxamp += amp; - amp *= clamp(roughness, 0.0f, 1.0f); - fscale *= 2.0f; - } - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - float t = noise_4d(fscale * p); - float sum2 = sum + t * amp; - sum /= maxamp; - sum2 /= maxamp + amp; - return (1.0f - rmd) * sum + rmd * sum2; - } - else { - return sum / maxamp; - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h deleted file mode 100644 index 449ec84370f..00000000000 --- a/intern/cycles/kernel/svm/svm_fresnel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Fresnel Node */ - -ccl_device_noinline void svm_node_fresnel(ccl_private ShaderData *sd, - ccl_private float *stack, - uint ior_offset, - uint ior_value, - uint node) -{ - uint normal_offset, out_offset; - svm_unpack_node_uchar2(node, &normal_offset, &out_offset); - float eta = (stack_valid(ior_offset)) ? stack_load_float(stack, ior_offset) : - __uint_as_float(ior_value); - float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N; - - eta = fmaxf(eta, 1e-5f); - eta = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta; - - float f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); - - stack_store_float(stack, out_offset, f); -} - -/* Layer Weight Node */ - -ccl_device_noinline void svm_node_layer_weight(ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint blend_offset = node.y; - uint blend_value = node.z; - - uint type, normal_offset, out_offset; - svm_unpack_node_uchar3(node.w, &type, &normal_offset, &out_offset); - - float blend = (stack_valid(blend_offset)) ? stack_load_float(stack, blend_offset) : - __uint_as_float(blend_value); - float3 normal_in = (stack_valid(normal_offset)) ? 
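/* [Editor's aside, illustrative only -- not part of this patch.]
 * The fractal_noise_* functions above all share one accumulation scheme: sum
 * octaves with geometrically growing frequency and roughness-scaled
 * amplitude, then blend in one extra octave by the fractional part so the
 * "octaves" input animates smoothly. Sketch with a caller-supplied 1D noise
 * function standing in for noise_1d: */
#include <algorithm>
#include <cmath>

static float fractal_noise(float p, float octaves, float roughness, float (*noise)(float))
{
  octaves = std::min(std::max(octaves, 0.0f), 16.0f);
  const float rough = std::min(std::max(roughness, 0.0f), 1.0f);
  float fscale = 1.0f, amp = 1.0f, maxamp = 0.0f, sum = 0.0f;
  const int n = int(octaves);
  for (int i = 0; i <= n; i++) {
    sum += noise(fscale * p) * amp;
    maxamp += amp;
    amp *= rough;
    fscale *= 2.0f;
  }
  const float rmd = octaves - std::floor(octaves);
  if (rmd == 0.0f)
    return sum / maxamp;
  /* Blend towards the result that includes one more octave. */
  const float sum2 = sum + noise(fscale * p) * amp;
  return (1.0f - rmd) * (sum / maxamp) + rmd * (sum2 / (maxamp + amp));
}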
stack_load_float3(stack, normal_offset) : - sd->N; - - float f; - - if (type == NODE_LAYER_WEIGHT_FRESNEL) { - float eta = fmaxf(1.0f - blend, 1e-5f); - eta = (sd->flag & SD_BACKFACING) ? eta : 1.0f / eta; - - f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); - } - else { - f = fabsf(dot(sd->I, normal_in)); - - if (blend != 0.5f) { - blend = clamp(blend, 0.0f, 1.0f - 1e-5f); - blend = (blend < 0.5f) ? 2.0f * blend : 0.5f / (1.0f - blend); - - f = powf(f, blend); - } - - f = 1.0f - f; - } - - stack_store_float(stack, out_offset, f); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_gamma.h b/intern/cycles/kernel/svm/svm_gamma.h deleted file mode 100644 index 7ec6c31065d..00000000000 --- a/intern/cycles/kernel/svm/svm_gamma.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -ccl_device_noinline void svm_node_gamma(ccl_private ShaderData *sd, - ccl_private float *stack, - uint in_gamma, - uint in_color, - uint out_color) -{ - float3 color = stack_load_float3(stack, in_color); - float gamma = stack_load_float(stack, in_gamma); - - color = svm_math_gamma_color(color, gamma); - - if (stack_valid(out_color)) - stack_store_float3(stack, out_color, color); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h deleted file mode 100644 index b29bfdbed07..00000000000 --- a/intern/cycles/kernel/svm/svm_geometry.h +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
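/* [Editor's aside, illustrative only -- not part of this patch.]
 * Scalar sketch of the "facing" branch of the Layer Weight node above: the
 * blend input remaps the exponent so that 0.5 is neutral, lower values soften
 * the falloff and higher values sharpen it towards grazing angles.
 * cos_theta corresponds to dot(sd->I, normal_in). */
#include <algorithm>
#include <cmath>

static float layer_weight_facing(float cos_theta, float blend)
{
  float f = std::fabs(cos_theta);
  if (blend != 0.5f) {
    blend = std::min(std::max(blend, 0.0f), 1.0f - 1e-5f);
    blend = (blend < 0.5f) ? 2.0f * blend : 0.5f / (1.0f - blend);
    f = std::pow(f, blend);
  }
  return 1.0f - f;
}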
- */ - -CCL_NAMESPACE_BEGIN - -/* Geometry Node */ - -ccl_device_noinline void svm_node_geometry(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint type, - uint out_offset) -{ - float3 data; - - switch (type) { - case NODE_GEOM_P: - data = sd->P; - break; - case NODE_GEOM_N: - data = sd->N; - break; -#ifdef __DPDU__ - case NODE_GEOM_T: - data = primitive_tangent(kg, sd); - break; -#endif - case NODE_GEOM_I: - data = sd->I; - break; - case NODE_GEOM_Ng: - data = sd->Ng; - break; - case NODE_GEOM_uv: - data = make_float3(sd->u, sd->v, 0.0f); - break; - default: - data = make_float3(0.0f, 0.0f, 0.0f); - } - - stack_store_float3(stack, out_offset, data); -} - -ccl_device_noinline void svm_node_geometry_bump_dx(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint type, - uint out_offset) -{ -#ifdef __RAY_DIFFERENTIALS__ - float3 data; - - switch (type) { - case NODE_GEOM_P: - data = sd->P + sd->dP.dx; - break; - case NODE_GEOM_uv: - data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f); - break; - default: - svm_node_geometry(kg, sd, stack, type, out_offset); - return; - } - - stack_store_float3(stack, out_offset, data); -#else - svm_node_geometry(kg, sd, stack, type, out_offset); -#endif -} - -ccl_device_noinline void svm_node_geometry_bump_dy(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint type, - uint out_offset) -{ -#ifdef __RAY_DIFFERENTIALS__ - float3 data; - - switch (type) { - case NODE_GEOM_P: - data = sd->P + sd->dP.dy; - break; - case NODE_GEOM_uv: - data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f); - break; - default: - svm_node_geometry(kg, sd, stack, type, out_offset); - return; - } - - stack_store_float3(stack, out_offset, data); -#else - svm_node_geometry(kg, sd, stack, type, out_offset); -#endif -} - -/* Object Info */ - -ccl_device_noinline void svm_node_object_info(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint type, - uint out_offset) -{ - float data; - - switch (type) { - case NODE_INFO_OB_LOCATION: { - stack_store_float3(stack, out_offset, object_location(kg, sd)); - return; - } - case NODE_INFO_OB_COLOR: { - stack_store_float3(stack, out_offset, object_color(kg, sd->object)); - return; - } - case NODE_INFO_OB_INDEX: - data = object_pass_id(kg, sd->object); - break; - case NODE_INFO_MAT_INDEX: - data = shader_pass_id(kg, sd); - break; - case NODE_INFO_OB_RANDOM: { - if (sd->lamp != LAMP_NONE) { - data = lamp_random_number(kg, sd->lamp); - } - else { - data = object_random_number(kg, sd->object); - } - break; - } - default: - data = 0.0f; - break; - } - - stack_store_float(stack, out_offset, data); -} - -/* Particle Info */ - -ccl_device_noinline void svm_node_particle_info(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint type, - uint out_offset) -{ - switch (type) { - case NODE_INFO_PAR_INDEX: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float(stack, out_offset, particle_index(kg, particle_id)); - break; - } - case NODE_INFO_PAR_RANDOM: { - int particle_id = object_particle_id(kg, sd->object); - float random = hash_uint2_to_float(particle_index(kg, particle_id), 0); - stack_store_float(stack, out_offset, random); - break; - } - case NODE_INFO_PAR_AGE: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float(stack, out_offset, particle_age(kg, particle_id)); - break; - } - case NODE_INFO_PAR_LIFETIME: { - int particle_id = 
object_particle_id(kg, sd->object); - stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id)); - break; - } - case NODE_INFO_PAR_LOCATION: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float3(stack, out_offset, particle_location(kg, particle_id)); - break; - } -#if 0 /* XXX float4 currently not supported in SVM stack */ - case NODE_INFO_PAR_ROTATION: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id)); - break; - } -#endif - case NODE_INFO_PAR_SIZE: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float(stack, out_offset, particle_size(kg, particle_id)); - break; - } - case NODE_INFO_PAR_VELOCITY: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id)); - break; - } - case NODE_INFO_PAR_ANGULAR_VELOCITY: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id)); - break; - } - } -} - -#ifdef __HAIR__ - -/* Hair Info */ - -ccl_device_noinline void svm_node_hair_info(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint type, - uint out_offset) -{ - float data; - float3 data3; - - switch (type) { - case NODE_INFO_CURVE_IS_STRAND: { - data = (sd->type & PRIMITIVE_ALL_CURVE) != 0; - stack_store_float(stack, out_offset, data); - break; - } - case NODE_INFO_CURVE_INTERCEPT: - break; /* handled as attribute */ - case NODE_INFO_CURVE_LENGTH: - break; /* handled as attribute */ - case NODE_INFO_CURVE_RANDOM: - break; /* handled as attribute */ - case NODE_INFO_CURVE_THICKNESS: { - data = curve_thickness(kg, sd); - stack_store_float(stack, out_offset, data); - break; - } -# if 0 - case NODE_INFO_CURVE_FADE: { - data = sd->curve_transparency; - stack_store_float(stack, out_offset, data); - break; - } -# endif - case NODE_INFO_CURVE_TANGENT_NORMAL: { - data3 = curve_tangent_normal(kg, sd); - stack_store_float3(stack, out_offset, data3); - break; - } - } -} -#endif - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h deleted file mode 100644 index 8cc37be606f..00000000000 --- a/intern/cycles/kernel/svm/svm_gradient.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -CCL_NAMESPACE_BEGIN - -/* Gradient */ - -ccl_device float svm_gradient(float3 p, NodeGradientType type) -{ - float x, y, z; - - x = p.x; - y = p.y; - z = p.z; - - if (type == NODE_BLEND_LINEAR) { - return x; - } - else if (type == NODE_BLEND_QUADRATIC) { - float r = fmaxf(x, 0.0f); - return r * r; - } - else if (type == NODE_BLEND_EASING) { - float r = fminf(fmaxf(x, 0.0f), 1.0f); - float t = r * r; - - return (3.0f * t - 2.0f * t * r); - } - else if (type == NODE_BLEND_DIAGONAL) { - return (x + y) * 0.5f; - } - else if (type == NODE_BLEND_RADIAL) { - return atan2f(y, x) / M_2PI_F + 0.5f; - } - else { - /* Bias a little bit for the case where p is a unit length vector, - * to get exactly zero instead of a small random value depending - * on float precision. */ - float r = fmaxf(0.999999f - sqrtf(x * x + y * y + z * z), 0.0f); - - if (type == NODE_BLEND_QUADRATIC_SPHERE) - return r * r; - else if (type == NODE_BLEND_SPHERICAL) - return r; - } - - return 0.0f; -} - -ccl_device_noinline void svm_node_tex_gradient(ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint type, co_offset, color_offset, fac_offset; - - svm_unpack_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset); - - float3 co = stack_load_float3(stack, co_offset); - - float f = svm_gradient(co, (NodeGradientType)type); - f = saturate(f); - - if (stack_valid(fac_offset)) - stack_store_float(stack, fac_offset, f); - if (stack_valid(color_offset)) - stack_store_float3(stack, color_offset, make_float3(f, f, f)); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h deleted file mode 100644 index 978c4c2d781..00000000000 --- a/intern/cycles/kernel/svm/svm_hsv.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SVM_HSV_H__ -#define __SVM_HSV_H__ - -CCL_NAMESPACE_BEGIN - -ccl_device_noinline void svm_node_hsv(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint in_color_offset, fac_offset, out_color_offset; - uint hue_offset, sat_offset, val_offset; - svm_unpack_node_uchar3(node.y, &in_color_offset, &fac_offset, &out_color_offset); - svm_unpack_node_uchar3(node.z, &hue_offset, &sat_offset, &val_offset); - - float fac = stack_load_float(stack, fac_offset); - float3 in_color = stack_load_float3(stack, in_color_offset); - float3 color = in_color; - - float hue = stack_load_float(stack, hue_offset); - float sat = stack_load_float(stack, sat_offset); - float val = stack_load_float(stack, val_offset); - - color = rgb_to_hsv(color); - - /* Remember: `fmodf` doesn't work for negative numbers here. 
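/* [Editor's aside, illustrative only -- not part of this patch.]
 * The fmodf() wrap noted in the comment above (and applied just below) relies
 * on hue + 0.5f being non-negative. For comparison, a generic wrap-to-[0,1)
 * that also handles negative inputs can be written with floorf instead: */
#include <cmath>

static float wrap01(float x)
{
  return x - std::floor(x); /* e.g. wrap01(-0.25f) == 0.75f */
}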
*/ - color.x = fmodf(color.x + hue + 0.5f, 1.0f); - color.y = saturate(color.y * sat); - color.z *= val; - - color = hsv_to_rgb(color); - - color.x = fac * color.x + (1.0f - fac) * in_color.x; - color.y = fac * color.y + (1.0f - fac) * in_color.y; - color.z = fac * color.z + (1.0f - fac) * in_color.z; - - /* Clamp color to prevent negative values caused by over saturation. */ - color.x = max(color.x, 0.0f); - color.y = max(color.y, 0.0f); - color.z = max(color.z, 0.0f); - - if (stack_valid(out_color_offset)) - stack_store_float3(stack, out_color_offset, color); -} - -CCL_NAMESPACE_END - -#endif /* __SVM_HSV_H__ */ diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h deleted file mode 100644 index 0215670d062..00000000000 --- a/intern/cycles/kernel/svm/svm_ies.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* IES Light */ - -ccl_device_inline float interpolate_ies_vertical( - KernelGlobals kg, int ofs, int v, int v_num, float v_frac, int h) -{ - /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end - * of v (corresponding to the north pole) would result in artifacts. The proper way of dealing - * with this would be to lookup the corresponding value on the other side of the pole, but since - * the horizontal coordinates might be nonuniform, this would require yet another interpolation. - * Therefore, the assumption is made that the light is going to be symmetrical, which means that - * we can just take the corresponding value at the current horizontal coordinate. */ - -#define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs + h * v_num + (v)) - /* If v is zero, assume symmetry and read at v=1 instead of v=-1. */ - float a = IES_LOOKUP((v == 0) ? 1 : v - 1); - float b = IES_LOOKUP(v); - float c = IES_LOOKUP(v + 1); - float d = IES_LOOKUP(min(v + 2, v_num - 1)); -#undef IES_LOOKUP - - return cubic_interp(a, b, c, d, v_frac); -} - -ccl_device_inline float kernel_ies_interp(KernelGlobals kg, int slot, float h_angle, float v_angle) -{ - /* Find offset of the IES data in the table. */ - int ofs = __float_as_int(kernel_tex_fetch(__ies, slot)); - if (ofs == -1) { - return 100.0f; - } - - int h_num = __float_as_int(kernel_tex_fetch(__ies, ofs++)); - int v_num = __float_as_int(kernel_tex_fetch(__ies, ofs++)); - -#define IES_LOOKUP_ANGLE_H(h) kernel_tex_fetch(__ies, ofs + (h)) -#define IES_LOOKUP_ANGLE_V(v) kernel_tex_fetch(__ies, ofs + h_num + (v)) - - /* Check whether the angle is within the bounds of the IES texture. */ - if (v_angle >= IES_LOOKUP_ANGLE_V(v_num - 1)) { - return 0.0f; - } - kernel_assert(v_angle >= IES_LOOKUP_ANGLE_V(0)); - kernel_assert(h_angle >= IES_LOOKUP_ANGLE_H(0)); - kernel_assert(h_angle <= IES_LOOKUP_ANGLE_H(h_num - 1)); - - /* Lookup the angles to find the table position. */ - int h_i, v_i; - /* TODO(lukas): Consider using bisection. 
- * Probably not worth it for the vast majority of IES files. */ - for (h_i = 0; IES_LOOKUP_ANGLE_H(h_i + 1) < h_angle; h_i++) - ; - for (v_i = 0; IES_LOOKUP_ANGLE_V(v_i + 1) < v_angle; v_i++) - ; - - float h_frac = inverse_lerp(IES_LOOKUP_ANGLE_H(h_i), IES_LOOKUP_ANGLE_H(h_i + 1), h_angle); - float v_frac = inverse_lerp(IES_LOOKUP_ANGLE_V(v_i), IES_LOOKUP_ANGLE_V(v_i + 1), v_angle); - -#undef IES_LOOKUP_ANGLE_H -#undef IES_LOOKUP_ANGLE_V - - /* Skip forward to the actual intensity data. */ - ofs += h_num + v_num; - - /* Perform cubic interpolation along the horizontal coordinate to get the intensity value. - * If h_i is zero, just wrap around since the horizontal angles always go over the full circle. - * However, the last entry (360°) equals the first one, so we need to wrap around to the one - * before that. */ - float a = interpolate_ies_vertical( - kg, ofs, v_i, v_num, v_frac, (h_i == 0) ? h_num - 2 : h_i - 1); - float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i); - float c = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i + 1); - /* Same logic here, wrap around to the second element if necessary. */ - float d = interpolate_ies_vertical( - kg, ofs, v_i, v_num, v_frac, (h_i + 2 == h_num) ? 1 : h_i + 2); - - /* Cubic interpolation can result in negative values, so get rid of them. */ - return max(cubic_interp(a, b, c, d, h_frac), 0.0f); -} - -ccl_device_noinline void svm_node_ies(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint vector_offset, strength_offset, fac_offset, slot = node.z; - svm_unpack_node_uchar3(node.y, &strength_offset, &vector_offset, &fac_offset); - - float3 vector = stack_load_float3(stack, vector_offset); - float strength = stack_load_float_default(stack, strength_offset, node.w); - - vector = normalize(vector); - float v_angle = safe_acosf(-vector.z); - float h_angle = atan2f(vector.x, vector.y) + M_PI_F; - - float fac = strength * kernel_ies_interp(kg, slot, h_angle, v_angle); - - if (stack_valid(fac_offset)) { - stack_store_float(stack, fac_offset, fac); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h deleted file mode 100644 index 68374fcfb0d..00000000000 --- a/intern/cycles/kernel/svm/svm_image.h +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
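/* [Editor's aside, illustrative only -- not part of this patch.]
 * Sketch of the direction-to-IES-angle mapping used by svm_node_ies above:
 * the normalized vector is converted to a vertical angle measured from the
 * downward axis and a horizontal angle shifted into a non-negative range.
 * The struct and names are illustrative. */
#include <algorithm>
#include <cmath>

struct IesAngles {
  float h_angle, v_angle;
};

static IesAngles ies_angles(float x, float y, float z) /* assumes unit length */
{
  const float kPi = 3.14159265358979f;
  IesAngles a;
  a.v_angle = std::acos(std::min(std::max(-z, -1.0f), 1.0f)); /* 0 = pointing along -Z */
  a.h_angle = std::atan2(x, y) + kPi;                          /* shifted to [0, 2*pi] */
  return a;
}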
- */ - -CCL_NAMESPACE_BEGIN - -ccl_device float4 svm_image_texture(KernelGlobals kg, int id, float x, float y, uint flags) -{ - if (id == -1) { - return make_float4( - TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); - } - - float4 r = kernel_tex_image_interp(kg, id, x, y); - const float alpha = r.w; - - if ((flags & NODE_IMAGE_ALPHA_UNASSOCIATE) && alpha != 1.0f && alpha != 0.0f) { - r /= alpha; - r.w = alpha; - } - - if (flags & NODE_IMAGE_COMPRESS_AS_SRGB) { - r = color_srgb_to_linear_v4(r); - } - - return r; -} - -/* Remap coordinate from 0..1 box to -1..-1 */ -ccl_device_inline float3 texco_remap_square(float3 co) -{ - return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f; -} - -ccl_device_noinline int svm_node_tex_image( - KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) -{ - uint co_offset, out_offset, alpha_offset, flags; - - svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); - - float3 co = stack_load_float3(stack, co_offset); - float2 tex_co; - if (node.w == NODE_IMAGE_PROJ_SPHERE) { - co = texco_remap_square(co); - tex_co = map_to_sphere(co); - } - else if (node.w == NODE_IMAGE_PROJ_TUBE) { - co = texco_remap_square(co); - tex_co = map_to_tube(co); - } - else { - tex_co = make_float2(co.x, co.y); - } - - /* TODO(lukas): Consider moving tile information out of the SVM node. - * TextureInfo seems a reasonable candidate. */ - int id = -1; - int num_nodes = (int)node.y; - if (num_nodes > 0) { - /* Remember the offset of the node following the tile nodes. */ - int next_offset = offset + num_nodes; - - /* Find the tile that the UV lies in. */ - int tx = (int)tex_co.x; - int ty = (int)tex_co.y; - - /* Check that we're within a legitimate tile. */ - if (tx >= 0 && ty >= 0 && tx < 10) { - int tile = 1001 + 10 * ty + tx; - - /* Find the index of the tile. */ - for (int i = 0; i < num_nodes; i++) { - uint4 tile_node = read_node(kg, &offset); - if (tile_node.x == tile) { - id = tile_node.y; - break; - } - if (tile_node.z == tile) { - id = tile_node.w; - break; - } - } - - /* If we found the tile, offset the UVs to be relative to it. */ - if (id != -1) { - tex_co.x -= tx; - tex_co.y -= ty; - } - } - - /* Skip over the remaining nodes. */ - offset = next_offset; - } - else { - id = -num_nodes; - } - - float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, flags); - - if (stack_valid(out_offset)) - stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); - if (stack_valid(alpha_offset)) - stack_store_float(stack, alpha_offset, f.w); - return offset; -} - -ccl_device_noinline void svm_node_tex_image_box(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - /* get object space normal */ - float3 N = sd->N; - - N = sd->N; - object_inverse_normal_transform(kg, sd, &N); - - /* project from direction vector to barycentric coordinates in triangles */ - float3 signed_N = N; - - N.x = fabsf(N.x); - N.y = fabsf(N.y); - N.z = fabsf(N.z); - - N /= (N.x + N.y + N.z); - - /* basic idea is to think of this as a triangle, each corner representing - * one of the 3 faces of the cube. in the corners we have single textures, - * in between we blend between two textures, and in the middle we a blend - * between three textures. - * - * The `Nxyz` values are the barycentric coordinates in an equilateral - * triangle, which in case of blending, in the middle has a smaller - * equilateral triangle where 3 textures blend. 
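/* [Editor's aside, illustrative only -- not part of this patch.]
 * The tiled-image lookup above maps UV space to UDIM tile numbers: tile (0,0)
 * is 1001, tiles advance by 1 along U and by 10 along V, and U is limited to
 * ten columns. Sketch, returning -1 for out-of-range UVs: */
static int udim_tile_number(float u, float v)
{
  const int tx = int(u); /* truncation matches the (int) casts above for u, v >= 0 */
  const int ty = int(v);
  if (tx < 0 || ty < 0 || tx >= 10)
    return -1;
  return 1001 + 10 * ty + tx;
}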
this divides things into - * 7 zones, with an if() test for each zone. */ - - float3 weight = make_float3(0.0f, 0.0f, 0.0f); - float blend = __int_as_float(node.w); - float limit = 0.5f * (1.0f + blend); - - /* first test for corners with single texture */ - if (N.x > limit * (N.x + N.y) && N.x > limit * (N.x + N.z)) { - weight.x = 1.0f; - } - else if (N.y > limit * (N.x + N.y) && N.y > limit * (N.y + N.z)) { - weight.y = 1.0f; - } - else if (N.z > limit * (N.x + N.z) && N.z > limit * (N.y + N.z)) { - weight.z = 1.0f; - } - else if (blend > 0.0f) { - /* in case of blending, test for mixes between two textures */ - if (N.z < (1.0f - limit) * (N.y + N.x)) { - weight.x = N.x / (N.x + N.y); - weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend); - weight.y = 1.0f - weight.x; - } - else if (N.x < (1.0f - limit) * (N.y + N.z)) { - weight.y = N.y / (N.y + N.z); - weight.y = saturate((weight.y - 0.5f * (1.0f - blend)) / blend); - weight.z = 1.0f - weight.y; - } - else if (N.y < (1.0f - limit) * (N.x + N.z)) { - weight.x = N.x / (N.x + N.z); - weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend); - weight.z = 1.0f - weight.x; - } - else { - /* last case, we have a mix between three */ - weight.x = ((2.0f - limit) * N.x + (limit - 1.0f)) / (2.0f * limit - 1.0f); - weight.y = ((2.0f - limit) * N.y + (limit - 1.0f)) / (2.0f * limit - 1.0f); - weight.z = ((2.0f - limit) * N.z + (limit - 1.0f)) / (2.0f * limit - 1.0f); - } - } - else { - /* Desperate mode, no valid choice anyway, fallback to one side. */ - weight.x = 1.0f; - } - - /* now fetch textures */ - uint co_offset, out_offset, alpha_offset, flags; - svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); - - float3 co = stack_load_float3(stack, co_offset); - uint id = node.y; - - float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - /* Map so that no textures are flipped, rotation is somewhat arbitrary. */ - if (weight.x > 0.0f) { - float2 uv = make_float2((signed_N.x < 0.0f) ? 1.0f - co.y : co.y, co.z); - f += weight.x * svm_image_texture(kg, id, uv.x, uv.y, flags); - } - if (weight.y > 0.0f) { - float2 uv = make_float2((signed_N.y > 0.0f) ? 1.0f - co.x : co.x, co.z); - f += weight.y * svm_image_texture(kg, id, uv.x, uv.y, flags); - } - if (weight.z > 0.0f) { - float2 uv = make_float2((signed_N.z > 0.0f) ? 
1.0f - co.y : co.y, co.x); - f += weight.z * svm_image_texture(kg, id, uv.x, uv.y, flags); - } - - if (stack_valid(out_offset)) - stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); - if (stack_valid(alpha_offset)) - stack_store_float(stack, alpha_offset, f.w); -} - -ccl_device_noinline void svm_node_tex_environment(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint id = node.y; - uint co_offset, out_offset, alpha_offset, flags; - uint projection = node.w; - - svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); - - float3 co = stack_load_float3(stack, co_offset); - float2 uv; - - co = safe_normalize(co); - - if (projection == 0) - uv = direction_to_equirectangular(co); - else - uv = direction_to_mirrorball(co); - - float4 f = svm_image_texture(kg, id, uv.x, uv.y, flags); - - if (stack_valid(out_offset)) - stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); - if (stack_valid(alpha_offset)) - stack_store_float(stack, alpha_offset, f.w); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_invert.h b/intern/cycles/kernel/svm/svm_invert.h deleted file mode 100644 index 60668ec00f1..00000000000 --- a/intern/cycles/kernel/svm/svm_invert.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -ccl_device float invert(float color, float factor) -{ - return factor * (1.0f - color) + (1.0f - factor) * color; -} - -ccl_device_noinline void svm_node_invert(ccl_private ShaderData *sd, - ccl_private float *stack, - uint in_fac, - uint in_color, - uint out_color) -{ - float factor = stack_load_float(stack, in_fac); - float3 color = stack_load_float3(stack, in_color); - - color.x = invert(color.x, factor); - color.y = invert(color.y, factor); - color.z = invert(color.z, factor); - - if (stack_valid(out_color)) - stack_store_float3(stack, out_color, color); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h deleted file mode 100644 index 5e1fc4f671c..00000000000 --- a/intern/cycles/kernel/svm/svm_light_path.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -CCL_NAMESPACE_BEGIN - -/* Light Path Node */ - -template -ccl_device_noinline void svm_node_light_path(KernelGlobals kg, - ConstIntegratorGenericState state, - ccl_private const ShaderData *sd, - ccl_private float *stack, - uint type, - uint out_offset, - uint32_t path_flag) -{ - float info = 0.0f; - - switch (type) { - case NODE_LP_camera: - info = (path_flag & PATH_RAY_CAMERA) ? 1.0f : 0.0f; - break; - case NODE_LP_shadow: - info = (path_flag & PATH_RAY_SHADOW) ? 1.0f : 0.0f; - break; - case NODE_LP_diffuse: - info = (path_flag & PATH_RAY_DIFFUSE) ? 1.0f : 0.0f; - break; - case NODE_LP_glossy: - info = (path_flag & PATH_RAY_GLOSSY) ? 1.0f : 0.0f; - break; - case NODE_LP_singular: - info = (path_flag & PATH_RAY_SINGULAR) ? 1.0f : 0.0f; - break; - case NODE_LP_reflection: - info = (path_flag & PATH_RAY_REFLECT) ? 1.0f : 0.0f; - break; - case NODE_LP_transmission: - info = (path_flag & PATH_RAY_TRANSMIT) ? 1.0f : 0.0f; - break; - case NODE_LP_volume_scatter: - info = (path_flag & PATH_RAY_VOLUME_SCATTER) ? 1.0f : 0.0f; - break; - case NODE_LP_backfacing: - info = (sd->flag & SD_BACKFACING) ? 1.0f : 0.0f; - break; - case NODE_LP_ray_length: - info = sd->ray_length; - break; - case NODE_LP_ray_depth: { - /* Read bounce from difference location depending if this is a shadow - * path. It's a bit dubious to have integrate state details leak into - * this function but hard to avoid currently. */ - IF_KERNEL_NODES_FEATURE(LIGHT_PATH) - { - info = (float)integrator_state_bounce(state, path_flag); - } - - /* For background, light emission and shadow evaluation we from a - * surface or volume we are effective one bounce further. */ - if (path_flag & (PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { - info += 1.0f; - } - break; - } - case NODE_LP_ray_transparent: { - IF_KERNEL_NODES_FEATURE(LIGHT_PATH) - { - info = (float)integrator_state_transparent_bounce(state, path_flag); - } - break; - } - case NODE_LP_ray_diffuse: - IF_KERNEL_NODES_FEATURE(LIGHT_PATH) - { - info = (float)integrator_state_diffuse_bounce(state, path_flag); - } - break; - case NODE_LP_ray_glossy: - IF_KERNEL_NODES_FEATURE(LIGHT_PATH) - { - info = (float)integrator_state_glossy_bounce(state, path_flag); - } - break; - case NODE_LP_ray_transmission: - IF_KERNEL_NODES_FEATURE(LIGHT_PATH) - { - info = (float)integrator_state_transmission_bounce(state, path_flag); - } - break; - } - - stack_store_float(stack, out_offset, info); -} - -/* Light Falloff Node */ - -ccl_device_noinline void svm_node_light_falloff(ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint strength_offset, out_offset, smooth_offset; - - svm_unpack_node_uchar3(node.z, &strength_offset, &smooth_offset, &out_offset); - - float strength = stack_load_float(stack, strength_offset); - uint type = node.y; - - switch (type) { - case NODE_LIGHT_FALLOFF_QUADRATIC: - break; - case NODE_LIGHT_FALLOFF_LINEAR: - strength *= sd->ray_length; - break; - case NODE_LIGHT_FALLOFF_CONSTANT: - strength *= sd->ray_length * sd->ray_length; - break; - } - - float smooth = stack_load_float(stack, smooth_offset); - - if (smooth > 0.0f) { - float squared = sd->ray_length * sd->ray_length; - /* Distant lamps set the ray length to FLT_MAX, which causes squared to overflow. 
*/ - if (isfinite(squared)) { - strength *= squared / (smooth + squared); - } - } - - stack_store_float(stack, out_offset, strength); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h deleted file mode 100644 index d3a429fec56..00000000000 --- a/intern/cycles/kernel/svm/svm_magic.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Magic */ - -ccl_device_noinline_cpu float3 svm_magic(float3 p, int n, float distortion) -{ - float x = sinf((p.x + p.y + p.z) * 5.0f); - float y = cosf((-p.x + p.y - p.z) * 5.0f); - float z = -cosf((-p.x - p.y + p.z) * 5.0f); - - if (n > 0) { - x *= distortion; - y *= distortion; - z *= distortion; - y = -cosf(x - y + z); - y *= distortion; - - if (n > 1) { - x = cosf(x - y - z); - x *= distortion; - - if (n > 2) { - z = sinf(-x - y - z); - z *= distortion; - - if (n > 3) { - x = -cosf(-x + y - z); - x *= distortion; - - if (n > 4) { - y = -sinf(-x + y + z); - y *= distortion; - - if (n > 5) { - y = -cosf(-x + y + z); - y *= distortion; - - if (n > 6) { - x = cosf(x + y + z); - x *= distortion; - - if (n > 7) { - z = sinf(x + y - z); - z *= distortion; - - if (n > 8) { - x = -cosf(-x - y + z); - x *= distortion; - - if (n > 9) { - y = -sinf(x - y + z); - y *= distortion; - } - } - } - } - } - } - } - } - } - } - - if (distortion != 0.0f) { - distortion *= 2.0f; - x /= distortion; - y /= distortion; - z /= distortion; - } - - return make_float3(0.5f - x, 0.5f - y, 0.5f - z); -} - -ccl_device_noinline int svm_node_tex_magic( - KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) -{ - uint depth; - uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset; - - svm_unpack_node_uchar3(node.y, &depth, &color_offset, &fac_offset); - svm_unpack_node_uchar3(node.z, &co_offset, &scale_offset, &distortion_offset); - - uint4 node2 = read_node(kg, &offset); - float3 co = stack_load_float3(stack, co_offset); - float scale = stack_load_float_default(stack, scale_offset, node2.x); - float distortion = stack_load_float_default(stack, distortion_offset, node2.y); - - float3 color = svm_magic(co * scale, depth, distortion); - - if (stack_valid(fac_offset)) - stack_store_float(stack, fac_offset, average(color)); - if (stack_valid(color_offset)) - stack_store_float3(stack, color_offset, color); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_map_range.h b/intern/cycles/kernel/svm/svm_map_range.h deleted file mode 100644 index 5e89947c6c7..00000000000 --- a/intern/cycles/kernel/svm/svm_map_range.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
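/* [Editor's aside, illustrative only -- not part of this patch.]
 * The smoothing term of the Light Falloff node above scales strength by
 * d^2 / (smooth + d^2): it approaches 1 at large distances and suppresses the
 * 1/d^2 singularity near the light. Scalar sketch, keeping the finiteness
 * guard for distant lights: */
#include <cmath>

static float light_falloff_smooth(float strength, float ray_length, float smooth)
{
  if (smooth > 0.0f) {
    const float d2 = ray_length * ray_length;
    if (std::isfinite(d2)) /* distant lamps use FLT_MAX ray lengths, as noted above */
      strength *= d2 / (smooth + d2);
  }
  return strength;
}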
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Map Range Node */ - -ccl_device_inline float smootherstep(float edge0, float edge1, float x) -{ - x = clamp(safe_divide((x - edge0), (edge1 - edge0)), 0.0f, 1.0f); - return x * x * x * (x * (x * 6.0f - 15.0f) + 10.0f); -} - -ccl_device_noinline int svm_node_map_range(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint value_stack_offset, - uint parameters_stack_offsets, - uint results_stack_offsets, - int offset) -{ - uint from_min_stack_offset, from_max_stack_offset, to_min_stack_offset, to_max_stack_offset; - uint type_stack_offset, steps_stack_offset, result_stack_offset; - svm_unpack_node_uchar4(parameters_stack_offsets, - &from_min_stack_offset, - &from_max_stack_offset, - &to_min_stack_offset, - &to_max_stack_offset); - svm_unpack_node_uchar3( - results_stack_offsets, &type_stack_offset, &steps_stack_offset, &result_stack_offset); - - uint4 defaults = read_node(kg, &offset); - uint4 defaults2 = read_node(kg, &offset); - - float value = stack_load_float(stack, value_stack_offset); - float from_min = stack_load_float_default(stack, from_min_stack_offset, defaults.x); - float from_max = stack_load_float_default(stack, from_max_stack_offset, defaults.y); - float to_min = stack_load_float_default(stack, to_min_stack_offset, defaults.z); - float to_max = stack_load_float_default(stack, to_max_stack_offset, defaults.w); - float steps = stack_load_float_default(stack, steps_stack_offset, defaults2.x); - - float result; - - if (from_max != from_min) { - float factor = value; - switch (type_stack_offset) { - default: - case NODE_MAP_RANGE_LINEAR: - factor = (value - from_min) / (from_max - from_min); - break; - case NODE_MAP_RANGE_STEPPED: { - factor = (value - from_min) / (from_max - from_min); - factor = (steps > 0.0f) ? floorf(factor * (steps + 1.0f)) / steps : 0.0f; - break; - } - case NODE_MAP_RANGE_SMOOTHSTEP: { - factor = (from_min > from_max) ? 1.0f - smoothstep(from_max, from_min, factor) : - smoothstep(from_min, from_max, factor); - break; - } - case NODE_MAP_RANGE_SMOOTHERSTEP: { - factor = (from_min > from_max) ? 1.0f - smootherstep(from_max, from_min, factor) : - smootherstep(from_min, from_max, factor); - break; - } - } - result = to_min + factor * (to_max - to_min); - } - else { - result = 0.0f; - } - stack_store_float(stack, result_stack_offset, result); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h deleted file mode 100644 index ed420e5bc3d..00000000000 --- a/intern/cycles/kernel/svm/svm_mapping.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
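/* [Editor's aside, illustrative only -- not part of this patch.]
 * The Map Range node above first turns the input into a factor relative to
 * the "from" interval and then re-applies that factor to the "to" interval.
 * Sketch of the linear mode, with the same degenerate-range guard as the
 * node: */
static float map_range_linear(float value, float from_min, float from_max, float to_min, float to_max)
{
  if (from_max == from_min)
    return 0.0f; /* matches the node's result for a degenerate source range */
  const float factor = (value - from_min) / (from_max - from_min);
  return to_min + factor * (to_max - to_min);
}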
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Mapping Node */ - -ccl_device_noinline void svm_node_mapping(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint type, - uint inputs_stack_offsets, - uint result_stack_offset) -{ - uint vector_stack_offset, location_stack_offset, rotation_stack_offset, scale_stack_offset; - svm_unpack_node_uchar4(inputs_stack_offsets, - &vector_stack_offset, - &location_stack_offset, - &rotation_stack_offset, - &scale_stack_offset); - - float3 vector = stack_load_float3(stack, vector_stack_offset); - float3 location = stack_load_float3(stack, location_stack_offset); - float3 rotation = stack_load_float3(stack, rotation_stack_offset); - float3 scale = stack_load_float3(stack, scale_stack_offset); - - float3 result = svm_mapping((NodeMappingType)type, vector, location, rotation, scale); - stack_store_float3(stack, result_stack_offset, result); -} - -/* Texture Mapping */ - -ccl_device_noinline int svm_node_texture_mapping(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint vec_offset, - uint out_offset, - int offset) -{ - float3 v = stack_load_float3(stack, vec_offset); - - Transform tfm; - tfm.x = read_node_float(kg, &offset); - tfm.y = read_node_float(kg, &offset); - tfm.z = read_node_float(kg, &offset); - - float3 r = transform_point(&tfm, v); - stack_store_float3(stack, out_offset, r); - return offset; -} - -ccl_device_noinline int svm_node_min_max(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint vec_offset, - uint out_offset, - int offset) -{ - float3 v = stack_load_float3(stack, vec_offset); - - float3 mn = float4_to_float3(read_node_float(kg, &offset)); - float3 mx = float4_to_float3(read_node_float(kg, &offset)); - - float3 r = min(max(mn, v), mx); - stack_store_float3(stack, out_offset, r); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_mapping_util.h b/intern/cycles/kernel/svm/svm_mapping_util.h deleted file mode 100644 index ec2c84e0791..00000000000 --- a/intern/cycles/kernel/svm/svm_mapping_util.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
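svm_node_texture_mapping above reads three float4 rows from the node stream and applies them as a 3x4 affine transform to the input vector. A minimal sketch of that transform_point step, with hand-rolled Float3/Float4/Transform stand-ins for the kernel types (the real types live in the Cycles util headers):

#include <cstdio>

struct Float4 { float x, y, z, w; };
struct Float3 { float x, y, z; };
struct Transform { Float4 x, y, z; };  // three rows of a 3x4 affine matrix

// Apply the affine transform: rotate/scale via the 3x3 part, translate via .w.
static Float3 transform_point(const Transform &t, Float3 p)
{
  return {t.x.x * p.x + t.x.y * p.y + t.x.z * p.z + t.x.w,
          t.y.x * p.x + t.y.y * p.y + t.y.z * p.z + t.y.w,
          t.z.x * p.x + t.z.y * p.y + t.z.z * p.z + t.z.w};
}

int main()
{
  // Identity rotation/scale with a (1, 2, 3) translation in the last column.
  Transform tfm = {{1, 0, 0, 1}, {0, 1, 0, 2}, {0, 0, 1, 3}};
  Float3 r = transform_point(tfm, {0.5f, 0.5f, 0.5f});
  std::printf("%f %f %f\n", r.x, r.y, r.z);  // 1.5 2.5 3.5
  return 0;
}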
- */ - -CCL_NAMESPACE_BEGIN - -ccl_device float3 -svm_mapping(NodeMappingType type, float3 vector, float3 location, float3 rotation, float3 scale) -{ - Transform rotationTransform = euler_to_transform(rotation); - switch (type) { - case NODE_MAPPING_TYPE_POINT: - return transform_direction(&rotationTransform, (vector * scale)) + location; - case NODE_MAPPING_TYPE_TEXTURE: - return safe_divide_float3_float3( - transform_direction_transposed(&rotationTransform, (vector - location)), scale); - case NODE_MAPPING_TYPE_VECTOR: - return transform_direction(&rotationTransform, (vector * scale)); - case NODE_MAPPING_TYPE_NORMAL: - return safe_normalize( - transform_direction(&rotationTransform, safe_divide_float3_float3(vector, scale))); - default: - return make_float3(0.0f, 0.0f, 0.0f); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h deleted file mode 100644 index 97f7d486c09..00000000000 --- a/intern/cycles/kernel/svm/svm_math.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -ccl_device_noinline void svm_node_math(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint type, - uint inputs_stack_offsets, - uint result_stack_offset) -{ - uint a_stack_offset, b_stack_offset, c_stack_offset; - svm_unpack_node_uchar3(inputs_stack_offsets, &a_stack_offset, &b_stack_offset, &c_stack_offset); - - float a = stack_load_float(stack, a_stack_offset); - float b = stack_load_float(stack, b_stack_offset); - float c = stack_load_float(stack, c_stack_offset); - float result = svm_math((NodeMathType)type, a, b, c); - - stack_store_float(stack, result_stack_offset, result); -} - -ccl_device_noinline int svm_node_vector_math(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint type, - uint inputs_stack_offsets, - uint outputs_stack_offsets, - int offset) -{ - uint value_stack_offset, vector_stack_offset; - uint a_stack_offset, b_stack_offset, param1_stack_offset; - svm_unpack_node_uchar3( - inputs_stack_offsets, &a_stack_offset, &b_stack_offset, ¶m1_stack_offset); - svm_unpack_node_uchar2(outputs_stack_offsets, &value_stack_offset, &vector_stack_offset); - - float3 a = stack_load_float3(stack, a_stack_offset); - float3 b = stack_load_float3(stack, b_stack_offset); - float3 c = make_float3(0.0f, 0.0f, 0.0f); - float param1 = stack_load_float(stack, param1_stack_offset); - - float value; - float3 vector; - - /* 3 Vector Operators */ - if (type == NODE_VECTOR_MATH_WRAP || type == NODE_VECTOR_MATH_FACEFORWARD || - type == NODE_VECTOR_MATH_MULTIPLY_ADD) { - uint4 extra_node = read_node(kg, &offset); - c = stack_load_float3(stack, extra_node.x); - } - - svm_vector_math(&value, &vector, (NodeVectorMathType)type, a, b, c, param1); - - if (stack_valid(value_stack_offset)) - stack_store_float(stack, value_stack_offset, value); - if (stack_valid(vector_stack_offset)) - 
stack_store_float3(stack, vector_stack_offset, vector); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h deleted file mode 100644 index d3225b55ef0..00000000000 --- a/intern/cycles/kernel/svm/svm_math_util.h +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright 2011-2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -ccl_device void svm_vector_math(ccl_private float *value, - ccl_private float3 *vector, - NodeVectorMathType type, - float3 a, - float3 b, - float3 c, - float param1) -{ - switch (type) { - case NODE_VECTOR_MATH_ADD: - *vector = a + b; - break; - case NODE_VECTOR_MATH_SUBTRACT: - *vector = a - b; - break; - case NODE_VECTOR_MATH_MULTIPLY: - *vector = a * b; - break; - case NODE_VECTOR_MATH_DIVIDE: - *vector = safe_divide_float3_float3(a, b); - break; - case NODE_VECTOR_MATH_CROSS_PRODUCT: - *vector = cross(a, b); - break; - case NODE_VECTOR_MATH_PROJECT: - *vector = project(a, b); - break; - case NODE_VECTOR_MATH_REFLECT: - *vector = reflect(a, b); - break; - case NODE_VECTOR_MATH_REFRACT: - *vector = refract(a, normalize(b), param1); - break; - case NODE_VECTOR_MATH_FACEFORWARD: - *vector = faceforward(a, b, c); - break; - case NODE_VECTOR_MATH_MULTIPLY_ADD: - *vector = a * b + c; - break; - case NODE_VECTOR_MATH_DOT_PRODUCT: - *value = dot(a, b); - break; - case NODE_VECTOR_MATH_DISTANCE: - *value = distance(a, b); - break; - case NODE_VECTOR_MATH_LENGTH: - *value = len(a); - break; - case NODE_VECTOR_MATH_SCALE: - *vector = a * param1; - break; - case NODE_VECTOR_MATH_NORMALIZE: - *vector = safe_normalize(a); - break; - case NODE_VECTOR_MATH_SNAP: - *vector = floor(safe_divide_float3_float3(a, b)) * b; - break; - case NODE_VECTOR_MATH_FLOOR: - *vector = floor(a); - break; - case NODE_VECTOR_MATH_CEIL: - *vector = ceil(a); - break; - case NODE_VECTOR_MATH_MODULO: - *vector = make_float3(safe_modulo(a.x, b.x), safe_modulo(a.y, b.y), safe_modulo(a.z, b.z)); - break; - case NODE_VECTOR_MATH_WRAP: - *vector = make_float3(wrapf(a.x, b.x, c.x), wrapf(a.y, b.y, c.y), wrapf(a.z, b.z, c.z)); - break; - case NODE_VECTOR_MATH_FRACTION: - *vector = a - floor(a); - break; - case NODE_VECTOR_MATH_ABSOLUTE: - *vector = fabs(a); - break; - case NODE_VECTOR_MATH_MINIMUM: - *vector = min(a, b); - break; - case NODE_VECTOR_MATH_MAXIMUM: - *vector = max(a, b); - break; - case NODE_VECTOR_MATH_SINE: - *vector = make_float3(sinf(a.x), sinf(a.y), sinf(a.z)); - break; - case NODE_VECTOR_MATH_COSINE: - *vector = make_float3(cosf(a.x), cosf(a.y), cosf(a.z)); - break; - case NODE_VECTOR_MATH_TANGENT: - *vector = make_float3(tanf(a.x), tanf(a.y), tanf(a.z)); - break; - default: - *vector = zero_float3(); - *value = 0.0f; - } -} - -ccl_device float svm_math(NodeMathType type, float a, float b, float c) -{ - switch (type) { - case NODE_MATH_ADD: - return a + b; - case NODE_MATH_SUBTRACT: - return a - b; - case NODE_MATH_MULTIPLY: - return a * b; - case 
NODE_MATH_DIVIDE: - return safe_divide(a, b); - case NODE_MATH_POWER: - return safe_powf(a, b); - case NODE_MATH_LOGARITHM: - return safe_logf(a, b); - case NODE_MATH_SQRT: - return safe_sqrtf(a); - case NODE_MATH_INV_SQRT: - return inversesqrtf(a); - case NODE_MATH_ABSOLUTE: - return fabsf(a); - case NODE_MATH_RADIANS: - return a * (M_PI_F / 180.0f); - case NODE_MATH_DEGREES: - return a * (180.0f / M_PI_F); - case NODE_MATH_MINIMUM: - return fminf(a, b); - case NODE_MATH_MAXIMUM: - return fmaxf(a, b); - case NODE_MATH_LESS_THAN: - return a < b; - case NODE_MATH_GREATER_THAN: - return a > b; - case NODE_MATH_ROUND: - return floorf(a + 0.5f); - case NODE_MATH_FLOOR: - return floorf(a); - case NODE_MATH_CEIL: - return ceilf(a); - case NODE_MATH_FRACTION: - return a - floorf(a); - case NODE_MATH_MODULO: - return safe_modulo(a, b); - case NODE_MATH_TRUNC: - return a >= 0.0f ? floorf(a) : ceilf(a); - case NODE_MATH_SNAP: - return floorf(safe_divide(a, b)) * b; - case NODE_MATH_WRAP: - return wrapf(a, b, c); - case NODE_MATH_PINGPONG: - return pingpongf(a, b); - case NODE_MATH_SINE: - return sinf(a); - case NODE_MATH_COSINE: - return cosf(a); - case NODE_MATH_TANGENT: - return tanf(a); - case NODE_MATH_SINH: - return sinhf(a); - case NODE_MATH_COSH: - return coshf(a); - case NODE_MATH_TANH: - return tanhf(a); - case NODE_MATH_ARCSINE: - return safe_asinf(a); - case NODE_MATH_ARCCOSINE: - return safe_acosf(a); - case NODE_MATH_ARCTANGENT: - return atanf(a); - case NODE_MATH_ARCTAN2: - return atan2f(a, b); - case NODE_MATH_SIGN: - return compatible_signf(a); - case NODE_MATH_EXPONENT: - return expf(a); - case NODE_MATH_COMPARE: - return ((a == b) || (fabsf(a - b) <= fmaxf(c, FLT_EPSILON))) ? 1.0f : 0.0f; - case NODE_MATH_MULTIPLY_ADD: - return a * b + c; - case NODE_MATH_SMOOTH_MIN: - return smoothminf(a, b, c); - case NODE_MATH_SMOOTH_MAX: - return -smoothminf(-a, -b, c); - default: - return 0.0f; - } -} - -ccl_device float3 svm_math_blackbody_color(float t) -{ - /* TODO(lukas): Reimplement in XYZ. */ - - /* Calculate color in range 800..12000 using an approximation - * a/x+bx+c for R and G and ((at + b)t + c)t + d) for B - * Max absolute error for RGB is (0.00095, 0.00077, 0.00057), - * which is enough to get the same 8 bit/channel color. 
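Several of the scalar math cases above rely on zero-guarded helpers (safe_divide, safe_modulo) and small compound formulas (trunc, snap, compare). The sketch below mirrors how those cases behave; the kernel's own helper definitions live in the Cycles util headers and may differ in detail:

#include <cfloat>
#include <cmath>
#include <cstdio>

// Zero-guarded division/modulo in the spirit of the safe_* helpers used above.
static float safe_divide(float a, float b) { return (b != 0.0f) ? a / b : 0.0f; }
static float safe_modulo(float a, float b) { return (b != 0.0f) ? std::fmod(a, b) : 0.0f; }

// TRUNC rounds toward zero; SNAP quantizes a to multiples of b.
static float math_trunc(float a) { return (a >= 0.0f) ? std::floor(a) : std::ceil(a); }
static float math_snap(float a, float b) { return std::floor(safe_divide(a, b)) * b; }

// COMPARE: 1.0 when |a - b| is within the epsilon c (at least FLT_EPSILON).
static float math_compare(float a, float b, float c)
{
  return ((a == b) || (std::fabs(a - b) <= std::fmax(c, FLT_EPSILON))) ? 1.0f : 0.0f;
}

int main()
{
  std::printf("%f %f %f %f\n", safe_divide(1.0f, 0.0f), math_trunc(-1.7f),
              math_snap(7.3f, 2.0f), math_compare(0.1f, 0.1000001f, 1e-5f));
  return 0;
}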
- */ - - const float blackbody_table_r[6][3] = { - {2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f}, - {3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f}, - {4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f}, - {4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f}, - {4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f}, - {3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f}, - }; - - const float blackbody_table_g[6][3] = { - {-7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f}, - {-1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f}, - {-1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f}, - {-1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f}, - {-1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f}, - {-5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f}, - }; - - const float blackbody_table_b[6][4] = { - {0.0f, 0.0f, 0.0f, 0.0f}, /* zeros should be optimized by compiler */ - {0.0f, 0.0f, 0.0f, 0.0f}, - {0.0f, 0.0f, 0.0f, 0.0f}, - {-2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f}, - {-2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f}, - {6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f}, - }; - - if (t >= 12000.0f) { - return make_float3(0.826270103f, 0.994478524f, 1.56626022f); - } - else if (t < 965.0f) { - /* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */ - return make_float3(4.70366907f, 0.0f, 0.0f); - } - - /* Manually align for readability. */ - /* clang-format off */ - int i = (t >= 6365.0f) ? 5 : - (t >= 3315.0f) ? 4 : - (t >= 1902.0f) ? 3 : - (t >= 1449.0f) ? 2 : - (t >= 1167.0f) ? 1 : - 0; - /* clang-format on */ - - ccl_constant float *r = blackbody_table_r[i]; - ccl_constant float *g = blackbody_table_g[i]; - ccl_constant float *b = blackbody_table_b[i]; - - const float t_inv = 1.0f / t; - return make_float3(r[0] * t_inv + r[1] * t + r[2], - g[0] * t_inv + g[1] * t + g[2], - ((b[0] * t + b[1]) * t + b[2]) * t + b[3]); -} - -ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma) -{ - if (gamma == 0.0f) - return make_float3(1.0f, 1.0f, 1.0f); - - if (color.x > 0.0f) - color.x = powf(color.x, gamma); - if (color.y > 0.0f) - color.y = powf(color.y, gamma); - if (color.z > 0.0f) - color.z = powf(color.z, gamma); - - return color; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h deleted file mode 100644 index 568dda3dddc..00000000000 --- a/intern/cycles/kernel/svm/svm_mix.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
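To make the table lookup above concrete: each temperature bin stores three coefficients for R and G (rational fit a/t + b*t + c) and four for B (cubic fit), and the bin is chosen by the threshold chain on t. A standalone sketch evaluating the red channel for the 1902-3315 K bin, reusing the fourth row of blackbody_table_r above (the function name is illustrative):

#include <cstdio>

// Red-channel fit r0/t + r1*t + r2, valid only for 1902 K <= t < 3315 K,
// coefficients taken from blackbody_table_r[3] above.
static float blackbody_red_1902_3315(float t)
{
  const float r0 = 4.66849800e+03f, r1 = 2.85655028e-05f, r2 = 1.29075375e-01f;
  return r0 / t + r1 * t + r2;
}

int main()
{
  std::printf("R(2500K) ~ %f\n", blackbody_red_1902_3315(2500.0f));
  return 0;
}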
- */ - -CCL_NAMESPACE_BEGIN - -/* Node */ - -ccl_device_noinline int svm_node_mix(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint fac_offset, - uint c1_offset, - uint c2_offset, - int offset) -{ - /* read extra data */ - uint4 node1 = read_node(kg, &offset); - - float fac = stack_load_float(stack, fac_offset); - float3 c1 = stack_load_float3(stack, c1_offset); - float3 c2 = stack_load_float3(stack, c2_offset); - float3 result = svm_mix((NodeMix)node1.y, fac, c1, c2); - - stack_store_float3(stack, node1.z, result); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h deleted file mode 100644 index decd29bbe13..00000000000 --- a/intern/cycles/kernel/svm/svm_musgrave.h +++ /dev/null @@ -1,850 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* 1D Musgrave fBm - * - * H: fractal increment parameter - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * - * from "Texturing and Modelling: A procedural approach" - */ - -ccl_device_noinline_cpu float noise_musgrave_fBm_1d(float co, - float H, - float lacunarity, - float octaves) -{ - float p = co; - float value = 0.0f; - float pwr = 1.0f; - float pwHL = powf(lacunarity, -H); - - for (int i = 0; i < float_to_int(octaves); i++) { - value += snoise_1d(p) * pwr; - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value += rmd * snoise_1d(p) * pwr; - } - - return value; -} - -/* 1D Musgrave Multifractal - * - * H: highest fractal dimension - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - */ - -ccl_device_noinline_cpu float noise_musgrave_multi_fractal_1d(float co, - float H, - float lacunarity, - float octaves) -{ - float p = co; - float value = 1.0f; - float pwr = 1.0f; - float pwHL = powf(lacunarity, -H); - - for (int i = 0; i < float_to_int(octaves); i++) { - value *= (pwr * snoise_1d(p) + 1.0f); - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value *= (rmd * pwr * snoise_1d(p) + 1.0f); /* correct? 
*/ - } - - return value; -} - -/* 1D Musgrave Heterogeneous Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_1d( - float co, float H, float lacunarity, float octaves, float offset) -{ - float p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - /* first unscaled octave of function; later octaves are scaled */ - float value = offset + snoise_1d(p); - p *= lacunarity; - - for (int i = 1; i < float_to_int(octaves); i++) { - float increment = (snoise_1d(p) + offset) * pwr * value; - value += increment; - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - float increment = (snoise_1d(p) + offset) * pwr * value; - value += rmd * increment; - } - - return value; -} - -/* 1D Hybrid Additive/Multiplicative Multifractal Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_1d( - float co, float H, float lacunarity, float octaves, float offset, float gain) -{ - float p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - float value = snoise_1d(p) + offset; - float weight = gain * value; - p *= lacunarity; - - for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { - if (weight > 1.0f) { - weight = 1.0f; - } - - float signal = (snoise_1d(p) + offset) * pwr; - pwr *= pwHL; - value += weight * signal; - weight *= gain * signal; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value += rmd * ((snoise_1d(p) + offset) * pwr); - } - - return value; -} - -/* 1D Ridged Multifractal Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_1d( - float co, float H, float lacunarity, float octaves, float offset, float gain) -{ - float p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - float signal = offset - fabsf(snoise_1d(p)); - signal *= signal; - float value = signal; - float weight = 1.0f; - - for (int i = 1; i < float_to_int(octaves); i++) { - p *= lacunarity; - weight = saturate(signal * gain); - signal = offset - fabsf(snoise_1d(p)); - signal *= signal; - signal *= weight; - value += signal * pwr; - pwr *= pwHL; - } - - return value; -} - -/* 2D Musgrave fBm - * - * H: fractal increment parameter - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * - * from "Texturing and Modelling: A procedural approach" - */ - -ccl_device_noinline_cpu float noise_musgrave_fBm_2d(float2 co, - float H, - float lacunarity, - float octaves) -{ - float2 p = co; - float value = 0.0f; - float pwr = 1.0f; - float pwHL = powf(lacunarity, -H); - - for (int i = 0; i < float_to_int(octaves); i++) { - value += snoise_2d(p) * pwr; - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value += rmd * snoise_2d(p) * pwr; - } - - return value; -} - -/* 2D Musgrave Multifractal - * - * H: highest fractal dimension - * lacunarity: gap 
between successive frequencies - * octaves: number of frequencies in the fBm - */ - -ccl_device_noinline_cpu float noise_musgrave_multi_fractal_2d(float2 co, - float H, - float lacunarity, - float octaves) -{ - float2 p = co; - float value = 1.0f; - float pwr = 1.0f; - float pwHL = powf(lacunarity, -H); - - for (int i = 0; i < float_to_int(octaves); i++) { - value *= (pwr * snoise_2d(p) + 1.0f); - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value *= (rmd * pwr * snoise_2d(p) + 1.0f); /* correct? */ - } - - return value; -} - -/* 2D Musgrave Heterogeneous Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_2d( - float2 co, float H, float lacunarity, float octaves, float offset) -{ - float2 p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - /* first unscaled octave of function; later octaves are scaled */ - float value = offset + snoise_2d(p); - p *= lacunarity; - - for (int i = 1; i < float_to_int(octaves); i++) { - float increment = (snoise_2d(p) + offset) * pwr * value; - value += increment; - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - float increment = (snoise_2d(p) + offset) * pwr * value; - value += rmd * increment; - } - - return value; -} - -/* 2D Hybrid Additive/Multiplicative Multifractal Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_2d( - float2 co, float H, float lacunarity, float octaves, float offset, float gain) -{ - float2 p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - float value = snoise_2d(p) + offset; - float weight = gain * value; - p *= lacunarity; - - for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { - if (weight > 1.0f) { - weight = 1.0f; - } - - float signal = (snoise_2d(p) + offset) * pwr; - pwr *= pwHL; - value += weight * signal; - weight *= gain * signal; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value += rmd * ((snoise_2d(p) + offset) * pwr); - } - - return value; -} - -/* 2D Ridged Multifractal Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_2d( - float2 co, float H, float lacunarity, float octaves, float offset, float gain) -{ - float2 p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - float signal = offset - fabsf(snoise_2d(p)); - signal *= signal; - float value = signal; - float weight = 1.0f; - - for (int i = 1; i < float_to_int(octaves); i++) { - p *= lacunarity; - weight = saturate(signal * gain); - signal = offset - fabsf(snoise_2d(p)); - signal *= signal; - signal *= weight; - value += signal * pwr; - pwr *= pwHL; - } - - return value; -} - -/* 3D Musgrave fBm - * - * H: fractal increment parameter - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * - * from "Texturing and Modelling: A procedural 
approach" - */ - -ccl_device_noinline_cpu float noise_musgrave_fBm_3d(float3 co, - float H, - float lacunarity, - float octaves) -{ - float3 p = co; - float value = 0.0f; - float pwr = 1.0f; - float pwHL = powf(lacunarity, -H); - - for (int i = 0; i < float_to_int(octaves); i++) { - value += snoise_3d(p) * pwr; - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value += rmd * snoise_3d(p) * pwr; - } - - return value; -} - -/* 3D Musgrave Multifractal - * - * H: highest fractal dimension - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - */ - -ccl_device_noinline_cpu float noise_musgrave_multi_fractal_3d(float3 co, - float H, - float lacunarity, - float octaves) -{ - float3 p = co; - float value = 1.0f; - float pwr = 1.0f; - float pwHL = powf(lacunarity, -H); - - for (int i = 0; i < float_to_int(octaves); i++) { - value *= (pwr * snoise_3d(p) + 1.0f); - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value *= (rmd * pwr * snoise_3d(p) + 1.0f); /* correct? */ - } - - return value; -} - -/* 3D Musgrave Heterogeneous Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_3d( - float3 co, float H, float lacunarity, float octaves, float offset) -{ - float3 p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - /* first unscaled octave of function; later octaves are scaled */ - float value = offset + snoise_3d(p); - p *= lacunarity; - - for (int i = 1; i < float_to_int(octaves); i++) { - float increment = (snoise_3d(p) + offset) * pwr * value; - value += increment; - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - float increment = (snoise_3d(p) + offset) * pwr * value; - value += rmd * increment; - } - - return value; -} - -/* 3D Hybrid Additive/Multiplicative Multifractal Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_3d( - float3 co, float H, float lacunarity, float octaves, float offset, float gain) -{ - float3 p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - float value = snoise_3d(p) + offset; - float weight = gain * value; - p *= lacunarity; - - for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { - if (weight > 1.0f) { - weight = 1.0f; - } - - float signal = (snoise_3d(p) + offset) * pwr; - pwr *= pwHL; - value += weight * signal; - weight *= gain * signal; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value += rmd * ((snoise_3d(p) + offset) * pwr); - } - - return value; -} - -/* 3D Ridged Multifractal Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_3d( - float3 co, float H, float lacunarity, float octaves, float offset, float gain) -{ - float3 p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - float signal = 
offset - fabsf(snoise_3d(p)); - signal *= signal; - float value = signal; - float weight = 1.0f; - - for (int i = 1; i < float_to_int(octaves); i++) { - p *= lacunarity; - weight = saturate(signal * gain); - signal = offset - fabsf(snoise_3d(p)); - signal *= signal; - signal *= weight; - value += signal * pwr; - pwr *= pwHL; - } - - return value; -} - -/* 4D Musgrave fBm - * - * H: fractal increment parameter - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * - * from "Texturing and Modelling: A procedural approach" - */ - -ccl_device_noinline_cpu float noise_musgrave_fBm_4d(float4 co, - float H, - float lacunarity, - float octaves) -{ - float4 p = co; - float value = 0.0f; - float pwr = 1.0f; - float pwHL = powf(lacunarity, -H); - - for (int i = 0; i < float_to_int(octaves); i++) { - value += snoise_4d(p) * pwr; - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value += rmd * snoise_4d(p) * pwr; - } - - return value; -} - -/* 4D Musgrave Multifractal - * - * H: highest fractal dimension - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - */ - -ccl_device_noinline_cpu float noise_musgrave_multi_fractal_4d(float4 co, - float H, - float lacunarity, - float octaves) -{ - float4 p = co; - float value = 1.0f; - float pwr = 1.0f; - float pwHL = powf(lacunarity, -H); - - for (int i = 0; i < float_to_int(octaves); i++) { - value *= (pwr * snoise_4d(p) + 1.0f); - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - value *= (rmd * pwr * snoise_4d(p) + 1.0f); /* correct? */ - } - - return value; -} - -/* 4D Musgrave Heterogeneous Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_hetero_terrain_4d( - float4 co, float H, float lacunarity, float octaves, float offset) -{ - float4 p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - /* first unscaled octave of function; later octaves are scaled */ - float value = offset + snoise_4d(p); - p *= lacunarity; - - for (int i = 1; i < float_to_int(octaves); i++) { - float increment = (snoise_4d(p) + offset) * pwr * value; - value += increment; - pwr *= pwHL; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { - float increment = (snoise_4d(p) + offset) * pwr * value; - value += rmd * increment; - } - - return value; -} - -/* 4D Hybrid Additive/Multiplicative Multifractal Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_4d( - float4 co, float H, float lacunarity, float octaves, float offset, float gain) -{ - float4 p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - float value = snoise_4d(p) + offset; - float weight = gain * value; - p *= lacunarity; - - for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { - if (weight > 1.0f) { - weight = 1.0f; - } - - float signal = (snoise_4d(p) + offset) * pwr; - pwr *= pwHL; - value += weight * signal; - weight *= gain * signal; - p *= lacunarity; - } - - float rmd = octaves - floorf(octaves); - if (rmd != 0.0f) { 
- value += rmd * ((snoise_4d(p) + offset) * pwr); - } - - return value; -} - -/* 4D Ridged Multifractal Terrain - * - * H: fractal dimension of the roughest area - * lacunarity: gap between successive frequencies - * octaves: number of frequencies in the fBm - * offset: raises the terrain from `sea level' - */ - -ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal_4d( - float4 co, float H, float lacunarity, float octaves, float offset, float gain) -{ - float4 p = co; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - - float signal = offset - fabsf(snoise_4d(p)); - signal *= signal; - float value = signal; - float weight = 1.0f; - - for (int i = 1; i < float_to_int(octaves); i++) { - p *= lacunarity; - weight = saturate(signal * gain); - signal = offset - fabsf(snoise_4d(p)); - signal *= signal; - signal *= weight; - value += signal * pwr; - pwr *= pwHL; - } - - return value; -} - -ccl_device_noinline int svm_node_tex_musgrave(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint offsets1, - uint offsets2, - uint offsets3, - int offset) -{ - uint type, dimensions, co_stack_offset, w_stack_offset; - uint scale_stack_offset, detail_stack_offset, dimension_stack_offset, lacunarity_stack_offset; - uint offset_stack_offset, gain_stack_offset, fac_stack_offset; - - svm_unpack_node_uchar4(offsets1, &type, &dimensions, &co_stack_offset, &w_stack_offset); - svm_unpack_node_uchar4(offsets2, - &scale_stack_offset, - &detail_stack_offset, - &dimension_stack_offset, - &lacunarity_stack_offset); - svm_unpack_node_uchar3(offsets3, &offset_stack_offset, &gain_stack_offset, &fac_stack_offset); - - uint4 defaults1 = read_node(kg, &offset); - uint4 defaults2 = read_node(kg, &offset); - - float3 co = stack_load_float3(stack, co_stack_offset); - float w = stack_load_float_default(stack, w_stack_offset, defaults1.x); - float scale = stack_load_float_default(stack, scale_stack_offset, defaults1.y); - float detail = stack_load_float_default(stack, detail_stack_offset, defaults1.z); - float dimension = stack_load_float_default(stack, dimension_stack_offset, defaults1.w); - float lacunarity = stack_load_float_default(stack, lacunarity_stack_offset, defaults2.x); - float foffset = stack_load_float_default(stack, offset_stack_offset, defaults2.y); - float gain = stack_load_float_default(stack, gain_stack_offset, defaults2.z); - - dimension = fmaxf(dimension, 1e-5f); - detail = clamp(detail, 0.0f, 16.0f); - lacunarity = fmaxf(lacunarity, 1e-5f); - - float fac; - - switch (dimensions) { - case 1: { - float p = w * scale; - switch ((NodeMusgraveType)type) { - case NODE_MUSGRAVE_MULTIFRACTAL: - fac = noise_musgrave_multi_fractal_1d(p, dimension, lacunarity, detail); - break; - case NODE_MUSGRAVE_FBM: - fac = noise_musgrave_fBm_1d(p, dimension, lacunarity, detail); - break; - case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL: - fac = noise_musgrave_hybrid_multi_fractal_1d( - p, dimension, lacunarity, detail, foffset, gain); - break; - case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL: - fac = noise_musgrave_ridged_multi_fractal_1d( - p, dimension, lacunarity, detail, foffset, gain); - break; - case NODE_MUSGRAVE_HETERO_TERRAIN: - fac = noise_musgrave_hetero_terrain_1d(p, dimension, lacunarity, detail, foffset); - break; - default: - fac = 0.0f; - } - break; - } - case 2: { - float2 p = make_float2(co.x, co.y) * scale; - switch ((NodeMusgraveType)type) { - case NODE_MUSGRAVE_MULTIFRACTAL: - fac = noise_musgrave_multi_fractal_2d(p, dimension, lacunarity, detail); - break; - case 
NODE_MUSGRAVE_FBM: - fac = noise_musgrave_fBm_2d(p, dimension, lacunarity, detail); - break; - case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL: - fac = noise_musgrave_hybrid_multi_fractal_2d( - p, dimension, lacunarity, detail, foffset, gain); - break; - case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL: - fac = noise_musgrave_ridged_multi_fractal_2d( - p, dimension, lacunarity, detail, foffset, gain); - break; - case NODE_MUSGRAVE_HETERO_TERRAIN: - fac = noise_musgrave_hetero_terrain_2d(p, dimension, lacunarity, detail, foffset); - break; - default: - fac = 0.0f; - } - break; - } - case 3: { - float3 p = co * scale; - switch ((NodeMusgraveType)type) { - case NODE_MUSGRAVE_MULTIFRACTAL: - fac = noise_musgrave_multi_fractal_3d(p, dimension, lacunarity, detail); - break; - case NODE_MUSGRAVE_FBM: - fac = noise_musgrave_fBm_3d(p, dimension, lacunarity, detail); - break; - case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL: - fac = noise_musgrave_hybrid_multi_fractal_3d( - p, dimension, lacunarity, detail, foffset, gain); - break; - case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL: - fac = noise_musgrave_ridged_multi_fractal_3d( - p, dimension, lacunarity, detail, foffset, gain); - break; - case NODE_MUSGRAVE_HETERO_TERRAIN: - fac = noise_musgrave_hetero_terrain_3d(p, dimension, lacunarity, detail, foffset); - break; - default: - fac = 0.0f; - } - break; - } - case 4: { - float4 p = make_float4(co.x, co.y, co.z, w) * scale; - switch ((NodeMusgraveType)type) { - case NODE_MUSGRAVE_MULTIFRACTAL: - fac = noise_musgrave_multi_fractal_4d(p, dimension, lacunarity, detail); - break; - case NODE_MUSGRAVE_FBM: - fac = noise_musgrave_fBm_4d(p, dimension, lacunarity, detail); - break; - case NODE_MUSGRAVE_HYBRID_MULTIFRACTAL: - fac = noise_musgrave_hybrid_multi_fractal_4d( - p, dimension, lacunarity, detail, foffset, gain); - break; - case NODE_MUSGRAVE_RIDGED_MULTIFRACTAL: - fac = noise_musgrave_ridged_multi_fractal_4d( - p, dimension, lacunarity, detail, foffset, gain); - break; - case NODE_MUSGRAVE_HETERO_TERRAIN: - fac = noise_musgrave_hetero_terrain_4d(p, dimension, lacunarity, detail, foffset); - break; - default: - fac = 0.0f; - } - break; - } - default: - fac = 0.0f; - } - - stack_store_float(stack, fac_stack_offset, fac); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h deleted file mode 100644 index ecb4df6afdf..00000000000 --- a/intern/cycles/kernel/svm/svm_noise.h +++ /dev/null @@ -1,742 +0,0 @@ -/* - * Adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2011, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
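All of the Musgrave variants above share the same octave loop: each octave's noise is weighted by pwr, which decays by lacunarity^(-H) per octave, the sample position is scaled by lacunarity, and the fractional part of octaves blends in one extra partial octave. A standalone sketch of that accumulation for the 1D fBm case, with a trivial sine-based stand-in for the kernel's snoise_1d (fbm_1d and fake_snoise_1d are illustrative names, and the stand-in is not Perlin noise):

#include <cmath>
#include <cstdio>

// Cheap signed "noise" in [-1, 1], only to make the octave loop runnable.
static float fake_snoise_1d(float p)
{
  return std::sin(p * 12.9898f + std::sin(p * 4.1414f));
}

static float fbm_1d(float co, float H, float lacunarity, float octaves)
{
  float p = co, value = 0.0f, pwr = 1.0f;
  const float pwHL = std::pow(lacunarity, -H);

  for (int i = 0; i < static_cast<int>(octaves); i++) {
    value += fake_snoise_1d(p) * pwr;  // weighted octave
    pwr *= pwHL;                       // decay the weight
    p *= lacunarity;                   // raise the frequency
  }

  const float rmd = octaves - std::floor(octaves);
  if (rmd != 0.0f) {
    value += rmd * fake_snoise_1d(p) * pwr;  // fractional final octave
  }
  return value;
}

int main()
{
  std::printf("fBm(0.37) = %f\n", fbm_1d(0.37f, 1.0f, 2.0f, 3.5f));
  return 0;
}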
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -CCL_NAMESPACE_BEGIN - -/* **** Perlin Noise **** */ - -ccl_device float fade(float t) -{ - return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f); -} - -ccl_device_inline float negate_if(float val, int condition) -{ - return (condition) ? -val : val; -} - -ccl_device float grad1(int hash, float x) -{ - int h = hash & 15; - float g = 1 + (h & 7); - return negate_if(g, h & 8) * x; -} - -ccl_device_noinline_cpu float perlin_1d(float x) -{ - int X; - float fx = floorfrac(x, &X); - float u = fade(fx); - - return mix(grad1(hash_uint(X), fx), grad1(hash_uint(X + 1), fx - 1.0f), u); -} - -/* 2D, 3D, and 4D noise can be accelerated using SSE, so we first check if - * SSE is supported, that is, if __KERNEL_SSE2__ is defined. If it is not - * supported, we do a standard implementation, but if it is supported, we - * do an implementation using SSE intrinsics. - */ -#if !defined(__KERNEL_SSE2__) - -/* ** Standard Implementation ** */ - -/* Bilinear Interpolation: - * - * v2 v3 - * @ + + + + @ y - * + + ^ - * + + | - * + + | - * @ + + + + @ @------> x - * v0 v1 - * - */ -ccl_device float bi_mix(float v0, float v1, float v2, float v3, float x, float y) -{ - float x1 = 1.0f - x; - return (1.0f - y) * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x); -} - -/* Trilinear Interpolation: - * - * v6 v7 - * @ + + + + + + @ - * +\ +\ - * + \ + \ - * + \ + \ - * + \ v4 + \ v5 - * + @ + + + +++ + @ z - * + + + + y ^ - * v2 @ + +++ + + + @ v3 + \ | - * \ + \ + \ | - * \ + \ + \| - * \ + \ + +---------> x - * \+ \+ - * @ + + + + + + @ - * v0 v1 - */ -ccl_device float tri_mix(float v0, - float v1, - float v2, - float v3, - float v4, - float v5, - float v6, - float v7, - float x, - float y, - float z) -{ - float x1 = 1.0f - x; - float y1 = 1.0f - y; - float z1 = 1.0f - z; - return z1 * (y1 * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x)) + - z * (y1 * (v4 * x1 + v5 * x) + y * (v6 * x1 + v7 * x)); -} - -ccl_device float quad_mix(float v0, - float v1, - float v2, - float v3, - float v4, - float v5, - float v6, - float v7, - float v8, - float v9, - float v10, - float v11, - float v12, - float v13, - float v14, - float v15, - float x, - float y, - float z, - float w) -{ - return mix(tri_mix(v0, v1, v2, v3, v4, v5, v6, v7, x, y, z), - tri_mix(v8, v9, v10, v11, v12, v13, v14, v15, x, y, z), - w); -} - -ccl_device float grad2(int hash, float x, float y) -{ - int h = hash & 7; - float u = h < 4 ? x : y; - float v = 2.0f * (h < 4 ? y : x); - return negate_if(u, h & 1) + negate_if(v, h & 2); -} - -ccl_device float grad3(int hash, float x, float y, float z) -{ - int h = hash & 15; - float u = h < 8 ? x : y; - float vt = ((h == 12) || (h == 14)) ? x : z; - float v = h < 4 ? 
y : vt; - return negate_if(u, h & 1) + negate_if(v, h & 2); -} - -ccl_device float grad4(int hash, float x, float y, float z, float w) -{ - int h = hash & 31; - float u = h < 24 ? x : y; - float v = h < 16 ? y : z; - float s = h < 8 ? z : w; - return negate_if(u, h & 1) + negate_if(v, h & 2) + negate_if(s, h & 4); -} - -ccl_device_noinline_cpu float perlin_2d(float x, float y) -{ - int X; - int Y; - - float fx = floorfrac(x, &X); - float fy = floorfrac(y, &Y); - - float u = fade(fx); - float v = fade(fy); - - float r = bi_mix(grad2(hash_uint2(X, Y), fx, fy), - grad2(hash_uint2(X + 1, Y), fx - 1.0f, fy), - grad2(hash_uint2(X, Y + 1), fx, fy - 1.0f), - grad2(hash_uint2(X + 1, Y + 1), fx - 1.0f, fy - 1.0f), - u, - v); - - return r; -} - -ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) -{ - int X; - int Y; - int Z; - - float fx = floorfrac(x, &X); - float fy = floorfrac(y, &Y); - float fz = floorfrac(z, &Z); - - float u = fade(fx); - float v = fade(fy); - float w = fade(fz); - - float r = tri_mix(grad3(hash_uint3(X, Y, Z), fx, fy, fz), - grad3(hash_uint3(X + 1, Y, Z), fx - 1.0f, fy, fz), - grad3(hash_uint3(X, Y + 1, Z), fx, fy - 1.0f, fz), - grad3(hash_uint3(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz), - grad3(hash_uint3(X, Y, Z + 1), fx, fy, fz - 1.0f), - grad3(hash_uint3(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f), - grad3(hash_uint3(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f), - grad3(hash_uint3(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f), - u, - v, - w); - return r; -} - -ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) -{ - int X; - int Y; - int Z; - int W; - - float fx = floorfrac(x, &X); - float fy = floorfrac(y, &Y); - float fz = floorfrac(z, &Z); - float fw = floorfrac(w, &W); - - float u = fade(fx); - float v = fade(fy); - float t = fade(fz); - float s = fade(fw); - - float r = quad_mix( - grad4(hash_uint4(X, Y, Z, W), fx, fy, fz, fw), - grad4(hash_uint4(X + 1, Y, Z, W), fx - 1.0f, fy, fz, fw), - grad4(hash_uint4(X, Y + 1, Z, W), fx, fy - 1.0f, fz, fw), - grad4(hash_uint4(X + 1, Y + 1, Z, W), fx - 1.0f, fy - 1.0f, fz, fw), - grad4(hash_uint4(X, Y, Z + 1, W), fx, fy, fz - 1.0f, fw), - grad4(hash_uint4(X + 1, Y, Z + 1, W), fx - 1.0f, fy, fz - 1.0f, fw), - grad4(hash_uint4(X, Y + 1, Z + 1, W), fx, fy - 1.0f, fz - 1.0f, fw), - grad4(hash_uint4(X + 1, Y + 1, Z + 1, W), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw), - grad4(hash_uint4(X, Y, Z, W + 1), fx, fy, fz, fw - 1.0f), - grad4(hash_uint4(X + 1, Y, Z, W + 1), fx - 1.0f, fy, fz, fw - 1.0f), - grad4(hash_uint4(X, Y + 1, Z, W + 1), fx, fy - 1.0f, fz, fw - 1.0f), - grad4(hash_uint4(X + 1, Y + 1, Z, W + 1), fx - 1.0f, fy - 1.0f, fz, fw - 1.0f), - grad4(hash_uint4(X, Y, Z + 1, W + 1), fx, fy, fz - 1.0f, fw - 1.0f), - grad4(hash_uint4(X + 1, Y, Z + 1, W + 1), fx - 1.0f, fy, fz - 1.0f, fw - 1.0f), - grad4(hash_uint4(X, Y + 1, Z + 1, W + 1), fx, fy - 1.0f, fz - 1.0f, fw - 1.0f), - grad4(hash_uint4(X + 1, Y + 1, Z + 1, W + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw - 1.0f), - u, - v, - t, - s); - - return r; -} - -#else /* SSE is supported. */ - -/* ** SSE Implementation ** */ - -/* SSE Bilinear Interpolation: - * - * The function takes two ssef inputs: - * - p : Contains the values at the points (v0, v1, v2, v3). - * - f : Contains the values (x, y, _, _). The third and fourth values are unused. - * - * The interpolation is done in two steps: - * 1. Interpolate (v0, v1) and (v2, v3) along the x axis to get g (g0, g1). 
- * (v2, v3) is generated by moving v2 and v3 to the first and second - * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and - * fourth values are unused. - * 2. Interpolate g0 and g1 along the y axis to get the final value. - * g1 is generated by populating an ssef with the second value of g. - * Only the first value is important in the final ssef. - * - * v1 v3 g1 - * @ + + + + @ @ y - * + + (1) + (2) ^ - * + + ---> + ---> final | - * + + + | - * @ + + + + @ @ @------> x - * v0 v2 g0 - * - */ -ccl_device_inline ssef bi_mix(ssef p, ssef f) -{ - ssef g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f)); - return mix(g, shuffle<1>(g), shuffle<1>(f)); -} - -ccl_device_inline ssef fade(const ssef &t) -{ - ssef a = madd(t, 6.0f, -15.0f); - ssef b = madd(t, a, 10.0f); - return (t * t) * (t * b); -} - -/* Negate val if the nth bit of h is 1. */ -# define negate_if_nth_bit(val, h, n) ((val) ^ cast(((h) & (1 << (n))) << (31 - (n)))) - -ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y) -{ - ssei h = hash & 7; - ssef u = select(h < 4, x, y); - ssef v = 2.0f * select(h < 4, y, x); - return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1); -} - -/* We use SSE to compute and interpolate 4 gradients at once: - * - * Point Offset from v0 - * v0 (0, 0) - * v1 (0, 1) - * v2 (1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(V, V + 1)) - * v3 (1, 1) ^ - * | |__________| (0, 0, 1, 1) = shuffle<0, 0, 0, 0>(V, V + 1) - * | ^ - * |__________________________| - * - */ -ccl_device_noinline_cpu float perlin_2d(float x, float y) -{ - ssei XY; - ssef fxy = floorfrac(ssef(x, y, 0.0f, 0.0f), &XY); - ssef uv = fade(fxy); - - ssei XY1 = XY + 1; - ssei X = shuffle<0, 0, 0, 0>(XY, XY1); - ssei Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1)); - - ssei h = hash_ssei2(X, Y); - - ssef fxy1 = fxy - 1.0f; - ssef fx = shuffle<0, 0, 0, 0>(fxy, fxy1); - ssef fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1)); - - ssef g = grad(h, fx, fy); - - return extract<0>(bi_mix(g, uv)); -} - -/* SSE Trilinear Interpolation: - * - * The function takes three ssef inputs: - * - p : Contains the values at the points (v0, v1, v2, v3). - * - q : Contains the values at the points (v4, v5, v6, v7). - * - f : Contains the values (x, y, z, _). The fourth value is unused. - * - * The interpolation is done in three steps: - * 1. Interpolate p and q along the x axis to get s (s0, s1, s2, s3). - * 2. Interpolate (s0, s1) and (s2, s3) along the y axis to get g (g0, g1). - * (s2, s3) is generated by moving v2 and v3 to the first and second - * places of the ssef using the shuffle mask <2, 3, 2, 3>. The third and - * fourth values are unused. - * 3. Interpolate g0 and g1 along the z axis to get the final value. - * g1 is generated by populating an ssef with the second value of g. - * Only the first value is important in the final ssef. 
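The vectorized paths in this file are lane-parallel rewrites of the scalar reference implementation (fade, grad1..grad4 and the bi/tri/quad mixes). As a reminder of what that reference path computes, here is a self-contained 1D version, assuming a generic integer mixer in place of the kernel's hash_uint (hash_u32 is an illustrative stand-in):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Stand-in lattice hash; any decent uint -> uint mixer works for illustration.
static uint32_t hash_u32(uint32_t x)
{
  x ^= x >> 16; x *= 0x7feb352dU;
  x ^= x >> 15; x *= 0x846ca68bU;
  x ^= x >> 16;
  return x;
}

static float fade(float t) { return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f); }
static float negate_if(float v, int c) { return c ? -v : v; }
static float mixf(float a, float b, float t) { return a + t * (b - a); }

// Gradient for 1D Perlin noise, same scheme as grad1() above.
static float grad1(uint32_t hash, float x)
{
  int h = hash & 15;
  float g = 1.0f + (h & 7);
  return negate_if(g, h & 8) * x;
}

// Hash the lattice points on either side of x, evaluate their gradients at the
// fractional offset, and blend with the fade curve, as perlin_1d() does above.
static float perlin_1d(float x)
{
  int X = static_cast<int>(std::floor(x));
  float fx = x - X;
  float u = fade(fx);
  return mixf(grad1(hash_u32(static_cast<uint32_t>(X)), fx),
              grad1(hash_u32(static_cast<uint32_t>(X + 1)), fx - 1.0f), u);
}

int main()
{
  std::printf("perlin_1d(0.8) = %f\n", perlin_1d(0.8f));
  return 0;
}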
- * - * v3 v7 - * @ + + + + + + @ s3 @ - * +\ +\ +\ - * + \ + \ + \ - * + \ + \ + \ g1 - * + \ v1 + \ v5 + \ s1 @ - * + @ + + + +++ + @ + @ + z - * + + + + (1) + + (2) + (3) y ^ - * v2 @ + +++ + + + @ v6 + ---> s2 @ + ---> + ---> final \ | - * \ + \ + \ + + \ | - * \ + \ + \ + + \| - * \ + \ + \ + @ +---------> x - * \+ \+ \+ g0 - * @ + + + + + + @ @ - * v0 v4 s0 - */ -ccl_device_inline ssef tri_mix(ssef p, ssef q, ssef f) -{ - ssef s = mix(p, q, shuffle<0>(f)); - ssef g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f)); - return mix(g, shuffle<1>(g), shuffle<2>(f)); -} - -/* 3D and 4D noise can be accelerated using AVX, so we first check if AVX - * is supported, that is, if __KERNEL_AVX__ is defined. If it is not - * supported, we do an SSE implementation, but if it is supported, - * we do an implementation using AVX intrinsics. - */ -# if !defined(__KERNEL_AVX__) - -ccl_device_inline ssef grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z) -{ - ssei h = hash & 15; - ssef u = select(h < 8, x, y); - ssef vt = select((h == 12) | (h == 14), x, z); - ssef v = select(h < 4, y, vt); - return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1); -} - -ccl_device_inline ssef -grad(const ssei &hash, const ssef &x, const ssef &y, const ssef &z, const ssef &w) -{ - ssei h = hash & 31; - ssef u = select(h < 24, x, y); - ssef v = select(h < 16, y, z); - ssef s = select(h < 8, z, w); - return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2); -} - -/* SSE Quadrilinear Interpolation: - * - * Quadrilinear interpolation is as simple as a linear interpolation - * between two trilinear interpolations. - * - */ -ccl_device_inline ssef quad_mix(ssef p, ssef q, ssef r, ssef s, ssef f) -{ - return mix(tri_mix(p, q, f), tri_mix(r, s, f), shuffle<3>(f)); -} - -/* We use SSE to compute and interpolate 4 gradients at once. Since we have 8 - * gradients in 3D, we need to compute two sets of gradients at the points: - * - * Point Offset from v0 - * v0 (0, 0, 0) - * v1 (0, 0, 1) - * v2 (0, 1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1)) - * v3 (0, 1, 1) ^ - * | |__________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1) - * | ^ - * |__________________________| - * - * Point Offset from v0 - * v4 (1, 0, 0) - * v5 (1, 0, 1) - * v6 (1, 1, 0) - * v7 (1, 1, 1) - * - */ -ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) -{ - ssei XYZ; - ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ); - ssef uvw = fade(fxyz); - - ssei XYZ1 = XYZ + 1; - ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1); - ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1)); - - ssei h1 = hash_ssei3(shuffle<0>(XYZ), Y, Z); - ssei h2 = hash_ssei3(shuffle<0>(XYZ1), Y, Z); - - ssef fxyz1 = fxyz - 1.0f; - ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1); - ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1)); - - ssef g1 = grad(h1, shuffle<0>(fxyz), fy, fz); - ssef g2 = grad(h2, shuffle<0>(fxyz1), fy, fz); - - return extract<0>(tri_mix(g1, g2, uvw)); -} - -/* We use SSE to compute and interpolate 4 gradients at once. 
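The shuffle-based bi_mix/tri_mix above implement the same staged interpolation as the scalar versions: interpolate along x, then y, then z, which expands to the usual trilinear corner weights (the SSE variants order their corners differently across lanes, but the staging is identical). A scalar sketch that evaluates both forms side by side (tri_mix_staged and tri_mix_direct are illustrative names):

#include <cstdio>

static float mixf(float a, float b, float t) { return a + t * (b - a); }

// Staged form: mix along x for each of the four edges, then along y, then z.
static float tri_mix_staged(const float v[8], float x, float y, float z)
{
  float s0 = mixf(v[0], v[1], x), s1 = mixf(v[2], v[3], x);
  float s2 = mixf(v[4], v[5], x), s3 = mixf(v[6], v[7], x);
  return mixf(mixf(s0, s1, y), mixf(s2, s3, y), z);
}

// Direct form with explicit corner weights, matching the scalar tri_mix above.
static float tri_mix_direct(const float v[8], float x, float y, float z)
{
  float x1 = 1.0f - x, y1 = 1.0f - y, z1 = 1.0f - z;
  return z1 * (y1 * (v[0] * x1 + v[1] * x) + y * (v[2] * x1 + v[3] * x)) +
         z * (y1 * (v[4] * x1 + v[5] * x) + y * (v[6] * x1 + v[7] * x));
}

int main()
{
  const float v[8] = {0.1f, 0.9f, 0.4f, 0.7f, 0.3f, 0.8f, 0.2f, 0.6f};
  std::printf("staged=%f direct=%f\n",
              tri_mix_staged(v, 0.25f, 0.5f, 0.75f),
              tri_mix_direct(v, 0.25f, 0.5f, 0.75f));  // the two values agree
  return 0;
}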
Since we have 16 - * gradients in 4D, we need to compute four sets of gradients at the points: - * - * Point Offset from v0 - * v0 (0, 0, 0, 0) - * v1 (0, 0, 1, 0) - * v2 (0, 1, 0, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1)) - * v3 (0, 1, 1, 0) ^ - * | |________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1) - * | ^ - * |_______________________| - * - * Point Offset from v0 - * v4 (1, 0, 0, 0) - * v5 (1, 0, 1, 0) - * v6 (1, 1, 0, 0) - * v7 (1, 1, 1, 0) - * - * Point Offset from v0 - * v8 (0, 0, 0, 1) - * v9 (0, 0, 1, 1) - * v10 (0, 1, 0, 1) - * v11 (0, 1, 1, 1) - * - * Point Offset from v0 - * v12 (1, 0, 0, 1) - * v13 (1, 0, 1, 1) - * v14 (1, 1, 0, 1) - * v15 (1, 1, 1, 1) - * - */ -ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) -{ - ssei XYZW; - ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW); - ssef uvws = fade(fxyzw); - - ssei XYZW1 = XYZW + 1; - ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1); - ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1)); - - ssei h1 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW)); - ssei h2 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW)); - - ssei h3 = hash_ssei4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1)); - ssei h4 = hash_ssei4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1)); - - ssef fxyzw1 = fxyzw - 1.0f; - ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1); - ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1)); - - ssef g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw)); - ssef g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw)); - - ssef g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1)); - ssef g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1)); - - return extract<0>(quad_mix(g1, g2, g3, g4, uvws)); -} - -# else /* AVX is supported. */ - -/* AVX Implementation */ - -ccl_device_inline avxf grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z) -{ - avxi h = hash & 15; - avxf u = select(h < 8, x, y); - avxf vt = select((h == 12) | (h == 14), x, z); - avxf v = select(h < 4, y, vt); - return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1); -} - -ccl_device_inline avxf -grad(const avxi &hash, const avxf &x, const avxf &y, const avxf &z, const avxf &w) -{ - avxi h = hash & 31; - avxf u = select(h < 24, x, y); - avxf v = select(h < 16, y, z); - avxf s = select(h < 8, z, w); - return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2); -} - -/* SSE Quadrilinear Interpolation: - * - * The interpolation is done in two steps: - * 1. Interpolate p and q along the w axis to get s. - * 2. Trilinearly interpolate (s0, s1, s2, s3) and (s4, s5, s6, s7) to get the final - * value. (s0, s1, s2, s3) and (s4, s5, s6, s7) are generated by extracting the - * low and high ssef from s. - * - */ -ccl_device_inline ssef quad_mix(avxf p, avxf q, ssef f) -{ - ssef fv = shuffle<3>(f); - avxf s = mix(p, q, avxf(fv, fv)); - return tri_mix(low(s), high(s), f); -} - -/* We use AVX to compute and interpolate 8 gradients at once. - * - * Point Offset from v0 - * v0 (0, 0, 0) - * v1 (0, 0, 1) The full AVX type is computed by inserting the following - * v2 (0, 1, 0) SSE types into both the low and high parts of the AVX. 
- * v3 (0, 1, 1) - * v4 (1, 0, 0) - * v5 (1, 0, 1) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1)) - * v6 (1, 1, 0) ^ - * v7 (1, 1, 1) | - * | |__________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1) - * | ^ - * |__________________________| - * - */ -ccl_device_noinline_cpu float perlin_3d(float x, float y, float z) -{ - ssei XYZ; - ssef fxyz = floorfrac(ssef(x, y, z, 0.0f), &XYZ); - ssef uvw = fade(fxyz); - - ssei XYZ1 = XYZ + 1; - ssei X = shuffle<0>(XYZ); - ssei X1 = shuffle<0>(XYZ1); - ssei Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1); - ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1)); - - avxi h = hash_avxi3(avxi(X, X1), avxi(Y, Y), avxi(Z, Z)); - - ssef fxyz1 = fxyz - 1.0f; - ssef fx = shuffle<0>(fxyz); - ssef fx1 = shuffle<0>(fxyz1); - ssef fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1); - ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1)); - - avxf g = grad(h, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz)); - - return extract<0>(tri_mix(low(g), high(g), uvw)); -} - -/* We use AVX to compute and interpolate 8 gradients at once. Since we have 16 - * gradients in 4D, we need to compute two sets of gradients at the points: - * - * Point Offset from v0 - * v0 (0, 0, 0, 0) - * v1 (0, 0, 1, 0) The full AVX type is computed by inserting the following - * v2 (0, 1, 0, 0) SSE types into both the low and high parts of the AVX. - * v3 (0, 1, 1, 0) - * v4 (1, 0, 0, 0) - * v5 (1, 0, 1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1)) - * v6 (1, 1, 0, 0) ^ - * v7 (1, 1, 1, 0) | - * | |________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1) - * | ^ - * |_______________________| - * - * Point Offset from v0 - * v8 (0, 0, 0, 1) - * v9 (0, 0, 1, 1) - * v10 (0, 1, 0, 1) - * v11 (0, 1, 1, 1) - * v12 (1, 0, 0, 1) - * v13 (1, 0, 1, 1) - * v14 (1, 1, 0, 1) - * v15 (1, 1, 1, 1) - * - */ -ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w) -{ - ssei XYZW; - ssef fxyzw = floorfrac(ssef(x, y, z, w), &XYZW); - ssef uvws = fade(fxyzw); - - ssei XYZW1 = XYZW + 1; - ssei X = shuffle<0>(XYZW); - ssei X1 = shuffle<0>(XYZW1); - ssei Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1); - ssei Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1)); - ssei W = shuffle<3>(XYZW); - ssei W1 = shuffle<3>(XYZW1); - - avxi h1 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W, W)); - avxi h2 = hash_avxi4(avxi(X, X1), avxi(Y, Y), avxi(Z, Z), avxi(W1, W1)); - - ssef fxyzw1 = fxyzw - 1.0f; - ssef fx = shuffle<0>(fxyzw); - ssef fx1 = shuffle<0>(fxyzw1); - ssef fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1); - ssef fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1)); - ssef fw = shuffle<3>(fxyzw); - ssef fw1 = shuffle<3>(fxyzw1); - - avxf g1 = grad(h1, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw, fw)); - avxf g2 = grad(h2, avxf(fx, fx1), avxf(fy, fy), avxf(fz, fz), avxf(fw1, fw1)); - - return extract<0>(quad_mix(g1, g2, uvws)); -} -# endif - -# undef negate_if_nth_bit - -#endif - -/* Remap the output of noise to a predictable range [-1, 1]. - * The scale values were computed experimentally by the OSL developers. 
- */ - -ccl_device_inline float noise_scale1(float result) -{ - return 0.2500f * result; -} - -ccl_device_inline float noise_scale2(float result) -{ - return 0.6616f * result; -} - -ccl_device_inline float noise_scale3(float result) -{ - return 0.9820f * result; -} - -ccl_device_inline float noise_scale4(float result) -{ - return 0.8344f * result; -} - -/* Safe Signed And Unsigned Noise */ - -ccl_device_inline float snoise_1d(float p) -{ - return noise_scale1(ensure_finite(perlin_1d(p))); -} - -ccl_device_inline float noise_1d(float p) -{ - return 0.5f * snoise_1d(p) + 0.5f; -} - -ccl_device_inline float snoise_2d(float2 p) -{ - return noise_scale2(ensure_finite(perlin_2d(p.x, p.y))); -} - -ccl_device_inline float noise_2d(float2 p) -{ - return 0.5f * snoise_2d(p) + 0.5f; -} - -ccl_device_inline float snoise_3d(float3 p) -{ - return noise_scale3(ensure_finite(perlin_3d(p.x, p.y, p.z))); -} - -ccl_device_inline float noise_3d(float3 p) -{ - return 0.5f * snoise_3d(p) + 0.5f; -} - -ccl_device_inline float snoise_4d(float4 p) -{ - return noise_scale4(ensure_finite(perlin_4d(p.x, p.y, p.z, p.w))); -} - -ccl_device_inline float noise_4d(float4 p) -{ - return 0.5f * snoise_4d(p) + 0.5f; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h deleted file mode 100644 index 3fe33f72b59..00000000000 --- a/intern/cycles/kernel/svm/svm_noisetex.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* The following offset functions generate random offsets to be added to texture - * coordinates to act as a seed since the noise functions don't have seed values. - * A seed value is needed for generating distortion textures and color outputs. - * The offset's components are in the range [100, 200], not too high to cause - * bad precision and not too small to be noticeable. We use float seed because - * OSL only support float hashes. 
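The noise texture functions below build on fractal_noise_1d/2d/3d/4d, which are defined elsewhere in the kernel and not part of this patch. As orientation only, a minimal sketch of the conventional octave (fBm) accumulation, assuming detail roughly maps to the octave count and roughness to the per-octave amplitude factor (the real functions also handle fractional octaves):

#include <cmath>

/* Stand-in declaration for the kernel's 1D signed Perlin noise. */
float snoise_1d(float p);

/* Conventional fBm: each octave doubles the frequency and scales the
 * amplitude by 'roughness'; the sum is normalized back into roughly [-1, 1].
 * Illustrative sketch, not the exact Cycles weighting. */
float fbm_1d_sketch(float p, float detail, float roughness)
{
  float sum = 0.0f, amp = 1.0f, freq = 1.0f, max_amp = 0.0f;
  int octaves = (int)floorf(detail) + 1;
  for (int i = 0; i < octaves; i++) {
    sum += amp * snoise_1d(freq * p);
    max_amp += amp;
    amp *= roughness;
    freq *= 2.0f;
  }
  return sum / max_amp;
}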
- */ - -ccl_device_inline float random_float_offset(float seed) -{ - return 100.0f + hash_float_to_float(seed) * 100.0f; -} - -ccl_device_inline float2 random_float2_offset(float seed) -{ - return make_float2(100.0f + hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f, - 100.0f + hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f); -} - -ccl_device_inline float3 random_float3_offset(float seed) -{ - return make_float3(100.0f + hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f, - 100.0f + hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f, - 100.0f + hash_float2_to_float(make_float2(seed, 2.0f)) * 100.0f); -} - -ccl_device_inline float4 random_float4_offset(float seed) -{ - return make_float4(100.0f + hash_float2_to_float(make_float2(seed, 0.0f)) * 100.0f, - 100.0f + hash_float2_to_float(make_float2(seed, 1.0f)) * 100.0f, - 100.0f + hash_float2_to_float(make_float2(seed, 2.0f)) * 100.0f, - 100.0f + hash_float2_to_float(make_float2(seed, 3.0f)) * 100.0f); -} - -ccl_device void noise_texture_1d(float co, - float detail, - float roughness, - float distortion, - bool color_is_needed, - ccl_private float *value, - ccl_private float3 *color) -{ - float p = co; - if (distortion != 0.0f) { - p += snoise_1d(p + random_float_offset(0.0f)) * distortion; - } - - *value = fractal_noise_1d(p, detail, roughness); - if (color_is_needed) { - *color = make_float3(*value, - fractal_noise_1d(p + random_float_offset(1.0f), detail, roughness), - fractal_noise_1d(p + random_float_offset(2.0f), detail, roughness)); - } -} - -ccl_device void noise_texture_2d(float2 co, - float detail, - float roughness, - float distortion, - bool color_is_needed, - ccl_private float *value, - ccl_private float3 *color) -{ - float2 p = co; - if (distortion != 0.0f) { - p += make_float2(snoise_2d(p + random_float2_offset(0.0f)) * distortion, - snoise_2d(p + random_float2_offset(1.0f)) * distortion); - } - - *value = fractal_noise_2d(p, detail, roughness); - if (color_is_needed) { - *color = make_float3(*value, - fractal_noise_2d(p + random_float2_offset(2.0f), detail, roughness), - fractal_noise_2d(p + random_float2_offset(3.0f), detail, roughness)); - } -} - -ccl_device void noise_texture_3d(float3 co, - float detail, - float roughness, - float distortion, - bool color_is_needed, - ccl_private float *value, - ccl_private float3 *color) -{ - float3 p = co; - if (distortion != 0.0f) { - p += make_float3(snoise_3d(p + random_float3_offset(0.0f)) * distortion, - snoise_3d(p + random_float3_offset(1.0f)) * distortion, - snoise_3d(p + random_float3_offset(2.0f)) * distortion); - } - - *value = fractal_noise_3d(p, detail, roughness); - if (color_is_needed) { - *color = make_float3(*value, - fractal_noise_3d(p + random_float3_offset(3.0f), detail, roughness), - fractal_noise_3d(p + random_float3_offset(4.0f), detail, roughness)); - } -} - -ccl_device void noise_texture_4d(float4 co, - float detail, - float roughness, - float distortion, - bool color_is_needed, - ccl_private float *value, - ccl_private float3 *color) -{ - float4 p = co; - if (distortion != 0.0f) { - p += make_float4(snoise_4d(p + random_float4_offset(0.0f)) * distortion, - snoise_4d(p + random_float4_offset(1.0f)) * distortion, - snoise_4d(p + random_float4_offset(2.0f)) * distortion, - snoise_4d(p + random_float4_offset(3.0f)) * distortion); - } - - *value = fractal_noise_4d(p, detail, roughness); - if (color_is_needed) { - *color = make_float3(*value, - fractal_noise_4d(p + random_float4_offset(4.0f), detail, roughness), - fractal_noise_4d(p + 
random_float4_offset(5.0f), detail, roughness)); - } -} - -ccl_device_noinline int svm_node_tex_noise(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint dimensions, - uint offsets1, - uint offsets2, - int offset) -{ - uint vector_stack_offset, w_stack_offset, scale_stack_offset; - uint detail_stack_offset, roughness_stack_offset, distortion_stack_offset; - uint value_stack_offset, color_stack_offset; - - svm_unpack_node_uchar4( - offsets1, &vector_stack_offset, &w_stack_offset, &scale_stack_offset, &detail_stack_offset); - svm_unpack_node_uchar4(offsets2, - &roughness_stack_offset, - &distortion_stack_offset, - &value_stack_offset, - &color_stack_offset); - - uint4 defaults1 = read_node(kg, &offset); - uint4 defaults2 = read_node(kg, &offset); - - float3 vector = stack_load_float3(stack, vector_stack_offset); - float w = stack_load_float_default(stack, w_stack_offset, defaults1.x); - float scale = stack_load_float_default(stack, scale_stack_offset, defaults1.y); - float detail = stack_load_float_default(stack, detail_stack_offset, defaults1.z); - float roughness = stack_load_float_default(stack, roughness_stack_offset, defaults1.w); - float distortion = stack_load_float_default(stack, distortion_stack_offset, defaults2.x); - - vector *= scale; - w *= scale; - - float value; - float3 color; - switch (dimensions) { - case 1: - noise_texture_1d( - w, detail, roughness, distortion, stack_valid(color_stack_offset), &value, &color); - break; - case 2: - noise_texture_2d(make_float2(vector.x, vector.y), - detail, - roughness, - distortion, - stack_valid(color_stack_offset), - &value, - &color); - break; - case 3: - noise_texture_3d( - vector, detail, roughness, distortion, stack_valid(color_stack_offset), &value, &color); - break; - case 4: - noise_texture_4d(make_float4(vector.x, vector.y, vector.z, w), - detail, - roughness, - distortion, - stack_valid(color_stack_offset), - &value, - &color); - break; - default: - kernel_assert(0); - } - - if (stack_valid(value_stack_offset)) { - stack_store_float(stack, value_stack_offset, value); - } - if (stack_valid(color_stack_offset)) { - stack_store_float3(stack, color_stack_offset, color); - } - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h deleted file mode 100644 index 9bf64ed8823..00000000000 --- a/intern/cycles/kernel/svm/svm_normal.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
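The offsets1/offsets2 words above each carry four byte-sized stack offsets, which svm_unpack_node_uchar4() splits apart again. A host-side sketch of that packing, assuming simple little-endian byte packing (function names are illustrative):

#include <cassert>
#include <cstdint>

/* One byte per stack offset, four offsets per 32-bit node word. */
static uint32_t pack_uchar4(uint8_t a, uint8_t b, uint8_t c, uint8_t d)
{
  return (uint32_t)a | ((uint32_t)b << 8) | ((uint32_t)c << 16) | ((uint32_t)d << 24);
}

static void unpack_uchar4(uint32_t v, uint8_t *a, uint8_t *b, uint8_t *c, uint8_t *d)
{
  *a = v & 0xFF;
  *b = (v >> 8) & 0xFF;
  *c = (v >> 16) & 0xFF;
  *d = (v >> 24) & 0xFF;
}

int main()
{
  uint8_t a, b, c, d;
  unpack_uchar4(pack_uchar4(3, 7, 255, 0), &a, &b, &c, &d);
  assert(a == 3 && b == 7 && c == 255 && d == 0); /* 255 marks an unused slot */
  return 0;
}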
- */ - -CCL_NAMESPACE_BEGIN - -ccl_device_noinline int svm_node_normal(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint in_normal_offset, - uint out_normal_offset, - uint out_dot_offset, - int offset) -{ - /* read extra data */ - uint4 node1 = read_node(kg, &offset); - float3 normal = stack_load_float3(stack, in_normal_offset); - - float3 direction; - direction.x = __int_as_float(node1.x); - direction.y = __int_as_float(node1.y); - direction.z = __int_as_float(node1.z); - direction = normalize(direction); - - if (stack_valid(out_normal_offset)) - stack_store_float3(stack, out_normal_offset, direction); - - if (stack_valid(out_dot_offset)) - stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal))); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h deleted file mode 100644 index d2dddf4c6eb..00000000000 --- a/intern/cycles/kernel/svm/svm_ramp.h +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SVM_RAMP_H__ -#define __SVM_RAMP_H__ - -CCL_NAMESPACE_BEGIN - -/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ - -ccl_device_inline float fetch_float(KernelGlobals kg, int offset) -{ - uint4 node = kernel_tex_fetch(__svm_nodes, offset); - return __uint_as_float(node.x); -} - -ccl_device_inline float float_ramp_lookup( - KernelGlobals kg, int offset, float f, bool interpolate, bool extrapolate, int table_size) -{ - if ((f < 0.0f || f > 1.0f) && extrapolate) { - float t0, dy; - if (f < 0.0f) { - t0 = fetch_float(kg, offset); - dy = t0 - fetch_float(kg, offset + 1); - f = -f; - } - else { - t0 = fetch_float(kg, offset + table_size - 1); - dy = t0 - fetch_float(kg, offset + table_size - 2); - f = f - 1.0f; - } - return t0 + dy * f * (table_size - 1); - } - - f = saturate(f) * (table_size - 1); - - /* clamp int as well in case of NaN */ - int i = clamp(float_to_int(f), 0, table_size - 1); - float t = f - (float)i; - - float a = fetch_float(kg, offset + i); - - if (interpolate && t > 0.0f) - a = (1.0f - t) * a + t * fetch_float(kg, offset + i + 1); - - return a; -} - -ccl_device_inline float4 rgb_ramp_lookup( - KernelGlobals kg, int offset, float f, bool interpolate, bool extrapolate, int table_size) -{ - if ((f < 0.0f || f > 1.0f) && extrapolate) { - float4 t0, dy; - if (f < 0.0f) { - t0 = fetch_node_float(kg, offset); - dy = t0 - fetch_node_float(kg, offset + 1); - f = -f; - } - else { - t0 = fetch_node_float(kg, offset + table_size - 1); - dy = t0 - fetch_node_float(kg, offset + table_size - 2); - f = f - 1.0f; - } - return t0 + dy * f * (table_size - 1); - } - - f = saturate(f) * (table_size - 1); - - /* clamp int as well in case of NaN */ - int i = clamp(float_to_int(f), 0, table_size - 1); - float t = f - (float)i; - - float4 a = fetch_node_float(kg, offset + i); - - if (interpolate && t > 0.0f) - a = (1.0f - t) * a + t * 
fetch_node_float(kg, offset + i + 1); - - return a; -} - -ccl_device_noinline int svm_node_rgb_ramp( - KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) -{ - uint fac_offset, color_offset, alpha_offset; - uint interpolate = node.z; - - svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &alpha_offset); - - uint table_size = read_node(kg, &offset).x; - - float fac = stack_load_float(stack, fac_offset); - float4 color = rgb_ramp_lookup(kg, offset, fac, interpolate, false, table_size); - - if (stack_valid(color_offset)) - stack_store_float3(stack, color_offset, float4_to_float3(color)); - if (stack_valid(alpha_offset)) - stack_store_float(stack, alpha_offset, color.w); - - offset += table_size; - return offset; -} - -ccl_device_noinline int svm_node_curves( - KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) -{ - uint fac_offset, color_offset, out_offset; - svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &out_offset); - - uint table_size = read_node(kg, &offset).x; - - float fac = stack_load_float(stack, fac_offset); - float3 color = stack_load_float3(stack, color_offset); - - const float min_x = __int_as_float(node.z), max_x = __int_as_float(node.w); - const float range_x = max_x - min_x; - const float3 relpos = (color - make_float3(min_x, min_x, min_x)) / range_x; - - float r = rgb_ramp_lookup(kg, offset, relpos.x, true, true, table_size).x; - float g = rgb_ramp_lookup(kg, offset, relpos.y, true, true, table_size).y; - float b = rgb_ramp_lookup(kg, offset, relpos.z, true, true, table_size).z; - - color = (1.0f - fac) * color + fac * make_float3(r, g, b); - stack_store_float3(stack, out_offset, color); - - offset += table_size; - return offset; -} - -ccl_device_noinline int svm_node_curve( - KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) -{ - uint fac_offset, value_in_offset, out_offset; - svm_unpack_node_uchar3(node.y, &fac_offset, &value_in_offset, &out_offset); - - uint table_size = read_node(kg, &offset).x; - - float fac = stack_load_float(stack, fac_offset); - float in = stack_load_float(stack, value_in_offset); - - const float min = __int_as_float(node.z), max = __int_as_float(node.w); - const float range = max - min; - const float relpos = (in - min) / range; - - float v = float_ramp_lookup(kg, offset, relpos, true, true, table_size); - - in = (1.0f - fac) * in + fac * v; - stack_store_float(stack, out_offset, in); - - offset += table_size; - return offset; -} - -CCL_NAMESPACE_END - -#endif /* __SVM_RAMP_H__ */ diff --git a/intern/cycles/kernel/svm/svm_ramp_util.h b/intern/cycles/kernel/svm/svm_ramp_util.h deleted file mode 100644 index 202596c1fe3..00000000000 --- a/intern/cycles/kernel/svm/svm_ramp_util.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __SVM_RAMP_UTIL_H__ -#define __SVM_RAMP_UTIL_H__ - -CCL_NAMESPACE_BEGIN - -/* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ - -ccl_device_inline float3 -rgb_ramp_lookup(const float3 *ramp, float f, bool interpolate, bool extrapolate, int table_size) -{ - if ((f < 0.0f || f > 1.0f) && extrapolate) { - float3 t0, dy; - if (f < 0.0f) { - t0 = ramp[0]; - dy = t0 - ramp[1], f = -f; - } - else { - t0 = ramp[table_size - 1]; - dy = t0 - ramp[table_size - 2]; - f = f - 1.0f; - } - return t0 + dy * f * (table_size - 1); - } - - f = clamp(f, 0.0f, 1.0f) * (table_size - 1); - - /* clamp int as well in case of NaN */ - int i = clamp(float_to_int(f), 0, table_size - 1); - float t = f - (float)i; - - float3 result = ramp[i]; - - if (interpolate && t > 0.0f) { - result = (1.0f - t) * result + t * ramp[i + 1]; - } - - return result; -} - -ccl_device float float_ramp_lookup( - const float *ramp, float f, bool interpolate, bool extrapolate, int table_size) -{ - if ((f < 0.0f || f > 1.0f) && extrapolate) { - float t0, dy; - if (f < 0.0f) { - t0 = ramp[0]; - dy = t0 - ramp[1], f = -f; - } - else { - t0 = ramp[table_size - 1]; - dy = t0 - ramp[table_size - 2]; - f = f - 1.0f; - } - return t0 + dy * f * (table_size - 1); - } - - f = clamp(f, 0.0f, 1.0f) * (table_size - 1); - - /* clamp int as well in case of NaN */ - int i = clamp(float_to_int(f), 0, table_size - 1); - float t = f - (float)i; - - float result = ramp[i]; - - if (interpolate && t > 0.0f) { - result = (1.0f - t) * result + t * ramp[i + 1]; - } - - return result; -} - -CCL_NAMESPACE_END - -#endif /* __SVM_RAMP_UTIL_H__ */ diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h deleted file mode 100644 index bafa0456342..00000000000 --- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
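To make the extrapolation branch above concrete: outside [0, 1] the ramp continues linearly with the slope of the first or last table segment, scaled by (table_size - 1). A small host-side sketch with worked values (illustrative, drops the interpolate flag):

#include <algorithm>
#include <cstdio>
#include <vector>

static float ramp_lookup_sketch(const std::vector<float> &ramp, float f)
{
  const int n = (int)ramp.size();
  if (f < 0.0f || f > 1.0f) {
    float t0, dy;
    if (f < 0.0f) { t0 = ramp[0]; dy = t0 - ramp[1]; f = -f; }
    else { t0 = ramp[n - 1]; dy = t0 - ramp[n - 2]; f = f - 1.0f; }
    return t0 + dy * f * (n - 1);
  }
  f = std::min(std::max(f, 0.0f), 1.0f) * (n - 1);
  int i = std::min(std::max((int)f, 0), n - 1);
  float t = f - (float)i;
  float a = ramp[i];
  if (t > 0.0f && i + 1 < n)
    a = (1.0f - t) * a + t * ramp[i + 1];
  return a;
}

int main()
{
  std::vector<float> ramp = {0.0f, 1.0f, 2.0f, 3.0f};
  printf("%g\n", ramp_lookup_sketch(ramp, 0.5f));  /* 1.5: interior interpolation */
  printf("%g\n", ramp_lookup_sketch(ramp, 1.25f)); /* 3.75: last slope continued */
  return 0;
}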
- */ - -CCL_NAMESPACE_BEGIN - -ccl_device_noinline int svm_node_combine_hsv(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint hue_in, - uint saturation_in, - uint value_in, - int offset) -{ - uint4 node1 = read_node(kg, &offset); - uint color_out = node1.y; - - float hue = stack_load_float(stack, hue_in); - float saturation = stack_load_float(stack, saturation_in); - float value = stack_load_float(stack, value_in); - - /* Combine, and convert back to RGB */ - float3 color = hsv_to_rgb(make_float3(hue, saturation, value)); - - if (stack_valid(color_out)) - stack_store_float3(stack, color_out, color); - return offset; -} - -ccl_device_noinline int svm_node_separate_hsv(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint color_in, - uint hue_out, - uint saturation_out, - int offset) -{ - uint4 node1 = read_node(kg, &offset); - uint value_out = node1.y; - - float3 color = stack_load_float3(stack, color_in); - - /* Convert to HSV */ - color = rgb_to_hsv(color); - - if (stack_valid(hue_out)) - stack_store_float(stack, hue_out, color.x); - if (stack_valid(saturation_out)) - stack_store_float(stack, saturation_out, color.y); - if (stack_valid(value_out)) - stack_store_float(stack, value_out, color.z); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_sepcomb_vector.h b/intern/cycles/kernel/svm/svm_sepcomb_vector.h deleted file mode 100644 index 11e440f2cbf..00000000000 --- a/intern/cycles/kernel/svm/svm_sepcomb_vector.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2011-2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Vector combine / separate, used for the RGB and XYZ nodes */ - -ccl_device void svm_node_combine_vector(ccl_private ShaderData *sd, - ccl_private float *stack, - uint in_offset, - uint vector_index, - uint out_offset) -{ - float vector = stack_load_float(stack, in_offset); - - if (stack_valid(out_offset)) - stack_store_float(stack, out_offset + vector_index, vector); -} - -ccl_device void svm_node_separate_vector(ccl_private ShaderData *sd, - ccl_private float *stack, - uint ivector_offset, - uint vector_index, - uint out_offset) -{ - float3 vector = stack_load_float3(stack, ivector_offset); - - if (stack_valid(out_offset)) { - if (vector_index == 0) - stack_store_float(stack, out_offset, vector.x); - else if (vector_index == 1) - stack_store_float(stack, out_offset, vector.y); - else - stack_store_float(stack, out_offset, vector.z); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h deleted file mode 100644 index 3ab7bc89c66..00000000000 --- a/intern/cycles/kernel/svm/svm_sky.h +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
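The combine/separate HSV nodes above rely on hsv_to_rgb() and rgb_to_hsv(), which are defined elsewhere in the kernel. A standard sketch of the forward conversion, assuming all of h, s, v lie in [0, 1] (names are illustrative):

#include <cmath>

struct float3_s { float x, y, z; };

/* Textbook HSV -> RGB with h, s, v in [0, 1]; illustration of the convention
 * the combine node depends on, not the kernel's own implementation. */
static float3_s hsv_to_rgb_sketch(float h, float s, float v)
{
  if (s == 0.0f) return {v, v, v};
  h = fmodf(h, 1.0f) * 6.0f;
  int i = (int)floorf(h);
  float f = h - (float)i;
  float p = v * (1.0f - s);
  float q = v * (1.0f - s * f);
  float t = v * (1.0f - s * (1.0f - f));
  switch (i % 6) {
    case 0: return {v, t, p};
    case 1: return {q, v, p};
    case 2: return {p, v, t};
    case 3: return {p, q, v};
    case 4: return {t, p, v};
    default: return {v, p, q};
  }
}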
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Sky texture */ - -ccl_device float sky_angle_between(float thetav, float phiv, float theta, float phi) -{ - float cospsi = sinf(thetav) * sinf(theta) * cosf(phi - phiv) + cosf(thetav) * cosf(theta); - return safe_acosf(cospsi); -} - -/* - * "A Practical Analytic Model for Daylight" - * A. J. Preetham, Peter Shirley, Brian Smits - */ -ccl_device float sky_perez_function(ccl_private float *lam, float theta, float gamma) -{ - float ctheta = cosf(theta); - float cgamma = cosf(gamma); - - return (1.0f + lam[0] * expf(lam[1] / ctheta)) * - (1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma); -} - -ccl_device float3 sky_radiance_preetham(KernelGlobals kg, - float3 dir, - float sunphi, - float suntheta, - float radiance_x, - float radiance_y, - float radiance_z, - ccl_private float *config_x, - ccl_private float *config_y, - ccl_private float *config_z) -{ - /* convert vector to spherical coordinates */ - float2 spherical = direction_to_spherical(dir); - float theta = spherical.x; - float phi = spherical.y; - - /* angle between sun direction and dir */ - float gamma = sky_angle_between(theta, phi, suntheta, sunphi); - - /* clamp theta to horizon */ - theta = min(theta, M_PI_2_F - 0.001f); - - /* compute xyY color space values */ - float x = radiance_y * sky_perez_function(config_y, theta, gamma); - float y = radiance_z * sky_perez_function(config_z, theta, gamma); - float Y = radiance_x * sky_perez_function(config_x, theta, gamma); - - /* convert to RGB */ - float3 xyz = xyY_to_xyz(x, y, Y); - return xyz_to_rgb(kg, xyz); -} - -/* - * "An Analytic Model for Full Spectral Sky-Dome Radiance" - * Lukas Hosek, Alexander Wilkie - */ -ccl_device float sky_radiance_internal(ccl_private float *configuration, float theta, float gamma) -{ - float ctheta = cosf(theta); - float cgamma = cosf(gamma); - - float expM = expf(configuration[4] * gamma); - float rayM = cgamma * cgamma; - float mieM = (1.0f + rayM) / powf((1.0f + configuration[8] * configuration[8] - - 2.0f * configuration[8] * cgamma), - 1.5f); - float zenith = sqrtf(ctheta); - - return (1.0f + configuration[0] * expf(configuration[1] / (ctheta + 0.01f))) * - (configuration[2] + configuration[3] * expM + configuration[5] * rayM + - configuration[6] * mieM + configuration[7] * zenith); -} - -ccl_device float3 sky_radiance_hosek(KernelGlobals kg, - float3 dir, - float sunphi, - float suntheta, - float radiance_x, - float radiance_y, - float radiance_z, - ccl_private float *config_x, - ccl_private float *config_y, - ccl_private float *config_z) -{ - /* convert vector to spherical coordinates */ - float2 spherical = direction_to_spherical(dir); - float theta = spherical.x; - float phi = spherical.y; - - /* angle between sun direction and dir */ - float gamma = sky_angle_between(theta, phi, suntheta, sunphi); - - /* clamp theta to horizon */ - theta = min(theta, M_PI_2_F - 0.001f); - - /* compute xyz color space values */ - float x = sky_radiance_internal(config_x, theta, gamma) * radiance_x; - float y = sky_radiance_internal(config_y, theta, gamma) * radiance_y; - float z = 
sky_radiance_internal(config_z, theta, gamma) * radiance_z; - - /* convert to RGB and adjust strength */ - return xyz_to_rgb(kg, make_float3(x, y, z)) * (M_2PI_F / 683); -} - -/* Nishita improved sky model */ -ccl_device float3 geographical_to_direction(float lat, float lon) -{ - return make_float3(cos(lat) * cos(lon), cos(lat) * sin(lon), sin(lat)); -} - -ccl_device float3 sky_radiance_nishita(KernelGlobals kg, - float3 dir, - ccl_private float *nishita_data, - uint texture_id) -{ - /* definitions */ - float sun_elevation = nishita_data[6]; - float sun_rotation = nishita_data[7]; - float angular_diameter = nishita_data[8]; - float sun_intensity = nishita_data[9]; - bool sun_disc = (angular_diameter >= 0.0f); - float3 xyz; - /* convert dir to spherical coordinates */ - float2 direction = direction_to_spherical(dir); - - /* render above the horizon */ - if (dir.z >= 0.0f) { - /* definitions */ - float3 sun_dir = geographical_to_direction(sun_elevation, sun_rotation + M_PI_2_F); - float sun_dir_angle = precise_angle(dir, sun_dir); - float half_angular = angular_diameter / 2.0f; - float dir_elevation = M_PI_2_F - direction.x; - - /* if ray inside sun disc render it, otherwise render sky */ - if (sun_disc && sun_dir_angle < half_angular) { - /* get 2 pixels data */ - float3 pixel_bottom = make_float3(nishita_data[0], nishita_data[1], nishita_data[2]); - float3 pixel_top = make_float3(nishita_data[3], nishita_data[4], nishita_data[5]); - float y; - - /* sun interpolation */ - if (sun_elevation - half_angular > 0.0f) { - if (sun_elevation + half_angular > 0.0f) { - y = ((dir_elevation - sun_elevation) / angular_diameter) + 0.5f; - xyz = interp(pixel_bottom, pixel_top, y) * sun_intensity; - } - } - else { - if (sun_elevation + half_angular > 0.0f) { - y = dir_elevation / (sun_elevation + half_angular); - xyz = interp(pixel_bottom, pixel_top, y) * sun_intensity; - } - } - /* limb darkening, coefficient is 0.6f */ - float limb_darkening = (1.0f - - 0.6f * (1.0f - sqrtf(1.0f - sqr(sun_dir_angle / half_angular)))); - xyz *= limb_darkening; - } - /* sky */ - else { - /* sky interpolation */ - float x = (direction.y + M_PI_F + sun_rotation) / M_2PI_F; - /* more pixels toward horizon compensation */ - float y = safe_sqrtf(dir_elevation / M_PI_2_F); - if (x > 1.0f) { - x -= 1.0f; - } - xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, y)); - } - } - /* ground */ - else { - if (dir.z < -0.4f) { - xyz = make_float3(0.0f, 0.0f, 0.0f); - } - else { - /* black ground fade */ - float fade = 1.0f + dir.z * 2.5f; - fade = sqr(fade) * fade; - /* interpolation */ - float x = (direction.y + M_PI_F + sun_rotation) / M_2PI_F; - if (x > 1.0f) { - x -= 1.0f; - } - xyz = float4_to_float3(kernel_tex_image_interp(kg, texture_id, x, -0.5)) * fade; - } - } - - /* convert to RGB */ - return xyz_to_rgb(kg, xyz); -} - -ccl_device_noinline int svm_node_tex_sky( - KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) -{ - /* Load data */ - uint dir_offset = node.y; - uint out_offset = node.z; - int sky_model = node.w; - - float3 dir = stack_load_float3(stack, dir_offset); - float3 f; - - /* Preetham and Hosek share the same data */ - if (sky_model == 0 || sky_model == 1) { - /* Define variables */ - float sunphi, suntheta, radiance_x, radiance_y, radiance_z; - float config_x[9], config_y[9], config_z[9]; - - float4 data = read_node_float(kg, &offset); - sunphi = data.x; - suntheta = data.y; - radiance_x = data.z; - radiance_y = data.w; - - data = read_node_float(kg, 
&offset); - radiance_z = data.x; - config_x[0] = data.y; - config_x[1] = data.z; - config_x[2] = data.w; - - data = read_node_float(kg, &offset); - config_x[3] = data.x; - config_x[4] = data.y; - config_x[5] = data.z; - config_x[6] = data.w; - - data = read_node_float(kg, &offset); - config_x[7] = data.x; - config_x[8] = data.y; - config_y[0] = data.z; - config_y[1] = data.w; - - data = read_node_float(kg, &offset); - config_y[2] = data.x; - config_y[3] = data.y; - config_y[4] = data.z; - config_y[5] = data.w; - - data = read_node_float(kg, &offset); - config_y[6] = data.x; - config_y[7] = data.y; - config_y[8] = data.z; - config_z[0] = data.w; - - data = read_node_float(kg, &offset); - config_z[1] = data.x; - config_z[2] = data.y; - config_z[3] = data.z; - config_z[4] = data.w; - - data = read_node_float(kg, &offset); - config_z[5] = data.x; - config_z[6] = data.y; - config_z[7] = data.z; - config_z[8] = data.w; - - /* Compute Sky */ - if (sky_model == 0) { - f = sky_radiance_preetham(kg, - dir, - sunphi, - suntheta, - radiance_x, - radiance_y, - radiance_z, - config_x, - config_y, - config_z); - } - else { - f = sky_radiance_hosek(kg, - dir, - sunphi, - suntheta, - radiance_x, - radiance_y, - radiance_z, - config_x, - config_y, - config_z); - } - } - /* Nishita */ - else { - /* Define variables */ - float nishita_data[10]; - - float4 data = read_node_float(kg, &offset); - nishita_data[0] = data.x; - nishita_data[1] = data.y; - nishita_data[2] = data.z; - nishita_data[3] = data.w; - - data = read_node_float(kg, &offset); - nishita_data[4] = data.x; - nishita_data[5] = data.y; - nishita_data[6] = data.z; - nishita_data[7] = data.w; - - data = read_node_float(kg, &offset); - nishita_data[8] = data.x; - nishita_data[9] = data.y; - uint texture_id = __float_as_uint(data.z); - - /* Compute Sky */ - f = sky_radiance_nishita(kg, dir, nishita_data, texture_id); - } - - stack_store_float3(stack, out_offset, f); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h deleted file mode 100644 index 9af0a818cad..00000000000 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
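The long hand-written unpacking above (sunphi/suntheta, the three 9-float Perez/Hosek configs, the Nishita array) is conceptually just copying a stream of float4 packets into a flat float array. A host-side sketch of that idea, with illustrative names:

#include <cstddef>

struct float4_s { float x, y, z, w; };

/* Copy 'count' floats out of consecutive float4 packets, four per packet,
 * which is what the per-component assignments above do explicitly. */
static void read_packed_floats(const float4_s *packets, float *dst, size_t count)
{
  for (size_t i = 0; i < count; i++) {
    const float4_s &p = packets[i / 4];
    const float c[4] = {p.x, p.y, p.z, p.w};
    dst[i] = c[i % 4];
  }
}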
- */ - -#include "kernel/camera/camera.h" -#include "kernel/geom/geom.h" -#include "kernel/sample/sample_mapping.h" - -CCL_NAMESPACE_BEGIN - -/* Texture Coordinate Node */ - -ccl_device_noinline int svm_node_tex_coord(KernelGlobals kg, - ccl_private ShaderData *sd, - uint32_t path_flag, - ccl_private float *stack, - uint4 node, - int offset) -{ - float3 data; - uint type = node.y; - uint out_offset = node.z; - - switch (type) { - case NODE_TEXCO_OBJECT: { - data = sd->P; - if (node.w == 0) { - if (sd->object != OBJECT_NONE) { - object_inverse_position_transform(kg, sd, &data); - } - } - else { - Transform tfm; - tfm.x = read_node_float(kg, &offset); - tfm.y = read_node_float(kg, &offset); - tfm.z = read_node_float(kg, &offset); - data = transform_point(&tfm, data); - } - break; - } - case NODE_TEXCO_NORMAL: { - data = sd->N; - object_inverse_normal_transform(kg, sd, &data); - break; - } - case NODE_TEXCO_CAMERA: { - Transform tfm = kernel_data.cam.worldtocamera; - - if (sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P); - else - data = transform_point(&tfm, sd->P + camera_position(kg)); - break; - } - case NODE_TEXCO_WINDOW: { - if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && - kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P); - else - data = camera_world_to_ndc(kg, sd, sd->P); - data.z = 0.0f; - break; - } - case NODE_TEXCO_REFLECTION: { - if (sd->object != OBJECT_NONE) - data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; - else - data = sd->I; - break; - } - case NODE_TEXCO_DUPLI_GENERATED: { - data = object_dupli_generated(kg, sd->object); - break; - } - case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, sd->object); - break; - } - case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P; - -#ifdef __VOLUME__ - if (sd->object != OBJECT_NONE) - data = volume_normalized_position(kg, sd, data); -#endif - break; - } - } - - stack_store_float3(stack, out_offset, data); - return offset; -} - -ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg, - ccl_private ShaderData *sd, - uint32_t path_flag, - ccl_private float *stack, - uint4 node, - int offset) -{ -#ifdef __RAY_DIFFERENTIALS__ - float3 data; - uint type = node.y; - uint out_offset = node.z; - - switch (type) { - case NODE_TEXCO_OBJECT: { - data = sd->P + sd->dP.dx; - if (node.w == 0) { - if (sd->object != OBJECT_NONE) { - object_inverse_position_transform(kg, sd, &data); - } - } - else { - Transform tfm; - tfm.x = read_node_float(kg, &offset); - tfm.y = read_node_float(kg, &offset); - tfm.z = read_node_float(kg, &offset); - data = transform_point(&tfm, data); - } - break; - } - case NODE_TEXCO_NORMAL: { - data = sd->N; - object_inverse_normal_transform(kg, sd, &data); - break; - } - case NODE_TEXCO_CAMERA: { - Transform tfm = kernel_data.cam.worldtocamera; - - if (sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P + sd->dP.dx); - else - data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg)); - break; - } - case NODE_TEXCO_WINDOW: { - if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && - kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)); - else - data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx); - data.z = 0.0f; - break; - } - case NODE_TEXCO_REFLECTION: { - if (sd->object != OBJECT_NONE) - data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; - else - data = sd->I; - break; - } - case NODE_TEXCO_DUPLI_GENERATED: { - data = 
object_dupli_generated(kg, sd->object); - break; - } - case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, sd->object); - break; - } - case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P + sd->dP.dx; - -# ifdef __VOLUME__ - if (sd->object != OBJECT_NONE) - data = volume_normalized_position(kg, sd, data); -# endif - break; - } - } - - stack_store_float3(stack, out_offset, data); - return offset; -#else - return svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); -#endif -} - -ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg, - ccl_private ShaderData *sd, - uint32_t path_flag, - ccl_private float *stack, - uint4 node, - int offset) -{ -#ifdef __RAY_DIFFERENTIALS__ - float3 data; - uint type = node.y; - uint out_offset = node.z; - - switch (type) { - case NODE_TEXCO_OBJECT: { - data = sd->P + sd->dP.dy; - if (node.w == 0) { - if (sd->object != OBJECT_NONE) { - object_inverse_position_transform(kg, sd, &data); - } - } - else { - Transform tfm; - tfm.x = read_node_float(kg, &offset); - tfm.y = read_node_float(kg, &offset); - tfm.z = read_node_float(kg, &offset); - data = transform_point(&tfm, data); - } - break; - } - case NODE_TEXCO_NORMAL: { - data = sd->N; - object_inverse_normal_transform(kg, sd, &data); - break; - } - case NODE_TEXCO_CAMERA: { - Transform tfm = kernel_data.cam.worldtocamera; - - if (sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P + sd->dP.dy); - else - data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg)); - break; - } - case NODE_TEXCO_WINDOW: { - if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && - kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)); - else - data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy); - data.z = 0.0f; - break; - } - case NODE_TEXCO_REFLECTION: { - if (sd->object != OBJECT_NONE) - data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; - else - data = sd->I; - break; - } - case NODE_TEXCO_DUPLI_GENERATED: { - data = object_dupli_generated(kg, sd->object); - break; - } - case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, sd->object); - break; - } - case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P + sd->dP.dy; - -# ifdef __VOLUME__ - if (sd->object != OBJECT_NONE) - data = volume_normalized_position(kg, sd, data); -# endif - break; - } - } - - stack_store_float3(stack, out_offset, data); - return offset; -#else - return svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); -#endif -} - -ccl_device_noinline void svm_node_normal_map(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint color_offset, strength_offset, normal_offset, space; - svm_unpack_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space); - - float3 color = stack_load_float3(stack, color_offset); - color = 2.0f * make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f); - - bool is_backfacing = (sd->flag & SD_BACKFACING) != 0; - float3 N; - - if (space == NODE_NORMAL_MAP_TANGENT) { - /* tangent space */ - if (sd->object == OBJECT_NONE || (sd->type & PRIMITIVE_ALL_TRIANGLE) == 0) { - /* Fallback to unperturbed normal. 
*/ - stack_store_float3(stack, normal_offset, sd->N); - return; - } - - /* first try to get tangent attribute */ - const AttributeDescriptor attr = find_attribute(kg, sd, node.z); - const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); - - if (attr.offset == ATTR_STD_NOT_FOUND || attr_sign.offset == ATTR_STD_NOT_FOUND) { - /* Fallback to unperturbed normal. */ - stack_store_float3(stack, normal_offset, sd->N); - return; - } - - /* get _unnormalized_ interpolated normal and tangent */ - float3 tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL); - float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL); - float3 normal; - - if (sd->shader & SHADER_SMOOTH_NORMAL) { - normal = triangle_smooth_normal_unnormalized(kg, sd, sd->Ng, sd->prim, sd->u, sd->v); - } - else { - normal = sd->Ng; - - /* the normal is already inverted, which is too soon for the math here */ - if (is_backfacing) { - normal = -normal; - } - - object_inverse_normal_transform(kg, sd, &normal); - } - - /* apply normal map */ - float3 B = sign * cross(normal, tangent); - N = safe_normalize(color.x * tangent + color.y * B + color.z * normal); - - /* transform to world space */ - object_normal_transform(kg, sd, &N); - } - else { - /* strange blender convention */ - if (space == NODE_NORMAL_MAP_BLENDER_OBJECT || space == NODE_NORMAL_MAP_BLENDER_WORLD) { - color.y = -color.y; - color.z = -color.z; - } - - /* object, world space */ - N = color; - - if (space == NODE_NORMAL_MAP_OBJECT || space == NODE_NORMAL_MAP_BLENDER_OBJECT) - object_normal_transform(kg, sd, &N); - else - N = safe_normalize(N); - } - - /* invert normal for backfacing polygons */ - if (is_backfacing) { - N = -N; - } - - float strength = stack_load_float(stack, strength_offset); - - if (strength != 1.0f) { - strength = max(strength, 0.0f); - N = safe_normalize(sd->N + (N - sd->N) * strength); - } - - if (is_zero(N)) { - N = sd->N; - } - - stack_store_float3(stack, normal_offset, N); -} - -ccl_device_noinline void svm_node_tangent(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint tangent_offset, direction_type, axis; - svm_unpack_node_uchar3(node.y, &tangent_offset, &direction_type, &axis); - - float3 tangent; - float3 attribute_value; - const AttributeDescriptor desc = find_attribute(kg, sd, node.z); - if (desc.offset != ATTR_STD_NOT_FOUND) { - if (desc.type == NODE_ATTR_FLOAT2) { - float2 value = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); - attribute_value.x = value.x; - attribute_value.y = value.y; - attribute_value.z = 0.0f; - } - else { - attribute_value = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - } - } - - if (direction_type == NODE_TANGENT_UVMAP) { - /* UV map */ - if (desc.offset == ATTR_STD_NOT_FOUND) { - stack_store_float3(stack, tangent_offset, zero_float3()); - return; - } - else { - tangent = attribute_value; - } - } - else { - /* radial */ - float3 generated; - - if (desc.offset == ATTR_STD_NOT_FOUND) - generated = sd->P; - else - generated = attribute_value; - - if (axis == NODE_TANGENT_AXIS_X) - tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f)); - else if (axis == NODE_TANGENT_AXIS_Y) - tangent = make_float3(-(generated.z - 0.5f), 0.0f, (generated.x - 0.5f)); - else - tangent = make_float3(-(generated.y - 0.5f), (generated.x - 0.5f), 0.0f); - } - - object_normal_transform(kg, sd, &tangent); - tangent = cross(sd->N, normalize(cross(tangent, sd->N))); - stack_store_float3(stack, 
tangent_offset, tangent); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h deleted file mode 100644 index 6f6c101fb69..00000000000 --- a/intern/cycles/kernel/svm/svm_types.h +++ /dev/null @@ -1,604 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SVM_TYPES_H__ -#define __SVM_TYPES_H__ - -CCL_NAMESPACE_BEGIN - -/* Stack */ - -/* SVM stack has a fixed size */ -#define SVM_STACK_SIZE 255 -/* SVM stack offsets with this value indicate that it's not on the stack */ -#define SVM_STACK_INVALID 255 - -#define SVM_BUMP_EVAL_STATE_SIZE 9 - -/* Nodes */ - -typedef enum ShaderNodeType { - NODE_END = 0, - NODE_SHADER_JUMP, - NODE_CLOSURE_BSDF, - NODE_CLOSURE_EMISSION, - NODE_CLOSURE_BACKGROUND, - NODE_CLOSURE_SET_WEIGHT, - NODE_CLOSURE_WEIGHT, - NODE_EMISSION_WEIGHT, - NODE_MIX_CLOSURE, - NODE_JUMP_IF_ZERO, - NODE_JUMP_IF_ONE, - NODE_GEOMETRY, - NODE_CONVERT, - NODE_TEX_COORD, - NODE_VALUE_F, - NODE_VALUE_V, - NODE_ATTR, - NODE_VERTEX_COLOR, - NODE_GEOMETRY_BUMP_DX, - NODE_GEOMETRY_BUMP_DY, - NODE_SET_DISPLACEMENT, - NODE_DISPLACEMENT, - NODE_VECTOR_DISPLACEMENT, - NODE_TEX_IMAGE, - NODE_TEX_IMAGE_BOX, - NODE_TEX_NOISE, - NODE_SET_BUMP, - NODE_ATTR_BUMP_DX, - NODE_ATTR_BUMP_DY, - NODE_VERTEX_COLOR_BUMP_DX, - NODE_VERTEX_COLOR_BUMP_DY, - NODE_TEX_COORD_BUMP_DX, - NODE_TEX_COORD_BUMP_DY, - NODE_CLOSURE_SET_NORMAL, - NODE_ENTER_BUMP_EVAL, - NODE_LEAVE_BUMP_EVAL, - NODE_HSV, - NODE_CLOSURE_HOLDOUT, - NODE_FRESNEL, - NODE_LAYER_WEIGHT, - NODE_CLOSURE_VOLUME, - NODE_PRINCIPLED_VOLUME, - NODE_MATH, - NODE_VECTOR_MATH, - NODE_RGB_RAMP, - NODE_GAMMA, - NODE_BRIGHTCONTRAST, - NODE_LIGHT_PATH, - NODE_OBJECT_INFO, - NODE_PARTICLE_INFO, - NODE_HAIR_INFO, - NODE_TEXTURE_MAPPING, - NODE_MAPPING, - NODE_MIN_MAX, - NODE_CAMERA, - NODE_TEX_ENVIRONMENT, - NODE_TEX_SKY, - NODE_TEX_GRADIENT, - NODE_TEX_VORONOI, - NODE_TEX_MUSGRAVE, - NODE_TEX_WAVE, - NODE_TEX_MAGIC, - NODE_TEX_CHECKER, - NODE_TEX_BRICK, - NODE_TEX_WHITE_NOISE, - NODE_NORMAL, - NODE_LIGHT_FALLOFF, - NODE_IES, - NODE_RGB_CURVES, - NODE_VECTOR_CURVES, - NODE_TANGENT, - NODE_NORMAL_MAP, - NODE_INVERT, - NODE_MIX, - NODE_SEPARATE_VECTOR, - NODE_COMBINE_VECTOR, - NODE_SEPARATE_HSV, - NODE_COMBINE_HSV, - NODE_VECTOR_ROTATE, - NODE_VECTOR_TRANSFORM, - NODE_WIREFRAME, - NODE_WAVELENGTH, - NODE_BLACKBODY, - NODE_MAP_RANGE, - NODE_CLAMP, - NODE_BEVEL, - NODE_AMBIENT_OCCLUSION, - NODE_TEX_VOXEL, - NODE_AOV_START, - NODE_AOV_COLOR, - NODE_AOV_VALUE, - NODE_FLOAT_CURVE, - /* NOTE: for best OpenCL performance, item definition in the enum must - * match the switch case order in svm.h. 
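These node types drive a switch-based interpreter: a compiled shader is an array of packed nodes, each case reads its inputs from a flat float stack and writes its outputs back by byte offset. A toy analogy only (the real evaluation loop lives in svm.h; all names here are made up):

#include <cstdio>

enum ToyNodeType { TOY_NODE_END = 0, TOY_NODE_VALUE, TOY_NODE_ADD };

struct ToyNode { ToyNodeType type; int a, b, out; float value; };

/* Dispatch loop: one switch case per node type, all data flows via the stack. */
static void toy_eval(const ToyNode *program, float *stack)
{
  for (int pc = 0;; pc++) {
    const ToyNode &n = program[pc];
    switch (n.type) {
      case TOY_NODE_VALUE: stack[n.out] = n.value; break;
      case TOY_NODE_ADD: stack[n.out] = stack[n.a] + stack[n.b]; break;
      case TOY_NODE_END: return;
    }
  }
}

int main()
{
  float stack[255] = {0};
  const ToyNode program[] = {
      {TOY_NODE_VALUE, 0, 0, 0, 1.5f},
      {TOY_NODE_VALUE, 0, 0, 1, 2.0f},
      {TOY_NODE_ADD, 0, 1, 2, 0.0f},
      {TOY_NODE_END, 0, 0, 0, 0.0f},
  };
  toy_eval(program, stack);
  printf("%g\n", stack[2]); /* 3.5 */
  return 0;
}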
*/ -} ShaderNodeType; - -typedef enum NodeAttributeOutputType { - NODE_ATTR_OUTPUT_FLOAT3 = 0, - NODE_ATTR_OUTPUT_FLOAT, - NODE_ATTR_OUTPUT_FLOAT_ALPHA, -} NodeAttributeOutputType; - -typedef enum NodeAttributeType { - NODE_ATTR_FLOAT = 0, - NODE_ATTR_FLOAT2, - NODE_ATTR_FLOAT3, - NODE_ATTR_FLOAT4, - NODE_ATTR_RGBA, - NODE_ATTR_MATRIX -} NodeAttributeType; - -typedef enum NodeGeometry { - NODE_GEOM_P = 0, - NODE_GEOM_N, - NODE_GEOM_T, - NODE_GEOM_I, - NODE_GEOM_Ng, - NODE_GEOM_uv -} NodeGeometry; - -typedef enum NodeObjectInfo { - NODE_INFO_OB_LOCATION, - NODE_INFO_OB_COLOR, - NODE_INFO_OB_INDEX, - NODE_INFO_MAT_INDEX, - NODE_INFO_OB_RANDOM -} NodeObjectInfo; - -typedef enum NodeParticleInfo { - NODE_INFO_PAR_INDEX, - NODE_INFO_PAR_RANDOM, - NODE_INFO_PAR_AGE, - NODE_INFO_PAR_LIFETIME, - NODE_INFO_PAR_LOCATION, - NODE_INFO_PAR_ROTATION, - NODE_INFO_PAR_SIZE, - NODE_INFO_PAR_VELOCITY, - NODE_INFO_PAR_ANGULAR_VELOCITY -} NodeParticleInfo; - -typedef enum NodeHairInfo { - NODE_INFO_CURVE_IS_STRAND, - NODE_INFO_CURVE_INTERCEPT, - NODE_INFO_CURVE_LENGTH, - NODE_INFO_CURVE_THICKNESS, - /* Fade for minimum hair width transiency. */ - // NODE_INFO_CURVE_FADE, - NODE_INFO_CURVE_TANGENT_NORMAL, - NODE_INFO_CURVE_RANDOM, -} NodeHairInfo; - -typedef enum NodeLightPath { - NODE_LP_camera = 0, - NODE_LP_shadow, - NODE_LP_diffuse, - NODE_LP_glossy, - NODE_LP_singular, - NODE_LP_reflection, - NODE_LP_transmission, - NODE_LP_volume_scatter, - NODE_LP_backfacing, - NODE_LP_ray_length, - NODE_LP_ray_depth, - NODE_LP_ray_diffuse, - NODE_LP_ray_glossy, - NODE_LP_ray_transparent, - NODE_LP_ray_transmission, -} NodeLightPath; - -typedef enum NodeLightFalloff { - NODE_LIGHT_FALLOFF_QUADRATIC, - NODE_LIGHT_FALLOFF_LINEAR, - NODE_LIGHT_FALLOFF_CONSTANT -} NodeLightFalloff; - -typedef enum NodeTexCoord { - NODE_TEXCO_NORMAL, - NODE_TEXCO_OBJECT, - NODE_TEXCO_CAMERA, - NODE_TEXCO_WINDOW, - NODE_TEXCO_REFLECTION, - NODE_TEXCO_DUPLI_GENERATED, - NODE_TEXCO_DUPLI_UV, - NODE_TEXCO_VOLUME_GENERATED -} NodeTexCoord; - -typedef enum NodeMix { - NODE_MIX_BLEND = 0, - NODE_MIX_ADD, - NODE_MIX_MUL, - NODE_MIX_SUB, - NODE_MIX_SCREEN, - NODE_MIX_DIV, - NODE_MIX_DIFF, - NODE_MIX_DARK, - NODE_MIX_LIGHT, - NODE_MIX_OVERLAY, - NODE_MIX_DODGE, - NODE_MIX_BURN, - NODE_MIX_HUE, - NODE_MIX_SAT, - NODE_MIX_VAL, - NODE_MIX_COLOR, - NODE_MIX_SOFT, - NODE_MIX_LINEAR, - NODE_MIX_CLAMP /* used for the clamp UI option */ -} NodeMix; - -typedef enum NodeMathType { - NODE_MATH_ADD, - NODE_MATH_SUBTRACT, - NODE_MATH_MULTIPLY, - NODE_MATH_DIVIDE, - NODE_MATH_SINE, - NODE_MATH_COSINE, - NODE_MATH_TANGENT, - NODE_MATH_ARCSINE, - NODE_MATH_ARCCOSINE, - NODE_MATH_ARCTANGENT, - NODE_MATH_POWER, - NODE_MATH_LOGARITHM, - NODE_MATH_MINIMUM, - NODE_MATH_MAXIMUM, - NODE_MATH_ROUND, - NODE_MATH_LESS_THAN, - NODE_MATH_GREATER_THAN, - NODE_MATH_MODULO, - NODE_MATH_ABSOLUTE, - NODE_MATH_ARCTAN2, - NODE_MATH_FLOOR, - NODE_MATH_CEIL, - NODE_MATH_FRACTION, - NODE_MATH_SQRT, - NODE_MATH_INV_SQRT, - NODE_MATH_SIGN, - NODE_MATH_EXPONENT, - NODE_MATH_RADIANS, - NODE_MATH_DEGREES, - NODE_MATH_SINH, - NODE_MATH_COSH, - NODE_MATH_TANH, - NODE_MATH_TRUNC, - NODE_MATH_SNAP, - NODE_MATH_WRAP, - NODE_MATH_COMPARE, - NODE_MATH_MULTIPLY_ADD, - NODE_MATH_PINGPONG, - NODE_MATH_SMOOTH_MIN, - NODE_MATH_SMOOTH_MAX, -} NodeMathType; - -typedef enum NodeVectorMathType { - NODE_VECTOR_MATH_ADD, - NODE_VECTOR_MATH_SUBTRACT, - NODE_VECTOR_MATH_MULTIPLY, - NODE_VECTOR_MATH_DIVIDE, - - NODE_VECTOR_MATH_CROSS_PRODUCT, - NODE_VECTOR_MATH_PROJECT, - NODE_VECTOR_MATH_REFLECT, - 
NODE_VECTOR_MATH_DOT_PRODUCT, - - NODE_VECTOR_MATH_DISTANCE, - NODE_VECTOR_MATH_LENGTH, - NODE_VECTOR_MATH_SCALE, - NODE_VECTOR_MATH_NORMALIZE, - - NODE_VECTOR_MATH_SNAP, - NODE_VECTOR_MATH_FLOOR, - NODE_VECTOR_MATH_CEIL, - NODE_VECTOR_MATH_MODULO, - NODE_VECTOR_MATH_FRACTION, - NODE_VECTOR_MATH_ABSOLUTE, - NODE_VECTOR_MATH_MINIMUM, - NODE_VECTOR_MATH_MAXIMUM, - NODE_VECTOR_MATH_WRAP, - NODE_VECTOR_MATH_SINE, - NODE_VECTOR_MATH_COSINE, - NODE_VECTOR_MATH_TANGENT, - NODE_VECTOR_MATH_REFRACT, - NODE_VECTOR_MATH_FACEFORWARD, - NODE_VECTOR_MATH_MULTIPLY_ADD, -} NodeVectorMathType; - -typedef enum NodeClampType { - NODE_CLAMP_MINMAX, - NODE_CLAMP_RANGE, -} NodeClampType; - -typedef enum NodeMapRangeType { - NODE_MAP_RANGE_LINEAR, - NODE_MAP_RANGE_STEPPED, - NODE_MAP_RANGE_SMOOTHSTEP, - NODE_MAP_RANGE_SMOOTHERSTEP, -} NodeMapRangeType; - -typedef enum NodeMappingType { - NODE_MAPPING_TYPE_POINT, - NODE_MAPPING_TYPE_TEXTURE, - NODE_MAPPING_TYPE_VECTOR, - NODE_MAPPING_TYPE_NORMAL -} NodeMappingType; - -typedef enum NodeVectorRotateType { - NODE_VECTOR_ROTATE_TYPE_AXIS, - NODE_VECTOR_ROTATE_TYPE_AXIS_X, - NODE_VECTOR_ROTATE_TYPE_AXIS_Y, - NODE_VECTOR_ROTATE_TYPE_AXIS_Z, - NODE_VECTOR_ROTATE_TYPE_EULER_XYZ, -} NodeVectorRotateType; - -typedef enum NodeVectorTransformType { - NODE_VECTOR_TRANSFORM_TYPE_VECTOR, - NODE_VECTOR_TRANSFORM_TYPE_POINT, - NODE_VECTOR_TRANSFORM_TYPE_NORMAL -} NodeVectorTransformType; - -typedef enum NodeVectorTransformConvertSpace { - NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD, - NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT, - NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA -} NodeVectorTransformConvertSpace; - -typedef enum NodeConvert { - NODE_CONVERT_FV, - NODE_CONVERT_FI, - NODE_CONVERT_CF, - NODE_CONVERT_CI, - NODE_CONVERT_VF, - NODE_CONVERT_VI, - NODE_CONVERT_IF, - NODE_CONVERT_IV -} NodeConvert; - -typedef enum NodeMusgraveType { - NODE_MUSGRAVE_MULTIFRACTAL, - NODE_MUSGRAVE_FBM, - NODE_MUSGRAVE_HYBRID_MULTIFRACTAL, - NODE_MUSGRAVE_RIDGED_MULTIFRACTAL, - NODE_MUSGRAVE_HETERO_TERRAIN -} NodeMusgraveType; - -typedef enum NodeWaveType { NODE_WAVE_BANDS, NODE_WAVE_RINGS } NodeWaveType; - -typedef enum NodeWaveBandsDirection { - NODE_WAVE_BANDS_DIRECTION_X, - NODE_WAVE_BANDS_DIRECTION_Y, - NODE_WAVE_BANDS_DIRECTION_Z, - NODE_WAVE_BANDS_DIRECTION_DIAGONAL -} NodeWaveBandsDirection; - -typedef enum NodeWaveRingsDirection { - NODE_WAVE_RINGS_DIRECTION_X, - NODE_WAVE_RINGS_DIRECTION_Y, - NODE_WAVE_RINGS_DIRECTION_Z, - NODE_WAVE_RINGS_DIRECTION_SPHERICAL -} NodeWaveRingsDirection; - -typedef enum NodeWaveProfile { - NODE_WAVE_PROFILE_SIN, - NODE_WAVE_PROFILE_SAW, - NODE_WAVE_PROFILE_TRI, -} NodeWaveProfile; - -typedef enum NodeSkyType { NODE_SKY_PREETHAM, NODE_SKY_HOSEK, NODE_SKY_NISHITA } NodeSkyType; - -typedef enum NodeGradientType { - NODE_BLEND_LINEAR, - NODE_BLEND_QUADRATIC, - NODE_BLEND_EASING, - NODE_BLEND_DIAGONAL, - NODE_BLEND_RADIAL, - NODE_BLEND_QUADRATIC_SPHERE, - NODE_BLEND_SPHERICAL -} NodeGradientType; - -typedef enum NodeVoronoiDistanceMetric { - NODE_VORONOI_EUCLIDEAN, - NODE_VORONOI_MANHATTAN, - NODE_VORONOI_CHEBYCHEV, - NODE_VORONOI_MINKOWSKI, -} NodeVoronoiDistanceMetric; - -typedef enum NodeVoronoiFeature { - NODE_VORONOI_F1, - NODE_VORONOI_F2, - NODE_VORONOI_SMOOTH_F1, - NODE_VORONOI_DISTANCE_TO_EDGE, - NODE_VORONOI_N_SPHERE_RADIUS, -} NodeVoronoiFeature; - -typedef enum NodeBlendWeightType { - NODE_LAYER_WEIGHT_FRESNEL, - NODE_LAYER_WEIGHT_FACING -} NodeBlendWeightType; - -typedef enum NodeTangentDirectionType { - NODE_TANGENT_RADIAL, - NODE_TANGENT_UVMAP -} 
NodeTangentDirectionType; - -typedef enum NodeTangentAxis { - NODE_TANGENT_AXIS_X, - NODE_TANGENT_AXIS_Y, - NODE_TANGENT_AXIS_Z -} NodeTangentAxis; - -typedef enum NodeNormalMapSpace { - NODE_NORMAL_MAP_TANGENT, - NODE_NORMAL_MAP_OBJECT, - NODE_NORMAL_MAP_WORLD, - NODE_NORMAL_MAP_BLENDER_OBJECT, - NODE_NORMAL_MAP_BLENDER_WORLD, -} NodeNormalMapSpace; - -typedef enum NodeImageProjection { - NODE_IMAGE_PROJ_FLAT = 0, - NODE_IMAGE_PROJ_BOX = 1, - NODE_IMAGE_PROJ_SPHERE = 2, - NODE_IMAGE_PROJ_TUBE = 3, -} NodeImageProjection; - -typedef enum NodeImageFlags { - NODE_IMAGE_COMPRESS_AS_SRGB = 1, - NODE_IMAGE_ALPHA_UNASSOCIATE = 2, -} NodeImageFlags; - -typedef enum NodeEnvironmentProjection { - NODE_ENVIRONMENT_EQUIRECTANGULAR = 0, - NODE_ENVIRONMENT_MIRROR_BALL = 1, -} NodeEnvironmentProjection; - -typedef enum NodeBumpOffset { - NODE_BUMP_OFFSET_CENTER, - NODE_BUMP_OFFSET_DX, - NODE_BUMP_OFFSET_DY, -} NodeBumpOffset; - -typedef enum NodeTexVoxelSpace { - NODE_TEX_VOXEL_SPACE_OBJECT = 0, - NODE_TEX_VOXEL_SPACE_WORLD = 1, -} NodeTexVoxelSpace; - -typedef enum NodeAO { - NODE_AO_ONLY_LOCAL = (1 << 0), - NODE_AO_INSIDE = (1 << 1), - NODE_AO_GLOBAL_RADIUS = (1 << 2), -} NodeAO; - -typedef enum ShaderType { - SHADER_TYPE_SURFACE, - SHADER_TYPE_VOLUME, - SHADER_TYPE_DISPLACEMENT, - SHADER_TYPE_BUMP, -} ShaderType; - -typedef enum NodePrincipledHairParametrization { - NODE_PRINCIPLED_HAIR_REFLECTANCE = 0, - NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION = 1, - NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION = 2, - NODE_PRINCIPLED_HAIR_NUM, -} NodePrincipledHairParametrization; - -/* Closure */ - -typedef enum ClosureType { - /* Special type, flags generic node as a non-BSDF. */ - CLOSURE_NONE_ID, - - CLOSURE_BSDF_ID, - - /* Diffuse */ - CLOSURE_BSDF_DIFFUSE_ID, - CLOSURE_BSDF_OREN_NAYAR_ID, - CLOSURE_BSDF_DIFFUSE_RAMP_ID, - CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID, - CLOSURE_BSDF_PRINCIPLED_SHEEN_ID, - CLOSURE_BSDF_DIFFUSE_TOON_ID, - CLOSURE_BSDF_TRANSLUCENT_ID, - - /* Glossy */ - CLOSURE_BSDF_REFLECTION_ID, - CLOSURE_BSDF_MICROFACET_GGX_ID, - CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID, - CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID, - CLOSURE_BSDF_MICROFACET_BECKMANN_ID, - CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID, - CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID, - CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID, - CLOSURE_BSDF_ASHIKHMIN_VELVET_ID, - CLOSURE_BSDF_PHONG_RAMP_ID, - CLOSURE_BSDF_GLOSSY_TOON_ID, - CLOSURE_BSDF_HAIR_REFLECTION_ID, - - /* Transmission */ - CLOSURE_BSDF_REFRACTION_ID, - CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID, - CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID, - CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID, - CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID, - CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID, - CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID, - CLOSURE_BSDF_SHARP_GLASS_ID, - CLOSURE_BSDF_HAIR_PRINCIPLED_ID, - CLOSURE_BSDF_HAIR_TRANSMISSION_ID, - - /* Special cases */ - CLOSURE_BSDF_TRANSPARENT_ID, - - /* BSSRDF */ - CLOSURE_BSSRDF_BURLEY_ID, - CLOSURE_BSSRDF_RANDOM_WALK_ID, - CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID, - - /* Other */ - CLOSURE_HOLDOUT_ID, - - /* Volume */ - CLOSURE_VOLUME_ID, - CLOSURE_VOLUME_ABSORPTION_ID, - CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, - - CLOSURE_BSDF_PRINCIPLED_ID, - - NBUILTIN_CLOSURES -} ClosureType; - -/* watch this, being lazy with memory usage */ -#define CLOSURE_IS_BSDF(type) (type <= CLOSURE_BSDF_TRANSPARENT_ID) -#define CLOSURE_IS_BSDF_DIFFUSE(type) \ - (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_TRANSLUCENT_ID) -#define CLOSURE_IS_BSDF_GLOSSY(type) \ - ((type >= 
CLOSURE_BSDF_REFLECTION_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID) || \ - (type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID)) -#define CLOSURE_IS_BSDF_TRANSMISSION(type) \ - (type >= CLOSURE_BSDF_REFRACTION_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID) -#define CLOSURE_IS_BSDF_SINGULAR(type) \ - (type == CLOSURE_BSDF_REFLECTION_ID || type == CLOSURE_BSDF_REFRACTION_ID || \ - type == CLOSURE_BSDF_TRANSPARENT_ID) -#define CLOSURE_IS_BSDF_TRANSPARENT(type) (type == CLOSURE_BSDF_TRANSPARENT_ID) -#define CLOSURE_IS_BSDF_MULTISCATTER(type) \ - (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID || \ - type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) -#define CLOSURE_IS_BSDF_MICROFACET(type) \ - ((type >= CLOSURE_BSDF_MICROFACET_GGX_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID) || \ - (type >= CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID && \ - type <= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) || \ - (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID)) -#define CLOSURE_IS_BSDF_MICROFACET_FRESNEL(type) \ - (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID || \ - type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID || \ - type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID || \ - type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) -#define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) -#define CLOSURE_IS_BSSRDF(type) \ - (type >= CLOSURE_BSSRDF_BURLEY_ID && type <= CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) -#define CLOSURE_IS_VOLUME(type) \ - (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) -#define CLOSURE_IS_VOLUME_SCATTER(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) -#define CLOSURE_IS_VOLUME_ABSORPTION(type) (type == CLOSURE_VOLUME_ABSORPTION_ID) -#define CLOSURE_IS_HOLDOUT(type) (type == CLOSURE_HOLDOUT_ID) -#define CLOSURE_IS_PHASE(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) -#define CLOSURE_IS_GLASS(type) \ - (type >= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID) -#define CLOSURE_IS_PRINCIPLED(type) (type == CLOSURE_BSDF_PRINCIPLED_ID) - -#define CLOSURE_WEIGHT_CUTOFF 1e-5f - -CCL_NAMESPACE_END - -#endif /* __SVM_TYPES_H__ */ diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h deleted file mode 100644 index cc72961d0f6..00000000000 --- a/intern/cycles/kernel/svm/svm_value.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
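The value nodes that follow store float constants whose bits were packed into the node as a uint and recovered with __uint_as_float(). On the host, the equivalent bit cast is a memcpy; a plain cast would convert the numeric value instead of reinterpreting the bits. A minimal sketch (function names are illustrative):

#include <cassert>
#include <cstdint>
#include <cstring>

static float uint_as_float_sketch(uint32_t u)
{
  float f;
  std::memcpy(&f, &u, sizeof(f)); /* reinterpret the same 32 bits as a float */
  return f;
}

static uint32_t float_as_uint_sketch(float f)
{
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u));
  return u;
}

int main()
{
  assert(uint_as_float_sketch(float_as_uint_sketch(1.25f)) == 1.25f);
  return 0;
}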
- */ - -CCL_NAMESPACE_BEGIN - -/* Value Nodes */ - -ccl_device void svm_node_value_f(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint ivalue, - uint out_offset) -{ - stack_store_float(stack, out_offset, __uint_as_float(ivalue)); -} - -ccl_device int svm_node_value_v(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint out_offset, - int offset) -{ - /* read extra data */ - uint4 node1 = read_node(kg, &offset); - float3 p = make_float3( - __uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w)); - - stack_store_float3(stack, out_offset, p); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_vector_rotate.h b/intern/cycles/kernel/svm/svm_vector_rotate.h deleted file mode 100644 index c20f9b2556f..00000000000 --- a/intern/cycles/kernel/svm/svm_vector_rotate.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright 2011-2020 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Vector Rotate */ - -ccl_device_noinline void svm_node_vector_rotate(ccl_private ShaderData *sd, - ccl_private float *stack, - uint input_stack_offsets, - uint axis_stack_offsets, - uint result_stack_offset) -{ - uint type, vector_stack_offset, rotation_stack_offset, center_stack_offset, axis_stack_offset, - angle_stack_offset, invert; - - svm_unpack_node_uchar4( - input_stack_offsets, &type, &vector_stack_offset, &rotation_stack_offset, &invert); - svm_unpack_node_uchar3( - axis_stack_offsets, ¢er_stack_offset, &axis_stack_offset, &angle_stack_offset); - - if (stack_valid(result_stack_offset)) { - - float3 vector = stack_load_float3(stack, vector_stack_offset); - float3 center = stack_load_float3(stack, center_stack_offset); - float3 result = make_float3(0.0f, 0.0f, 0.0f); - - if (type == NODE_VECTOR_ROTATE_TYPE_EULER_XYZ) { - float3 rotation = stack_load_float3(stack, rotation_stack_offset); // Default XYZ. - Transform rotationTransform = euler_to_transform(rotation); - if (invert) { - result = transform_direction_transposed(&rotationTransform, vector - center) + center; - } - else { - result = transform_direction(&rotationTransform, vector - center) + center; - } - } - else { - float3 axis; - float axis_length; - switch (type) { - case NODE_VECTOR_ROTATE_TYPE_AXIS_X: - axis = make_float3(1.0f, 0.0f, 0.0f); - axis_length = 1.0f; - break; - case NODE_VECTOR_ROTATE_TYPE_AXIS_Y: - axis = make_float3(0.0f, 1.0f, 0.0f); - axis_length = 1.0f; - break; - case NODE_VECTOR_ROTATE_TYPE_AXIS_Z: - axis = make_float3(0.0f, 0.0f, 1.0f); - axis_length = 1.0f; - break; - default: - axis = stack_load_float3(stack, axis_stack_offset); - axis_length = len(axis); - break; - } - float angle = stack_load_float(stack, angle_stack_offset); - angle = invert ? -angle : angle; - result = (axis_length != 0.0f) ? 
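svm_node_value_f above does not convert its payload: the float constant is stored in the SVM node as a raw uint and reinterpreted bit-for-bit via __uint_as_float. A small portable sketch of the same round trip, using memcpy as a stand-in for the kernel helpers:

// Sketch only: portable stand-in for __uint_as_float / __float_as_uint using memcpy.
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t float_as_uint(float f)
{
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u));
  return u;
}

static float uint_as_float(uint32_t u)
{
  float f;
  std::memcpy(&f, &u, sizeof(f));
  return f;
}

int main()
{
  // The float constant is encoded into the node as a raw uint payload and
  // reinterpreted (not converted) back when the node executes.
  const float value = 0.25f;
  const uint32_t encoded = float_as_uint(value);
  assert(uint_as_float(encoded) == value);
  return 0;
}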
- rotate_around_axis(vector - center, axis / axis_length, angle) + center : - vector; - } - - stack_store_float3(stack, result_stack_offset, result); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h deleted file mode 100644 index 4e0d36647da..00000000000 --- a/intern/cycles/kernel/svm/svm_vector_transform.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Vector Transform */ - -ccl_device_noinline void svm_node_vector_transform(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint itype, ifrom, ito; - uint vector_in, vector_out; - - svm_unpack_node_uchar3(node.y, &itype, &ifrom, &ito); - svm_unpack_node_uchar2(node.z, &vector_in, &vector_out); - - float3 in = stack_load_float3(stack, vector_in); - - NodeVectorTransformType type = (NodeVectorTransformType)itype; - NodeVectorTransformConvertSpace from = (NodeVectorTransformConvertSpace)ifrom; - NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito; - - Transform tfm; - bool is_object = (sd->object != OBJECT_NONE); - bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR || - type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL); - - /* From world */ - if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD) { - if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { - tfm = kernel_data.cam.worldtocamera; - if (is_direction) - in = transform_direction(&tfm, in); - else - in = transform_point(&tfm, in); - } - else if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) { - if (is_direction) - object_inverse_dir_transform(kg, sd, &in); - else - object_inverse_position_transform(kg, sd, &in); - } - } - - /* From camera */ - else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { - if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || - to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) { - tfm = kernel_data.cam.cameratoworld; - if (is_direction) - in = transform_direction(&tfm, in); - else - in = transform_point(&tfm, in); - } - if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) { - if (is_direction) - object_inverse_dir_transform(kg, sd, &in); - else - object_inverse_position_transform(kg, sd, &in); - } - } - - /* From object */ - else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) { - if ((to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || - to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) && - is_object) { - if (is_direction) - object_dir_transform(kg, sd, &in); - else - object_position_transform(kg, sd, &in); - } - if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { - tfm = kernel_data.cam.worldtocamera; - if (is_direction) - in = transform_direction(&tfm, in); - else - in = transform_point(&tfm, in); - } - } - - /* Normalize Normal */ - if (type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL) - in = normalize(in); - - /* Output */ - if 
(stack_valid(vector_out)) { - stack_store_float3(stack, vector_out, in); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_vertex_color.h b/intern/cycles/kernel/svm/svm_vertex_color.h deleted file mode 100644 index a5fa15ee085..00000000000 --- a/intern/cycles/kernel/svm/svm_vertex_color.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -ccl_device_noinline void svm_node_vertex_color(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint layer_id, - uint color_offset, - uint alpha_offset) -{ - AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); - if (descriptor.offset != ATTR_STD_NOT_FOUND) { - float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, NULL, NULL); - stack_store_float3(stack, color_offset, float4_to_float3(vertex_color)); - stack_store_float(stack, alpha_offset, vertex_color.w); - } - else { - stack_store_float3(stack, color_offset, make_float3(0.0f, 0.0f, 0.0f)); - stack_store_float(stack, alpha_offset, 0.0f); - } -} - -ccl_device_noinline void svm_node_vertex_color_bump_dx(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint layer_id, - uint color_offset, - uint alpha_offset) -{ - AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); - if (descriptor.offset != ATTR_STD_NOT_FOUND) { - float4 dx; - float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, &dx, NULL); - vertex_color += dx; - stack_store_float3(stack, color_offset, float4_to_float3(vertex_color)); - stack_store_float(stack, alpha_offset, vertex_color.w); - } - else { - stack_store_float3(stack, color_offset, make_float3(0.0f, 0.0f, 0.0f)); - stack_store_float(stack, alpha_offset, 0.0f); - } -} - -ccl_device_noinline void svm_node_vertex_color_bump_dy(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint layer_id, - uint color_offset, - uint alpha_offset) -{ - AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); - if (descriptor.offset != ATTR_STD_NOT_FOUND) { - float4 dy; - float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, NULL, &dy); - vertex_color += dy; - stack_store_float3(stack, color_offset, float4_to_float3(vertex_color)); - stack_store_float(stack, alpha_offset, vertex_color.w); - } - else { - stack_store_float3(stack, color_offset, make_float3(0.0f, 0.0f, 0.0f)); - stack_store_float(stack, alpha_offset, 0.0f); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h deleted file mode 100644 index b8067520770..00000000000 --- a/intern/cycles/kernel/svm/svm_voronoi.h +++ /dev/null @@ -1,1162 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
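The is_direction flag in the vector transform node above decides between transform_point and transform_direction: only points pick up the translation part of the matrix. A self-contained sketch of that distinction, with a hypothetical 3x4 affine type rather than the kernel's Transform:

// Sketch only: why the Vector Transform node distinguishes points from directions/normals.
#include <cstdio>

struct Vec3 { float x, y, z; };
struct Affine { float m[3][4]; };  // Rows of [rotation | translation].

static Vec3 transform_point(const Affine &t, Vec3 p)
{
  return {t.m[0][0] * p.x + t.m[0][1] * p.y + t.m[0][2] * p.z + t.m[0][3],
          t.m[1][0] * p.x + t.m[1][1] * p.y + t.m[1][2] * p.z + t.m[1][3],
          t.m[2][0] * p.x + t.m[2][1] * p.y + t.m[2][2] * p.z + t.m[2][3]};
}

static Vec3 transform_direction(const Affine &t, Vec3 d)
{
  // Directions ignore the translation column.
  return {t.m[0][0] * d.x + t.m[0][1] * d.y + t.m[0][2] * d.z,
          t.m[1][0] * d.x + t.m[1][1] * d.y + t.m[1][2] * d.z,
          t.m[2][0] * d.x + t.m[2][1] * d.y + t.m[2][2] * d.z};
}

int main()
{
  const Affine translate_x = {{{1, 0, 0, 5}, {0, 1, 0, 0}, {0, 0, 1, 0}}};
  const Vec3 v = {1.0f, 2.0f, 3.0f};
  const Vec3 p = transform_point(translate_x, v);      // (6, 2, 3): points move.
  const Vec3 d = transform_direction(translate_x, v);  // (1, 2, 3): directions do not.
  std::printf("point (%g %g %g) direction (%g %g %g)\n", p.x, p.y, p.z, d.x, d.y, d.z);
  return 0;
}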
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* - * Original code is under the MIT License, Copyright (c) 2013 Inigo Quilez. - * - * Smooth Voronoi: - * - * - https://wiki.blender.org/wiki/User:OmarSquircleArt/GSoC2019/Documentation/Smooth_Voronoi - * - * Distance To Edge based on: - * - * - https://www.iquilezles.org/www/articles/voronoilines/voronoilines.htm - * - https://www.shadertoy.com/view/ldl3W8 - * - * With optimization to change -2..2 scan window to -1..1 for better performance, - * as explained in https://www.shadertoy.com/view/llG3zy. - */ - -/* **** 1D Voronoi **** */ - -ccl_device float voronoi_distance_1d(float a, - float b, - NodeVoronoiDistanceMetric metric, - float exponent) -{ - return fabsf(b - a); -} - -ccl_device void voronoi_f1_1d(float w, - float exponent, - float randomness, - NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float *outW) -{ - float cellPosition = floorf(w); - float localPosition = w - cellPosition; - - float minDistance = 8.0f; - float targetOffset = 0.0f; - float targetPosition = 0.0f; - for (int i = -1; i <= 1; i++) { - float cellOffset = i; - float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_1d(pointPosition, localPosition, metric, exponent); - if (distanceToPoint < minDistance) { - targetOffset = cellOffset; - minDistance = distanceToPoint; - targetPosition = pointPosition; - } - } - *outDistance = minDistance; - *outColor = hash_float_to_float3(cellPosition + targetOffset); - *outW = targetPosition + cellPosition; -} - -ccl_device void voronoi_smooth_f1_1d(float w, - float smoothness, - float exponent, - float randomness, - NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float *outW) -{ - float cellPosition = floorf(w); - float localPosition = w - cellPosition; - - float smoothDistance = 8.0f; - float smoothPosition = 0.0f; - float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f); - for (int i = -2; i <= 2; i++) { - float cellOffset = i; - float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_1d(pointPosition, localPosition, metric, exponent); - float h = smoothstep( - 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness); - float correctionFactor = smoothness * h * (1.0f - h); - smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor; - correctionFactor /= 1.0f + 3.0f * smoothness; - float3 cellColor = hash_float_to_float3(cellPosition + cellOffset); - smoothColor = mix(smoothColor, cellColor, h) - correctionFactor; - smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor; - } - *outDistance = smoothDistance; - *outColor = smoothColor; - *outW = cellPosition + smoothPosition; -} - -ccl_device void voronoi_f2_1d(float w, - float exponent, - float randomness, - NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float *outW) 
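The F1 variants above all follow the same recipe: jitter one feature point per grid cell with a hash, scan the neighboring cells, and keep the nearest point. A standalone 1D sketch of that loop; hash01() is a crude placeholder for the kernel's hash_float_to_float, not the actual Cycles hash:

// Sketch only: standalone 1D Voronoi F1 (distance to the nearest jittered feature point).
#include <cmath>
#include <cstdio>

static float hash01(float x)  // Deterministic pseudo-random value in [0, 1); placeholder hash.
{
  float ipart;
  return std::fabs(std::modf(std::sin(x * 127.1f) * 43758.5453f, &ipart));
}

static float voronoi_f1_1d(float w, float randomness)
{
  const float cell = std::floor(w);
  const float local = w - cell;
  float min_dist = 8.0f;  // Larger than any possible distance inside the -1..1 window.
  for (int i = -1; i <= 1; i++) {
    const float offset = (float)i;
    const float point = offset + hash01(cell + offset) * randomness;
    min_dist = std::fmin(min_dist, std::fabs(point - local));
  }
  return min_dist;
}

int main()
{
  for (float w = 0.0f; w < 2.0f; w += 0.5f)
    std::printf("F1(%.1f) = %.3f\n", w, voronoi_f1_1d(w, 1.0f));
  return 0;
}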
-{ - float cellPosition = floorf(w); - float localPosition = w - cellPosition; - - float distanceF1 = 8.0f; - float distanceF2 = 8.0f; - float offsetF1 = 0.0f; - float positionF1 = 0.0f; - float offsetF2 = 0.0f; - float positionF2 = 0.0f; - for (int i = -1; i <= 1; i++) { - float cellOffset = i; - float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_1d(pointPosition, localPosition, metric, exponent); - if (distanceToPoint < distanceF1) { - distanceF2 = distanceF1; - distanceF1 = distanceToPoint; - offsetF2 = offsetF1; - offsetF1 = cellOffset; - positionF2 = positionF1; - positionF1 = pointPosition; - } - else if (distanceToPoint < distanceF2) { - distanceF2 = distanceToPoint; - offsetF2 = cellOffset; - positionF2 = pointPosition; - } - } - *outDistance = distanceF2; - *outColor = hash_float_to_float3(cellPosition + offsetF2); - *outW = positionF2 + cellPosition; -} - -ccl_device void voronoi_distance_to_edge_1d(float w, - float randomness, - ccl_private float *outDistance) -{ - float cellPosition = floorf(w); - float localPosition = w - cellPosition; - - float midPointPosition = hash_float_to_float(cellPosition) * randomness; - float leftPointPosition = -1.0f + hash_float_to_float(cellPosition - 1.0f) * randomness; - float rightPointPosition = 1.0f + hash_float_to_float(cellPosition + 1.0f) * randomness; - float distanceToMidLeft = fabsf((midPointPosition + leftPointPosition) / 2.0f - localPosition); - float distanceToMidRight = fabsf((midPointPosition + rightPointPosition) / 2.0f - localPosition); - - *outDistance = min(distanceToMidLeft, distanceToMidRight); -} - -ccl_device void voronoi_n_sphere_radius_1d(float w, float randomness, ccl_private float *outRadius) -{ - float cellPosition = floorf(w); - float localPosition = w - cellPosition; - - float closestPoint = 0.0f; - float closestPointOffset = 0.0f; - float minDistance = 8.0f; - for (int i = -1; i <= 1; i++) { - float cellOffset = i; - float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness; - float distanceToPoint = fabsf(pointPosition - localPosition); - if (distanceToPoint < minDistance) { - minDistance = distanceToPoint; - closestPoint = pointPosition; - closestPointOffset = cellOffset; - } - } - - minDistance = 8.0f; - float closestPointToClosestPoint = 0.0f; - for (int i = -1; i <= 1; i++) { - if (i == 0) { - continue; - } - float cellOffset = i + closestPointOffset; - float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness; - float distanceToPoint = fabsf(closestPoint - pointPosition); - if (distanceToPoint < minDistance) { - minDistance = distanceToPoint; - closestPointToClosestPoint = pointPosition; - } - } - *outRadius = fabsf(closestPointToClosestPoint - closestPoint) / 2.0f; -} - -/* **** 2D Voronoi **** */ - -ccl_device float voronoi_distance_2d(float2 a, - float2 b, - NodeVoronoiDistanceMetric metric, - float exponent) -{ - if (metric == NODE_VORONOI_EUCLIDEAN) { - return distance(a, b); - } - else if (metric == NODE_VORONOI_MANHATTAN) { - return fabsf(a.x - b.x) + fabsf(a.y - b.y); - } - else if (metric == NODE_VORONOI_CHEBYCHEV) { - return max(fabsf(a.x - b.x), fabsf(a.y - b.y)); - } - else if (metric == NODE_VORONOI_MINKOWSKI) { - return powf(powf(fabsf(a.x - b.x), exponent) + powf(fabsf(a.y - b.y), exponent), - 1.0f / exponent); - } - else { - return 0.0f; - } -} - -ccl_device void voronoi_f1_2d(float2 coord, - float exponent, - float randomness, - 
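As voronoi_distance_2d above shows, the Minkowski option generalizes the other metrics: exponent 1 reproduces Manhattan, exponent 2 reproduces Euclidean, and large exponents approach Chebychev. A small numeric check of those special cases:

// Sketch only: Minkowski distance and its limiting cases.
#include <cmath>
#include <cstdio>

static float minkowski_2d(float ax, float ay, float bx, float by, float exponent)
{
  return std::pow(std::pow(std::fabs(ax - bx), exponent) + std::pow(std::fabs(ay - by), exponent),
                  1.0f / exponent);
}

int main()
{
  // Distance from (0, 0) to (3, 4).
  std::printf("p=1  : %.3f (Manhattan = 7)\n", minkowski_2d(0, 0, 3, 4, 1.0f));
  std::printf("p=2  : %.3f (Euclidean = 5)\n", minkowski_2d(0, 0, 3, 4, 2.0f));
  std::printf("p=32 : %.3f (-> Chebychev = 4)\n", minkowski_2d(0, 0, 3, 4, 32.0f));
  return 0;
}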
NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float2 *outPosition) -{ - float2 cellPosition = floor(coord); - float2 localPosition = coord - cellPosition; - - float minDistance = 8.0f; - float2 targetOffset = make_float2(0.0f, 0.0f); - float2 targetPosition = make_float2(0.0f, 0.0f); - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - float2 cellOffset = make_float2(i, j); - float2 pointPosition = cellOffset + - hash_float2_to_float2(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_2d(pointPosition, localPosition, metric, exponent); - if (distanceToPoint < minDistance) { - targetOffset = cellOffset; - minDistance = distanceToPoint; - targetPosition = pointPosition; - } - } - } - *outDistance = minDistance; - *outColor = hash_float2_to_float3(cellPosition + targetOffset); - *outPosition = targetPosition + cellPosition; -} - -ccl_device void voronoi_smooth_f1_2d(float2 coord, - float smoothness, - float exponent, - float randomness, - NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float2 *outPosition) -{ - float2 cellPosition = floor(coord); - float2 localPosition = coord - cellPosition; - - float smoothDistance = 8.0f; - float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f); - float2 smoothPosition = make_float2(0.0f, 0.0f); - for (int j = -2; j <= 2; j++) { - for (int i = -2; i <= 2; i++) { - float2 cellOffset = make_float2(i, j); - float2 pointPosition = cellOffset + - hash_float2_to_float2(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_2d(pointPosition, localPosition, metric, exponent); - float h = smoothstep( - 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness); - float correctionFactor = smoothness * h * (1.0f - h); - smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor; - correctionFactor /= 1.0f + 3.0f * smoothness; - float3 cellColor = hash_float2_to_float3(cellPosition + cellOffset); - smoothColor = mix(smoothColor, cellColor, h) - correctionFactor; - smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor; - } - } - *outDistance = smoothDistance; - *outColor = smoothColor; - *outPosition = cellPosition + smoothPosition; -} - -ccl_device void voronoi_f2_2d(float2 coord, - float exponent, - float randomness, - NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float2 *outPosition) -{ - float2 cellPosition = floor(coord); - float2 localPosition = coord - cellPosition; - - float distanceF1 = 8.0f; - float distanceF2 = 8.0f; - float2 offsetF1 = make_float2(0.0f, 0.0f); - float2 positionF1 = make_float2(0.0f, 0.0f); - float2 offsetF2 = make_float2(0.0f, 0.0f); - float2 positionF2 = make_float2(0.0f, 0.0f); - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - float2 cellOffset = make_float2(i, j); - float2 pointPosition = cellOffset + - hash_float2_to_float2(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_2d(pointPosition, localPosition, metric, exponent); - if (distanceToPoint < distanceF1) { - distanceF2 = distanceF1; - distanceF1 = distanceToPoint; - offsetF2 = offsetF1; - offsetF1 = cellOffset; - positionF2 = positionF1; - positionF1 = pointPosition; - } - else if (distanceToPoint < distanceF2) { - distanceF2 = distanceToPoint; - offsetF2 = cellOffset; - positionF2 = pointPosition; - } - 
} - } - *outDistance = distanceF2; - *outColor = hash_float2_to_float3(cellPosition + offsetF2); - *outPosition = positionF2 + cellPosition; -} - -ccl_device void voronoi_distance_to_edge_2d(float2 coord, - float randomness, - ccl_private float *outDistance) -{ - float2 cellPosition = floor(coord); - float2 localPosition = coord - cellPosition; - - float2 vectorToClosest = make_float2(0.0f, 0.0f); - float minDistance = 8.0f; - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - float2 cellOffset = make_float2(i, j); - float2 vectorToPoint = cellOffset + - hash_float2_to_float2(cellPosition + cellOffset) * randomness - - localPosition; - float distanceToPoint = dot(vectorToPoint, vectorToPoint); - if (distanceToPoint < minDistance) { - minDistance = distanceToPoint; - vectorToClosest = vectorToPoint; - } - } - } - - minDistance = 8.0f; - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - float2 cellOffset = make_float2(i, j); - float2 vectorToPoint = cellOffset + - hash_float2_to_float2(cellPosition + cellOffset) * randomness - - localPosition; - float2 perpendicularToEdge = vectorToPoint - vectorToClosest; - if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001f) { - float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0f, - normalize(perpendicularToEdge)); - minDistance = min(minDistance, distanceToEdge); - } - } - } - *outDistance = minDistance; -} - -ccl_device void voronoi_n_sphere_radius_2d(float2 coord, - float randomness, - ccl_private float *outRadius) -{ - float2 cellPosition = floor(coord); - float2 localPosition = coord - cellPosition; - - float2 closestPoint = make_float2(0.0f, 0.0f); - float2 closestPointOffset = make_float2(0.0f, 0.0f); - float minDistance = 8.0f; - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - float2 cellOffset = make_float2(i, j); - float2 pointPosition = cellOffset + - hash_float2_to_float2(cellPosition + cellOffset) * randomness; - float distanceToPoint = distance(pointPosition, localPosition); - if (distanceToPoint < minDistance) { - minDistance = distanceToPoint; - closestPoint = pointPosition; - closestPointOffset = cellOffset; - } - } - } - - minDistance = 8.0f; - float2 closestPointToClosestPoint = make_float2(0.0f, 0.0f); - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - if (i == 0 && j == 0) { - continue; - } - float2 cellOffset = make_float2(i, j) + closestPointOffset; - float2 pointPosition = cellOffset + - hash_float2_to_float2(cellPosition + cellOffset) * randomness; - float distanceToPoint = distance(closestPoint, pointPosition); - if (distanceToPoint < minDistance) { - minDistance = distanceToPoint; - closestPointToClosestPoint = pointPosition; - } - } - } - *outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0f; -} - -/* **** 3D Voronoi **** */ - -ccl_device float voronoi_distance_3d(float3 a, - float3 b, - NodeVoronoiDistanceMetric metric, - float exponent) -{ - if (metric == NODE_VORONOI_EUCLIDEAN) { - return distance(a, b); - } - else if (metric == NODE_VORONOI_MANHATTAN) { - return fabsf(a.x - b.x) + fabsf(a.y - b.y) + fabsf(a.z - b.z); - } - else if (metric == NODE_VORONOI_CHEBYCHEV) { - return max(fabsf(a.x - b.x), max(fabsf(a.y - b.y), fabsf(a.z - b.z))); - } - else if (metric == NODE_VORONOI_MINKOWSKI) { - return powf(powf(fabsf(a.x - b.x), exponent) + powf(fabsf(a.y - b.y), exponent) + - powf(fabsf(a.z - b.z), exponent), - 1.0f / exponent); - } - else { - return 0.0f; - } -} - -ccl_device void voronoi_f1_3d(float3 coord, - 
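The distance-to-edge functions work in two passes: first find the offset vector to the closest feature point, then, for every neighbor, measure how far the shading point is from the perpendicular bisector separating the two points, since that bisector is where the Voronoi cell border lies. A sketch of that single projection step in 2D:

// Sketch only: distance from the shading point (at the origin of both offset vectors)
// to the perpendicular bisector between the closest feature point and a neighbor,
// i.e. dot(midpoint, normalize(neighbor - closest)) as in voronoi_distance_to_edge_*.
#include <cmath>
#include <cstdio>

struct V2 { float x, y; };

static float distance_to_bisector(V2 to_closest, V2 to_other)
{
  const V2 mid = {(to_closest.x + to_other.x) * 0.5f, (to_closest.y + to_other.y) * 0.5f};
  V2 perp = {to_other.x - to_closest.x, to_other.y - to_closest.y};
  const float len = std::sqrt(perp.x * perp.x + perp.y * perp.y);
  perp.x /= len;
  perp.y /= len;
  return mid.x * perp.x + mid.y * perp.y;
}

int main()
{
  // Closest feature at (1, 0), neighbor at (3, 0): the cell border sits at x = 2,
  // so the distance from the shading point at the origin is 2.
  std::printf("%.3f\n", distance_to_bisector({1.0f, 0.0f}, {3.0f, 0.0f}));
  return 0;
}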
float exponent, - float randomness, - NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float3 *outPosition) -{ - float3 cellPosition = floor(coord); - float3 localPosition = coord - cellPosition; - - float minDistance = 8.0f; - float3 targetOffset = make_float3(0.0f, 0.0f, 0.0f); - float3 targetPosition = make_float3(0.0f, 0.0f, 0.0f); - for (int k = -1; k <= 1; k++) { - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - float3 cellOffset = make_float3(i, j, k); - float3 pointPosition = cellOffset + - hash_float3_to_float3(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_3d( - pointPosition, localPosition, metric, exponent); - if (distanceToPoint < minDistance) { - targetOffset = cellOffset; - minDistance = distanceToPoint; - targetPosition = pointPosition; - } - } - } - } - *outDistance = minDistance; - *outColor = hash_float3_to_float3(cellPosition + targetOffset); - *outPosition = targetPosition + cellPosition; -} - -ccl_device void voronoi_smooth_f1_3d(float3 coord, - float smoothness, - float exponent, - float randomness, - NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float3 *outPosition) -{ - float3 cellPosition = floor(coord); - float3 localPosition = coord - cellPosition; - - float smoothDistance = 8.0f; - float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f); - float3 smoothPosition = make_float3(0.0f, 0.0f, 0.0f); - for (int k = -2; k <= 2; k++) { - for (int j = -2; j <= 2; j++) { - for (int i = -2; i <= 2; i++) { - float3 cellOffset = make_float3(i, j, k); - float3 pointPosition = cellOffset + - hash_float3_to_float3(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_3d( - pointPosition, localPosition, metric, exponent); - float h = smoothstep( - 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness); - float correctionFactor = smoothness * h * (1.0f - h); - smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor; - correctionFactor /= 1.0f + 3.0f * smoothness; - float3 cellColor = hash_float3_to_float3(cellPosition + cellOffset); - smoothColor = mix(smoothColor, cellColor, h) - correctionFactor; - smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor; - } - } - } - *outDistance = smoothDistance; - *outColor = smoothColor; - *outPosition = cellPosition + smoothPosition; -} - -ccl_device void voronoi_f2_3d(float3 coord, - float exponent, - float randomness, - NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float3 *outPosition) -{ - float3 cellPosition = floor(coord); - float3 localPosition = coord - cellPosition; - - float distanceF1 = 8.0f; - float distanceF2 = 8.0f; - float3 offsetF1 = make_float3(0.0f, 0.0f, 0.0f); - float3 positionF1 = make_float3(0.0f, 0.0f, 0.0f); - float3 offsetF2 = make_float3(0.0f, 0.0f, 0.0f); - float3 positionF2 = make_float3(0.0f, 0.0f, 0.0f); - for (int k = -1; k <= 1; k++) { - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - float3 cellOffset = make_float3(i, j, k); - float3 pointPosition = cellOffset + - hash_float3_to_float3(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_3d( - pointPosition, localPosition, metric, exponent); - if (distanceToPoint < distanceF1) { - distanceF2 = distanceF1; - distanceF1 = distanceToPoint; - offsetF2 = offsetF1; - offsetF1 = 
cellOffset; - positionF2 = positionF1; - positionF1 = pointPosition; - } - else if (distanceToPoint < distanceF2) { - distanceF2 = distanceToPoint; - offsetF2 = cellOffset; - positionF2 = pointPosition; - } - } - } - } - *outDistance = distanceF2; - *outColor = hash_float3_to_float3(cellPosition + offsetF2); - *outPosition = positionF2 + cellPosition; -} - -ccl_device void voronoi_distance_to_edge_3d(float3 coord, - float randomness, - ccl_private float *outDistance) -{ - float3 cellPosition = floor(coord); - float3 localPosition = coord - cellPosition; - - float3 vectorToClosest = make_float3(0.0f, 0.0f, 0.0f); - float minDistance = 8.0f; - for (int k = -1; k <= 1; k++) { - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - float3 cellOffset = make_float3(i, j, k); - float3 vectorToPoint = cellOffset + - hash_float3_to_float3(cellPosition + cellOffset) * randomness - - localPosition; - float distanceToPoint = dot(vectorToPoint, vectorToPoint); - if (distanceToPoint < minDistance) { - minDistance = distanceToPoint; - vectorToClosest = vectorToPoint; - } - } - } - } - - minDistance = 8.0f; - for (int k = -1; k <= 1; k++) { - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - float3 cellOffset = make_float3(i, j, k); - float3 vectorToPoint = cellOffset + - hash_float3_to_float3(cellPosition + cellOffset) * randomness - - localPosition; - float3 perpendicularToEdge = vectorToPoint - vectorToClosest; - if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001f) { - float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0f, - normalize(perpendicularToEdge)); - minDistance = min(minDistance, distanceToEdge); - } - } - } - } - *outDistance = minDistance; -} - -ccl_device void voronoi_n_sphere_radius_3d(float3 coord, - float randomness, - ccl_private float *outRadius) -{ - float3 cellPosition = floor(coord); - float3 localPosition = coord - cellPosition; - - float3 closestPoint = make_float3(0.0f, 0.0f, 0.0f); - float3 closestPointOffset = make_float3(0.0f, 0.0f, 0.0f); - float minDistance = 8.0f; - for (int k = -1; k <= 1; k++) { - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - float3 cellOffset = make_float3(i, j, k); - float3 pointPosition = cellOffset + - hash_float3_to_float3(cellPosition + cellOffset) * randomness; - float distanceToPoint = distance(pointPosition, localPosition); - if (distanceToPoint < minDistance) { - minDistance = distanceToPoint; - closestPoint = pointPosition; - closestPointOffset = cellOffset; - } - } - } - } - - minDistance = 8.0f; - float3 closestPointToClosestPoint = make_float3(0.0f, 0.0f, 0.0f); - for (int k = -1; k <= 1; k++) { - for (int j = -1; j <= 1; j++) { - for (int i = -1; i <= 1; i++) { - if (i == 0 && j == 0 && k == 0) { - continue; - } - float3 cellOffset = make_float3(i, j, k) + closestPointOffset; - float3 pointPosition = cellOffset + - hash_float3_to_float3(cellPosition + cellOffset) * randomness; - float distanceToPoint = distance(closestPoint, pointPosition); - if (distanceToPoint < minDistance) { - minDistance = distanceToPoint; - closestPointToClosestPoint = pointPosition; - } - } - } - } - *outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0f; -} - -/* **** 4D Voronoi **** */ - -ccl_device float voronoi_distance_4d(float4 a, - float4 b, - NodeVoronoiDistanceMetric metric, - float exponent) -{ - if (metric == NODE_VORONOI_EUCLIDEAN) { - return distance(a, b); - } - else if (metric == NODE_VORONOI_MANHATTAN) { - return fabsf(a.x - b.x) + fabsf(a.y - b.y) + fabsf(a.z 
- b.z) + fabsf(a.w - b.w); - } - else if (metric == NODE_VORONOI_CHEBYCHEV) { - return max(fabsf(a.x - b.x), max(fabsf(a.y - b.y), max(fabsf(a.z - b.z), fabsf(a.w - b.w)))); - } - else if (metric == NODE_VORONOI_MINKOWSKI) { - return powf(powf(fabsf(a.x - b.x), exponent) + powf(fabsf(a.y - b.y), exponent) + - powf(fabsf(a.z - b.z), exponent) + powf(fabsf(a.w - b.w), exponent), - 1.0f / exponent); - } - else { - return 0.0f; - } -} - -ccl_device void voronoi_f1_4d(float4 coord, - float exponent, - float randomness, - NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float4 *outPosition) -{ - float4 cellPosition = floor(coord); - float4 localPosition = coord - cellPosition; - - float minDistance = 8.0f; - float4 targetOffset = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float4 targetPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - for (int u = -1; u <= 1; u++) { - for (int k = -1; k <= 1; k++) { - ccl_loop_no_unroll for (int j = -1; j <= 1; j++) - { - for (int i = -1; i <= 1; i++) { - float4 cellOffset = make_float4(i, j, k, u); - float4 pointPosition = cellOffset + - hash_float4_to_float4(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_4d( - pointPosition, localPosition, metric, exponent); - if (distanceToPoint < minDistance) { - targetOffset = cellOffset; - minDistance = distanceToPoint; - targetPosition = pointPosition; - } - } - } - } - } - *outDistance = minDistance; - *outColor = hash_float4_to_float3(cellPosition + targetOffset); - *outPosition = targetPosition + cellPosition; -} - -ccl_device void voronoi_smooth_f1_4d(float4 coord, - float smoothness, - float exponent, - float randomness, - NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float4 *outPosition) -{ - float4 cellPosition = floor(coord); - float4 localPosition = coord - cellPosition; - - float smoothDistance = 8.0f; - float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f); - float4 smoothPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - for (int u = -2; u <= 2; u++) { - for (int k = -2; k <= 2; k++) { - ccl_loop_no_unroll for (int j = -2; j <= 2; j++) - { - for (int i = -2; i <= 2; i++) { - float4 cellOffset = make_float4(i, j, k, u); - float4 pointPosition = cellOffset + - hash_float4_to_float4(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_4d( - pointPosition, localPosition, metric, exponent); - float h = smoothstep( - 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness); - float correctionFactor = smoothness * h * (1.0f - h); - smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor; - correctionFactor /= 1.0f + 3.0f * smoothness; - float3 cellColor = hash_float4_to_float3(cellPosition + cellOffset); - smoothColor = mix(smoothColor, cellColor, h) - correctionFactor; - smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor; - } - } - } - } - *outDistance = smoothDistance; - *outColor = smoothColor; - *outPosition = cellPosition + smoothPosition; -} - -ccl_device void voronoi_f2_4d(float4 coord, - float exponent, - float randomness, - NodeVoronoiDistanceMetric metric, - ccl_private float *outDistance, - ccl_private float3 *outColor, - ccl_private float4 *outPosition) -{ - float4 cellPosition = floor(coord); - float4 localPosition = coord - cellPosition; - - float distanceF1 = 8.0f; - float distanceF2 = 8.0f; - float4 offsetF1 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - 
float4 positionF1 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float4 offsetF2 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float4 positionF2 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - for (int u = -1; u <= 1; u++) { - for (int k = -1; k <= 1; k++) { - ccl_loop_no_unroll for (int j = -1; j <= 1; j++) - { - for (int i = -1; i <= 1; i++) { - float4 cellOffset = make_float4(i, j, k, u); - float4 pointPosition = cellOffset + - hash_float4_to_float4(cellPosition + cellOffset) * randomness; - float distanceToPoint = voronoi_distance_4d( - pointPosition, localPosition, metric, exponent); - if (distanceToPoint < distanceF1) { - distanceF2 = distanceF1; - distanceF1 = distanceToPoint; - offsetF2 = offsetF1; - offsetF1 = cellOffset; - positionF2 = positionF1; - positionF1 = pointPosition; - } - else if (distanceToPoint < distanceF2) { - distanceF2 = distanceToPoint; - offsetF2 = cellOffset; - positionF2 = pointPosition; - } - } - } - } - } - *outDistance = distanceF2; - *outColor = hash_float4_to_float3(cellPosition + offsetF2); - *outPosition = positionF2 + cellPosition; -} - -ccl_device void voronoi_distance_to_edge_4d(float4 coord, - float randomness, - ccl_private float *outDistance) -{ - float4 cellPosition = floor(coord); - float4 localPosition = coord - cellPosition; - - float4 vectorToClosest = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float minDistance = 8.0f; - for (int u = -1; u <= 1; u++) { - for (int k = -1; k <= 1; k++) { - ccl_loop_no_unroll for (int j = -1; j <= 1; j++) - { - for (int i = -1; i <= 1; i++) { - float4 cellOffset = make_float4(i, j, k, u); - float4 vectorToPoint = cellOffset + - hash_float4_to_float4(cellPosition + cellOffset) * randomness - - localPosition; - float distanceToPoint = dot(vectorToPoint, vectorToPoint); - if (distanceToPoint < minDistance) { - minDistance = distanceToPoint; - vectorToClosest = vectorToPoint; - } - } - } - } - } - - minDistance = 8.0f; - for (int u = -1; u <= 1; u++) { - for (int k = -1; k <= 1; k++) { - ccl_loop_no_unroll for (int j = -1; j <= 1; j++) - { - for (int i = -1; i <= 1; i++) { - float4 cellOffset = make_float4(i, j, k, u); - float4 vectorToPoint = cellOffset + - hash_float4_to_float4(cellPosition + cellOffset) * randomness - - localPosition; - float4 perpendicularToEdge = vectorToPoint - vectorToClosest; - if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001f) { - float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0f, - normalize(perpendicularToEdge)); - minDistance = min(minDistance, distanceToEdge); - } - } - } - } - } - *outDistance = minDistance; -} - -ccl_device void voronoi_n_sphere_radius_4d(float4 coord, - float randomness, - ccl_private float *outRadius) -{ - float4 cellPosition = floor(coord); - float4 localPosition = coord - cellPosition; - - float4 closestPoint = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float4 closestPointOffset = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float minDistance = 8.0f; - for (int u = -1; u <= 1; u++) { - for (int k = -1; k <= 1; k++) { - ccl_loop_no_unroll for (int j = -1; j <= 1; j++) - { - for (int i = -1; i <= 1; i++) { - float4 cellOffset = make_float4(i, j, k, u); - float4 pointPosition = cellOffset + - hash_float4_to_float4(cellPosition + cellOffset) * randomness; - float distanceToPoint = distance(pointPosition, localPosition); - if (distanceToPoint < minDistance) { - minDistance = distanceToPoint; - closestPoint = pointPosition; - closestPointOffset = cellOffset; - } - } - } - } - } - - minDistance = 8.0f; - float4 closestPointToClosestPoint = make_float4(0.0f, 0.0f, 0.0f, 
0.0f); - for (int u = -1; u <= 1; u++) { - for (int k = -1; k <= 1; k++) { - ccl_loop_no_unroll for (int j = -1; j <= 1; j++) - { - for (int i = -1; i <= 1; i++) { - if (i == 0 && j == 0 && k == 0 && u == 0) { - continue; - } - float4 cellOffset = make_float4(i, j, k, u) + closestPointOffset; - float4 pointPosition = cellOffset + - hash_float4_to_float4(cellPosition + cellOffset) * randomness; - float distanceToPoint = distance(closestPoint, pointPosition); - if (distanceToPoint < minDistance) { - minDistance = distanceToPoint; - closestPointToClosestPoint = pointPosition; - } - } - } - } - } - *outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0f; -} - -template -ccl_device_noinline int svm_node_tex_voronoi(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint dimensions, - uint feature, - uint metric, - int offset) -{ - uint4 stack_offsets = read_node(kg, &offset); - uint4 defaults = read_node(kg, &offset); - - uint coord_stack_offset, w_stack_offset, scale_stack_offset, smoothness_stack_offset; - uint exponent_stack_offset, randomness_stack_offset, distance_out_stack_offset, - color_out_stack_offset; - uint position_out_stack_offset, w_out_stack_offset, radius_out_stack_offset; - - svm_unpack_node_uchar4(stack_offsets.x, - &coord_stack_offset, - &w_stack_offset, - &scale_stack_offset, - &smoothness_stack_offset); - svm_unpack_node_uchar4(stack_offsets.y, - &exponent_stack_offset, - &randomness_stack_offset, - &distance_out_stack_offset, - &color_out_stack_offset); - svm_unpack_node_uchar3( - stack_offsets.z, &position_out_stack_offset, &w_out_stack_offset, &radius_out_stack_offset); - - float3 coord = stack_load_float3(stack, coord_stack_offset); - float w = stack_load_float_default(stack, w_stack_offset, stack_offsets.w); - float scale = stack_load_float_default(stack, scale_stack_offset, defaults.x); - float smoothness = stack_load_float_default(stack, smoothness_stack_offset, defaults.y); - float exponent = stack_load_float_default(stack, exponent_stack_offset, defaults.z); - float randomness = stack_load_float_default(stack, randomness_stack_offset, defaults.w); - - NodeVoronoiFeature voronoi_feature = (NodeVoronoiFeature)feature; - NodeVoronoiDistanceMetric voronoi_metric = (NodeVoronoiDistanceMetric)metric; - - float distance_out = 0.0f, w_out = 0.0f, radius_out = 0.0f; - float3 color_out = make_float3(0.0f, 0.0f, 0.0f); - float3 position_out = make_float3(0.0f, 0.0f, 0.0f); - - randomness = clamp(randomness, 0.0f, 1.0f); - smoothness = clamp(smoothness / 2.0f, 0.0f, 0.5f); - - w *= scale; - coord *= scale; - - switch (dimensions) { - case 1: { - switch (voronoi_feature) { - case NODE_VORONOI_F1: - voronoi_f1_1d( - w, exponent, randomness, voronoi_metric, &distance_out, &color_out, &w_out); - break; - case NODE_VORONOI_SMOOTH_F1: - voronoi_smooth_f1_1d(w, - smoothness, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &w_out); - break; - case NODE_VORONOI_F2: - voronoi_f2_1d( - w, exponent, randomness, voronoi_metric, &distance_out, &color_out, &w_out); - break; - case NODE_VORONOI_DISTANCE_TO_EDGE: - voronoi_distance_to_edge_1d(w, randomness, &distance_out); - break; - case NODE_VORONOI_N_SPHERE_RADIUS: - voronoi_n_sphere_radius_1d(w, randomness, &radius_out); - break; - default: - kernel_assert(0); - } - w_out = safe_divide(w_out, scale); - break; - } - case 2: { - float2 coord_2d = make_float2(coord.x, coord.y); - float2 position_out_2d = zero_float2(); - switch (voronoi_feature) { - case NODE_VORONOI_F1: 
- voronoi_f1_2d(coord_2d, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out_2d); - break; - case NODE_VORONOI_SMOOTH_F1: - IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA) - { - voronoi_smooth_f1_2d(coord_2d, - smoothness, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out_2d); - } - break; - case NODE_VORONOI_F2: - voronoi_f2_2d(coord_2d, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out_2d); - break; - case NODE_VORONOI_DISTANCE_TO_EDGE: - voronoi_distance_to_edge_2d(coord_2d, randomness, &distance_out); - break; - case NODE_VORONOI_N_SPHERE_RADIUS: - voronoi_n_sphere_radius_2d(coord_2d, randomness, &radius_out); - break; - default: - kernel_assert(0); - } - position_out_2d = safe_divide_float2_float(position_out_2d, scale); - position_out = make_float3(position_out_2d.x, position_out_2d.y, 0.0f); - break; - } - case 3: { - switch (voronoi_feature) { - case NODE_VORONOI_F1: - voronoi_f1_3d(coord, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out); - break; - case NODE_VORONOI_SMOOTH_F1: - IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA) - { - voronoi_smooth_f1_3d(coord, - smoothness, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out); - } - break; - case NODE_VORONOI_F2: - voronoi_f2_3d(coord, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out); - break; - case NODE_VORONOI_DISTANCE_TO_EDGE: - voronoi_distance_to_edge_3d(coord, randomness, &distance_out); - break; - case NODE_VORONOI_N_SPHERE_RADIUS: - voronoi_n_sphere_radius_3d(coord, randomness, &radius_out); - break; - default: - kernel_assert(0); - } - position_out = safe_divide_float3_float(position_out, scale); - break; - } - - case 4: { - IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA) - { - float4 coord_4d = make_float4(coord.x, coord.y, coord.z, w); - float4 position_out_4d; - switch (voronoi_feature) { - case NODE_VORONOI_F1: - voronoi_f1_4d(coord_4d, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out_4d); - break; - case NODE_VORONOI_SMOOTH_F1: - voronoi_smooth_f1_4d(coord_4d, - smoothness, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out_4d); - break; - case NODE_VORONOI_F2: - voronoi_f2_4d(coord_4d, - exponent, - randomness, - voronoi_metric, - &distance_out, - &color_out, - &position_out_4d); - break; - case NODE_VORONOI_DISTANCE_TO_EDGE: - voronoi_distance_to_edge_4d(coord_4d, randomness, &distance_out); - break; - case NODE_VORONOI_N_SPHERE_RADIUS: - voronoi_n_sphere_radius_4d(coord_4d, randomness, &radius_out); - break; - default: - kernel_assert(0); - } - position_out_4d = safe_divide_float4_float(position_out_4d, scale); - position_out = make_float3(position_out_4d.x, position_out_4d.y, position_out_4d.z); - w_out = position_out_4d.w; - } - break; - } - default: - kernel_assert(0); - } - - if (stack_valid(distance_out_stack_offset)) - stack_store_float(stack, distance_out_stack_offset, distance_out); - if (stack_valid(color_out_stack_offset)) - stack_store_float3(stack, color_out_stack_offset, color_out); - if (stack_valid(position_out_stack_offset)) - stack_store_float3(stack, position_out_stack_offset, position_out); - if (stack_valid(w_out_stack_offset)) - stack_store_float(stack, w_out_stack_offset, w_out); - if (stack_valid(radius_out_stack_offset)) - stack_store_float(stack, radius_out_stack_offset, radius_out); - return 
offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h deleted file mode 100644 index be4bb315145..00000000000 --- a/intern/cycles/kernel/svm/svm_voxel.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* TODO(sergey): Think of making it more generic volume-type attribute - * sampler. - */ -ccl_device_noinline int svm_node_tex_voxel( - KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) -{ - uint co_offset, density_out_offset, color_out_offset, space; - svm_unpack_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space); -#ifdef __VOLUME__ - int id = node.y; - float3 co = stack_load_float3(stack, co_offset); - if (space == NODE_TEX_VOXEL_SPACE_OBJECT) { - co = volume_normalized_position(kg, sd, co); - } - else { - kernel_assert(space == NODE_TEX_VOXEL_SPACE_WORLD); - Transform tfm; - tfm.x = read_node_float(kg, &offset); - tfm.y = read_node_float(kg, &offset); - tfm.z = read_node_float(kg, &offset); - co = transform_point(&tfm, co); - } - - float4 r = kernel_tex_image_interp_3d(kg, id, co, INTERPOLATION_NONE); -#else - float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); -#endif - if (stack_valid(density_out_offset)) - stack_store_float(stack, density_out_offset, r.w); - if (stack_valid(color_out_offset)) - stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z)); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h deleted file mode 100644 index d04b7aa3476..00000000000 --- a/intern/cycles/kernel/svm/svm_wave.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* Wave */ - -ccl_device_noinline_cpu float svm_wave(NodeWaveType type, - NodeWaveBandsDirection bands_dir, - NodeWaveRingsDirection rings_dir, - NodeWaveProfile profile, - float3 p, - float distortion, - float detail, - float dscale, - float droughness, - float phase) -{ - /* Prevent precision issues on unit coordinates. 
*/ - p = (p + 0.000001f) * 0.999999f; - - float n; - - if (type == NODE_WAVE_BANDS) { - if (bands_dir == NODE_WAVE_BANDS_DIRECTION_X) { - n = p.x * 20.0f; - } - else if (bands_dir == NODE_WAVE_BANDS_DIRECTION_Y) { - n = p.y * 20.0f; - } - else if (bands_dir == NODE_WAVE_BANDS_DIRECTION_Z) { - n = p.z * 20.0f; - } - else { /* NODE_WAVE_BANDS_DIRECTION_DIAGONAL */ - n = (p.x + p.y + p.z) * 10.0f; - } - } - else { /* NODE_WAVE_RINGS */ - float3 rp = p; - if (rings_dir == NODE_WAVE_RINGS_DIRECTION_X) { - rp *= make_float3(0.0f, 1.0f, 1.0f); - } - else if (rings_dir == NODE_WAVE_RINGS_DIRECTION_Y) { - rp *= make_float3(1.0f, 0.0f, 1.0f); - } - else if (rings_dir == NODE_WAVE_RINGS_DIRECTION_Z) { - rp *= make_float3(1.0f, 1.0f, 0.0f); - } - /* else: NODE_WAVE_RINGS_DIRECTION_SPHERICAL */ - - n = len(rp) * 20.0f; - } - - n += phase; - - if (distortion != 0.0f) - n += distortion * (fractal_noise_3d(p * dscale, detail, droughness) * 2.0f - 1.0f); - - if (profile == NODE_WAVE_PROFILE_SIN) { - return 0.5f + 0.5f * sinf(n - M_PI_2_F); - } - else if (profile == NODE_WAVE_PROFILE_SAW) { - n /= M_2PI_F; - return n - floorf(n); - } - else { /* NODE_WAVE_PROFILE_TRI */ - n /= M_2PI_F; - return fabsf(n - floorf(n + 0.5f)) * 2.0f; - } -} - -ccl_device_noinline int svm_node_tex_wave( - KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) -{ - uint4 node2 = read_node(kg, &offset); - uint4 node3 = read_node(kg, &offset); - - /* RNA properties */ - uint type_offset, bands_dir_offset, rings_dir_offset, profile_offset; - /* Inputs, Outputs */ - uint co_offset, scale_offset, distortion_offset, detail_offset, dscale_offset, droughness_offset, - phase_offset; - uint color_offset, fac_offset; - - svm_unpack_node_uchar4( - node.y, &type_offset, &bands_dir_offset, &rings_dir_offset, &profile_offset); - svm_unpack_node_uchar3(node.z, &co_offset, &scale_offset, &distortion_offset); - svm_unpack_node_uchar4( - node.w, &detail_offset, &dscale_offset, &droughness_offset, &phase_offset); - svm_unpack_node_uchar2(node2.x, &color_offset, &fac_offset); - - float3 co = stack_load_float3(stack, co_offset); - float scale = stack_load_float_default(stack, scale_offset, node2.y); - float distortion = stack_load_float_default(stack, distortion_offset, node2.z); - float detail = stack_load_float_default(stack, detail_offset, node2.w); - float dscale = stack_load_float_default(stack, dscale_offset, node3.x); - float droughness = stack_load_float_default(stack, droughness_offset, node3.y); - float phase = stack_load_float_default(stack, phase_offset, node3.z); - - float f = svm_wave((NodeWaveType)type_offset, - (NodeWaveBandsDirection)bands_dir_offset, - (NodeWaveRingsDirection)rings_dir_offset, - (NodeWaveProfile)profile_offset, - co * scale, - distortion, - detail, - dscale, - droughness, - phase); - - if (stack_valid(fac_offset)) - stack_store_float(stack, fac_offset, f); - if (stack_valid(color_offset)) - stack_store_float3(stack, color_offset, make_float3(f, f, f)); - return offset; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h deleted file mode 100644 index 4ef041f68d5..00000000000 --- a/intern/cycles/kernel/svm/svm_wavelength.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2013, Blender Foundation. 
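svm_wave() above reduces every mode to a scalar phase n and then applies one of three profiles: a sine, a sawtooth, or a triangle wave. The same profiles as standalone functions, shown here only to make the shapes explicit:

// Sketch only: the three wave profiles as functions of the accumulated phase n.
#include <cmath>
#include <cstdio>

static const float PI = 3.14159265358979323846f;

static float profile_sine(float n) { return 0.5f + 0.5f * std::sin(n - PI * 0.5f); }

static float profile_saw(float n)
{
  n /= 2.0f * PI;
  return n - std::floor(n);  // Ramp from 0 to 1, then reset.
}

static float profile_triangle(float n)
{
  n /= 2.0f * PI;
  return std::fabs(n - std::floor(n + 0.5f)) * 2.0f;  // Symmetric up/down ramp.
}

int main()
{
  for (int i = 0; i <= 4; i++) {
    const float n = i * PI * 0.5f;
    std::printf("n=%.2f sine=%.2f saw=%.2f tri=%.2f\n",
                n, profile_sine(n), profile_saw(n), profile_triangle(n));
  }
  return 0;
}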
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -CCL_NAMESPACE_BEGIN - -/* Wavelength to RGB */ - -ccl_device_noinline void svm_node_wavelength(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint wavelength, - uint color_out) -{ - // CIE colour matching functions xBar, yBar, and zBar for - // wavelengths from 380 through 780 nanometers, every 5 - // nanometers. 
For a wavelength lambda in this range: - // cie_colour_match[(lambda - 380) / 5][0] = xBar - // cie_colour_match[(lambda - 380) / 5][1] = yBar - // cie_colour_match[(lambda - 380) / 5][2] = zBar - const float cie_colour_match[81][3] = { - {0.0014f, 0.0000f, 0.0065f}, {0.0022f, 0.0001f, 0.0105f}, {0.0042f, 0.0001f, 0.0201f}, - {0.0076f, 0.0002f, 0.0362f}, {0.0143f, 0.0004f, 0.0679f}, {0.0232f, 0.0006f, 0.1102f}, - {0.0435f, 0.0012f, 0.2074f}, {0.0776f, 0.0022f, 0.3713f}, {0.1344f, 0.0040f, 0.6456f}, - {0.2148f, 0.0073f, 1.0391f}, {0.2839f, 0.0116f, 1.3856f}, {0.3285f, 0.0168f, 1.6230f}, - {0.3483f, 0.0230f, 1.7471f}, {0.3481f, 0.0298f, 1.7826f}, {0.3362f, 0.0380f, 1.7721f}, - {0.3187f, 0.0480f, 1.7441f}, {0.2908f, 0.0600f, 1.6692f}, {0.2511f, 0.0739f, 1.5281f}, - {0.1954f, 0.0910f, 1.2876f}, {0.1421f, 0.1126f, 1.0419f}, {0.0956f, 0.1390f, 0.8130f}, - {0.0580f, 0.1693f, 0.6162f}, {0.0320f, 0.2080f, 0.4652f}, {0.0147f, 0.2586f, 0.3533f}, - {0.0049f, 0.3230f, 0.2720f}, {0.0024f, 0.4073f, 0.2123f}, {0.0093f, 0.5030f, 0.1582f}, - {0.0291f, 0.6082f, 0.1117f}, {0.0633f, 0.7100f, 0.0782f}, {0.1096f, 0.7932f, 0.0573f}, - {0.1655f, 0.8620f, 0.0422f}, {0.2257f, 0.9149f, 0.0298f}, {0.2904f, 0.9540f, 0.0203f}, - {0.3597f, 0.9803f, 0.0134f}, {0.4334f, 0.9950f, 0.0087f}, {0.5121f, 1.0000f, 0.0057f}, - {0.5945f, 0.9950f, 0.0039f}, {0.6784f, 0.9786f, 0.0027f}, {0.7621f, 0.9520f, 0.0021f}, - {0.8425f, 0.9154f, 0.0018f}, {0.9163f, 0.8700f, 0.0017f}, {0.9786f, 0.8163f, 0.0014f}, - {1.0263f, 0.7570f, 0.0011f}, {1.0567f, 0.6949f, 0.0010f}, {1.0622f, 0.6310f, 0.0008f}, - {1.0456f, 0.5668f, 0.0006f}, {1.0026f, 0.5030f, 0.0003f}, {0.9384f, 0.4412f, 0.0002f}, - {0.8544f, 0.3810f, 0.0002f}, {0.7514f, 0.3210f, 0.0001f}, {0.6424f, 0.2650f, 0.0000f}, - {0.5419f, 0.2170f, 0.0000f}, {0.4479f, 0.1750f, 0.0000f}, {0.3608f, 0.1382f, 0.0000f}, - {0.2835f, 0.1070f, 0.0000f}, {0.2187f, 0.0816f, 0.0000f}, {0.1649f, 0.0610f, 0.0000f}, - {0.1212f, 0.0446f, 0.0000f}, {0.0874f, 0.0320f, 0.0000f}, {0.0636f, 0.0232f, 0.0000f}, - {0.0468f, 0.0170f, 0.0000f}, {0.0329f, 0.0119f, 0.0000f}, {0.0227f, 0.0082f, 0.0000f}, - {0.0158f, 0.0057f, 0.0000f}, {0.0114f, 0.0041f, 0.0000f}, {0.0081f, 0.0029f, 0.0000f}, - {0.0058f, 0.0021f, 0.0000f}, {0.0041f, 0.0015f, 0.0000f}, {0.0029f, 0.0010f, 0.0000f}, - {0.0020f, 0.0007f, 0.0000f}, {0.0014f, 0.0005f, 0.0000f}, {0.0010f, 0.0004f, 0.0000f}, - {0.0007f, 0.0002f, 0.0000f}, {0.0005f, 0.0002f, 0.0000f}, {0.0003f, 0.0001f, 0.0000f}, - {0.0002f, 0.0001f, 0.0000f}, {0.0002f, 0.0001f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, - {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}}; - - float lambda_nm = stack_load_float(stack, wavelength); - float ii = (lambda_nm - 380.0f) * (1.0f / 5.0f); // scaled 0..80 - int i = float_to_int(ii); - float3 color; - - if (i < 0 || i >= 80) { - color = make_float3(0.0f, 0.0f, 0.0f); - } - else { - ii -= i; - ccl_constant float *c = cie_colour_match[i]; - color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii); - } - - color = xyz_to_rgb(kg, color); - color *= 1.0f / 2.52f; // Empirical scale from lg to make all comps <= 1 - - /* Clamp to zero if values are smaller */ - color = max(color, make_float3(0.0f, 0.0f, 0.0f)); - - stack_store_float3(stack, color_out, color); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_white_noise.h b/intern/cycles/kernel/svm/svm_white_noise.h deleted file mode 100644 index 6c2c3d6a683..00000000000 --- a/intern/cycles/kernel/svm/svm_white_noise.h +++ /dev/null @@ -1,80 +0,0 
@@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -ccl_device_noinline void svm_node_tex_white_noise(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint dimensions, - uint inputs_stack_offsets, - uint ouptuts_stack_offsets) -{ - uint vector_stack_offset, w_stack_offset, value_stack_offset, color_stack_offset; - svm_unpack_node_uchar2(inputs_stack_offsets, &vector_stack_offset, &w_stack_offset); - svm_unpack_node_uchar2(ouptuts_stack_offsets, &value_stack_offset, &color_stack_offset); - - float3 vector = stack_load_float3(stack, vector_stack_offset); - float w = stack_load_float(stack, w_stack_offset); - - if (stack_valid(color_stack_offset)) { - float3 color; - switch (dimensions) { - case 1: - color = hash_float_to_float3(w); - break; - case 2: - color = hash_float2_to_float3(make_float2(vector.x, vector.y)); - break; - case 3: - color = hash_float3_to_float3(vector); - break; - case 4: - color = hash_float4_to_float3(make_float4(vector.x, vector.y, vector.z, w)); - break; - default: - color = make_float3(1.0f, 0.0f, 1.0f); - kernel_assert(0); - break; - } - stack_store_float3(stack, color_stack_offset, color); - } - - if (stack_valid(value_stack_offset)) { - float value; - switch (dimensions) { - case 1: - value = hash_float_to_float(w); - break; - case 2: - value = hash_float2_to_float(make_float2(vector.x, vector.y)); - break; - case 3: - value = hash_float3_to_float(vector); - break; - case 4: - value = hash_float4_to_float(make_float4(vector.x, vector.y, vector.z, w)); - break; - default: - value = 0.0f; - kernel_assert(0); - break; - } - stack_store_float(stack, value_stack_offset, value); - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h deleted file mode 100644 index d75976d23e1..00000000000 --- a/intern/cycles/kernel/svm/svm_wireframe.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Adapted from Open Shading Language with this license: - * - * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. - * All Rights Reserved. - * - * Modifications Copyright 2013, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
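The white noise node above only chooses which coordinates feed a hash, based on the Dimensions setting (w, xy, xyz or xyzw), and stores the hashed result as a value and/or colour. A rough host-side sketch of that dispatch, using a toy integer hash in place of Cycles' hash_float*_to_float helpers (which are not reproduced here), could be:

// Illustrative only: dimension-based dispatch for a white-noise lookup.
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t hash_u32(uint32_t h) {      // finalizer-style mix, placeholder quality
  h ^= h >> 16; h *= 0x7feb352dU; h ^= h >> 15; h *= 0x846ca68bU; h ^= h >> 16;
  return h;
}
static uint32_t hash_float(float f, uint32_t seed) {
  uint32_t bits; std::memcpy(&bits, &f, sizeof(bits));  // bit-cast, no UB
  return hash_u32(bits ^ seed);
}

// Mirror of the node's switch: pick which coordinates feed the hash.
float white_noise(int dimensions, float x, float y, float z, float w) {
  uint32_t h = 0;
  switch (dimensions) {
    case 1: h = hash_float(w, 0); break;
    case 2: h = hash_float(x, hash_float(y, 0)); break;
    case 3: h = hash_float(x, hash_float(y, hash_float(z, 0))); break;
    case 4: h = hash_float(x, hash_float(y, hash_float(z, hash_float(w, 0)))); break;
    default: return 0.0f;
  }
  return (float)h / 4294967295.0f;           // map to [0, 1]
}

int main() { std::printf("%f\n", white_noise(3, 0.1f, 0.2f, 0.3f, 0.0f)); }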
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -CCL_NAMESPACE_BEGIN - -/* Wireframe Node */ - -ccl_device_inline float wireframe(KernelGlobals kg, - ccl_private ShaderData *sd, - float size, - int pixel_size, - ccl_private float3 *P) -{ -#ifdef __HAIR__ - if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE) -#else - if (sd->prim != PRIM_NONE) -#endif - { - float3 Co[3]; - float pixelwidth = 1.0f; - - /* Triangles */ - int np = 3; - - if (sd->type & PRIMITIVE_TRIANGLE) - triangle_vertices(kg, sd->prim, Co); - else - motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co); - - if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_position_transform(kg, sd, &Co[0]); - object_position_transform(kg, sd, &Co[1]); - object_position_transform(kg, sd, &Co[2]); - } - - if (pixel_size) { - // Project the derivatives of P to the viewing plane defined - // by I so we have a measure of how big is a pixel at this point - float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I); - float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I); - // Take the average of both axis' length - pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f; - } - - // Use half the width as the neighbor face will render the - // other half. And take the square for fast comparison - pixelwidth *= 0.5f * size; - pixelwidth *= pixelwidth; - for (int i = 0; i < np; i++) { - int i2 = i ? i - 1 : np - 1; - float3 dir = *P - Co[i]; - float3 edge = Co[i] - Co[i2]; - float3 crs = cross(edge, dir); - // At this point dot(crs, crs) / dot(edge, edge) is - // the square of area / length(edge) == square of the - // distance to the edge. - if (dot(crs, crs) < (dot(edge, edge) * pixelwidth)) - return 1.0f; - } - } - return 0.0f; -} - -ccl_device_noinline void svm_node_wireframe(KernelGlobals kg, - ccl_private ShaderData *sd, - ccl_private float *stack, - uint4 node) -{ - uint in_size = node.y; - uint out_fac = node.z; - uint use_pixel_size, bump_offset; - svm_unpack_node_uchar2(node.w, &use_pixel_size, &bump_offset); - - /* Input Data */ - float size = stack_load_float(stack, in_size); - int pixel_size = (int)use_pixel_size; - - /* Calculate wireframe */ - float f = wireframe(kg, sd, size, pixel_size, &sd->P); - - /* TODO(sergey): Think of faster way to calculate derivatives. 
*/ - if (bump_offset == NODE_BUMP_OFFSET_DX) { - float3 Px = sd->P - sd->dP.dx; - f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx); - } - else if (bump_offset == NODE_BUMP_OFFSET_DY) { - float3 Py = sd->P - sd->dP.dy; - f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy); - } - - if (stack_valid(out_fac)) - stack_store_float(stack, out_fac, f); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/tex_coord.h b/intern/cycles/kernel/svm/tex_coord.h new file mode 100644 index 00000000000..5e0debc968a --- /dev/null +++ b/intern/cycles/kernel/svm/tex_coord.h @@ -0,0 +1,426 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "kernel/camera/camera.h" +#include "kernel/geom/geom.h" +#include "kernel/sample/mapping.h" + +CCL_NAMESPACE_BEGIN + +/* Texture Coordinate Node */ + +ccl_device_noinline int svm_node_tex_coord(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + ccl_private float *stack, + uint4 node, + int offset) +{ + float3 data; + uint type = node.y; + uint out_offset = node.z; + + switch (type) { + case NODE_TEXCO_OBJECT: { + data = sd->P; + if (node.w == 0) { + if (sd->object != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &data); + } + } + else { + Transform tfm; + tfm.x = read_node_float(kg, &offset); + tfm.y = read_node_float(kg, &offset); + tfm.z = read_node_float(kg, &offset); + data = transform_point(&tfm, data); + } + break; + } + case NODE_TEXCO_NORMAL: { + data = sd->N; + object_inverse_normal_transform(kg, sd, &data); + break; + } + case NODE_TEXCO_CAMERA: { + Transform tfm = kernel_data.cam.worldtocamera; + + if (sd->object != OBJECT_NONE) + data = transform_point(&tfm, sd->P); + else + data = transform_point(&tfm, sd->P + camera_position(kg)); + break; + } + case NODE_TEXCO_WINDOW: { + if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && + kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, sd->ray_P); + else + data = camera_world_to_ndc(kg, sd, sd->P); + data.z = 0.0f; + break; + } + case NODE_TEXCO_REFLECTION: { + if (sd->object != OBJECT_NONE) + data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; + else + data = sd->I; + break; + } + case NODE_TEXCO_DUPLI_GENERATED: { + data = object_dupli_generated(kg, sd->object); + break; + } + case NODE_TEXCO_DUPLI_UV: { + data = object_dupli_uv(kg, sd->object); + break; + } + case NODE_TEXCO_VOLUME_GENERATED: { + data = sd->P; + +#ifdef __VOLUME__ + if (sd->object != OBJECT_NONE) + data = volume_normalized_position(kg, sd, data); +#endif + break; + } + } + + stack_store_float3(stack, out_offset, data); + return offset; +} + +ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + ccl_private float *stack, + uint4 node, + int offset) +{ +#ifdef __RAY_DIFFERENTIALS__ + float3 data; + uint type = node.y; + uint out_offset = node.z; + + switch (type) { + case 
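The NODE_BUMP_OFFSET_DX/DY branches above estimate how the wireframe factor changes across a pixel by re-evaluating it at a position shifted by the ray differential and forward-differencing; the tex_coord bump_dx/bump_dy variants that follow shift the coordinate itself in the same way. A small sketch of that finite-difference idea, with its own vector struct and a toy shading function standing in for wireframe(), is:

// Illustrative sketch of the bump-offset idea: approximate df/dx by
// re-evaluating the same function at a position offset by the ray differential.
#include <cmath>
#include <cstdio>
#include <functional>

struct V3 { float x, y, z; };
static V3 sub(V3 a, V3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static float len(V3 a) { return std::sqrt(a.x * a.x + a.y * a.y + a.z * a.z); }

// f: some scalar shading function of position (stand-in for wireframe()).
float bump_dx_estimate(const std::function<float(V3)> &f, V3 P, V3 dPdx) {
  float f0 = f(P);
  float fx = f(sub(P, dPdx));                 // value one pixel "back" along x
  return (f0 - fx) / len(dPdx);               // forward difference per unit length
}

int main() {
  auto f = [](V3 p) { return p.x * p.x; };    // toy function, df/dx = 2x
  V3 P{1.0f, 0.0f, 0.0f}, dPdx{0.01f, 0.0f, 0.0f};
  std::printf("estimated %f, analytic %f\n", bump_dx_estimate(f, P, dPdx), 2.0f * P.x);
}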
NODE_TEXCO_OBJECT: { + data = sd->P + sd->dP.dx; + if (node.w == 0) { + if (sd->object != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &data); + } + } + else { + Transform tfm; + tfm.x = read_node_float(kg, &offset); + tfm.y = read_node_float(kg, &offset); + tfm.z = read_node_float(kg, &offset); + data = transform_point(&tfm, data); + } + break; + } + case NODE_TEXCO_NORMAL: { + data = sd->N; + object_inverse_normal_transform(kg, sd, &data); + break; + } + case NODE_TEXCO_CAMERA: { + Transform tfm = kernel_data.cam.worldtocamera; + + if (sd->object != OBJECT_NONE) + data = transform_point(&tfm, sd->P + sd->dP.dx); + else + data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg)); + break; + } + case NODE_TEXCO_WINDOW: { + if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && + kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(sd->ray_dP, 0.0f, 0.0f)); + else + data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx); + data.z = 0.0f; + break; + } + case NODE_TEXCO_REFLECTION: { + if (sd->object != OBJECT_NONE) + data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; + else + data = sd->I; + break; + } + case NODE_TEXCO_DUPLI_GENERATED: { + data = object_dupli_generated(kg, sd->object); + break; + } + case NODE_TEXCO_DUPLI_UV: { + data = object_dupli_uv(kg, sd->object); + break; + } + case NODE_TEXCO_VOLUME_GENERATED: { + data = sd->P + sd->dP.dx; + +# ifdef __VOLUME__ + if (sd->object != OBJECT_NONE) + data = volume_normalized_position(kg, sd, data); +# endif + break; + } + } + + stack_store_float3(stack, out_offset, data); + return offset; +#else + return svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); +#endif +} + +ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg, + ccl_private ShaderData *sd, + uint32_t path_flag, + ccl_private float *stack, + uint4 node, + int offset) +{ +#ifdef __RAY_DIFFERENTIALS__ + float3 data; + uint type = node.y; + uint out_offset = node.z; + + switch (type) { + case NODE_TEXCO_OBJECT: { + data = sd->P + sd->dP.dy; + if (node.w == 0) { + if (sd->object != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &data); + } + } + else { + Transform tfm; + tfm.x = read_node_float(kg, &offset); + tfm.y = read_node_float(kg, &offset); + tfm.z = read_node_float(kg, &offset); + data = transform_point(&tfm, data); + } + break; + } + case NODE_TEXCO_NORMAL: { + data = sd->N; + object_inverse_normal_transform(kg, sd, &data); + break; + } + case NODE_TEXCO_CAMERA: { + Transform tfm = kernel_data.cam.worldtocamera; + + if (sd->object != OBJECT_NONE) + data = transform_point(&tfm, sd->P + sd->dP.dy); + else + data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg)); + break; + } + case NODE_TEXCO_WINDOW: { + if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && + kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, sd->ray_P + make_float3(0.0f, sd->ray_dP, 0.0f)); + else + data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy); + data.z = 0.0f; + break; + } + case NODE_TEXCO_REFLECTION: { + if (sd->object != OBJECT_NONE) + data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; + else + data = sd->I; + break; + } + case NODE_TEXCO_DUPLI_GENERATED: { + data = object_dupli_generated(kg, sd->object); + break; + } + case NODE_TEXCO_DUPLI_UV: { + data = object_dupli_uv(kg, sd->object); + break; + } + case NODE_TEXCO_VOLUME_GENERATED: { + data = sd->P + sd->dP.dy; + +# ifdef __VOLUME__ + if (sd->object != OBJECT_NONE) 
+ data = volume_normalized_position(kg, sd, data); +# endif + break; + } + } + + stack_store_float3(stack, out_offset, data); + return offset; +#else + return svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); +#endif +} + +ccl_device_noinline void svm_node_normal_map(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint color_offset, strength_offset, normal_offset, space; + svm_unpack_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space); + + float3 color = stack_load_float3(stack, color_offset); + color = 2.0f * make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f); + + bool is_backfacing = (sd->flag & SD_BACKFACING) != 0; + float3 N; + + if (space == NODE_NORMAL_MAP_TANGENT) { + /* tangent space */ + if (sd->object == OBJECT_NONE || (sd->type & PRIMITIVE_ALL_TRIANGLE) == 0) { + /* Fallback to unperturbed normal. */ + stack_store_float3(stack, normal_offset, sd->N); + return; + } + + /* first try to get tangent attribute */ + const AttributeDescriptor attr = find_attribute(kg, sd, node.z); + const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); + + if (attr.offset == ATTR_STD_NOT_FOUND || attr_sign.offset == ATTR_STD_NOT_FOUND) { + /* Fallback to unperturbed normal. */ + stack_store_float3(stack, normal_offset, sd->N); + return; + } + + /* get _unnormalized_ interpolated normal and tangent */ + float3 tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL); + float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL); + float3 normal; + + if (sd->shader & SHADER_SMOOTH_NORMAL) { + normal = triangle_smooth_normal_unnormalized(kg, sd, sd->Ng, sd->prim, sd->u, sd->v); + } + else { + normal = sd->Ng; + + /* the normal is already inverted, which is too soon for the math here */ + if (is_backfacing) { + normal = -normal; + } + + object_inverse_normal_transform(kg, sd, &normal); + } + + /* apply normal map */ + float3 B = sign * cross(normal, tangent); + N = safe_normalize(color.x * tangent + color.y * B + color.z * normal); + + /* transform to world space */ + object_normal_transform(kg, sd, &N); + } + else { + /* strange blender convention */ + if (space == NODE_NORMAL_MAP_BLENDER_OBJECT || space == NODE_NORMAL_MAP_BLENDER_WORLD) { + color.y = -color.y; + color.z = -color.z; + } + + /* object, world space */ + N = color; + + if (space == NODE_NORMAL_MAP_OBJECT || space == NODE_NORMAL_MAP_BLENDER_OBJECT) + object_normal_transform(kg, sd, &N); + else + N = safe_normalize(N); + } + + /* invert normal for backfacing polygons */ + if (is_backfacing) { + N = -N; + } + + float strength = stack_load_float(stack, strength_offset); + + if (strength != 1.0f) { + strength = max(strength, 0.0f); + N = safe_normalize(sd->N + (N - sd->N) * strength); + } + + if (is_zero(N)) { + N = sd->N; + } + + stack_store_float3(stack, normal_offset, N); +} + +ccl_device_noinline void svm_node_tangent(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint tangent_offset, direction_type, axis; + svm_unpack_node_uchar3(node.y, &tangent_offset, &direction_type, &axis); + + float3 tangent; + float3 attribute_value; + const AttributeDescriptor desc = find_attribute(kg, sd, node.z); + if (desc.offset != ATTR_STD_NOT_FOUND) { + if (desc.type == NODE_ATTR_FLOAT2) { + float2 value = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); + attribute_value.x = value.x; + attribute_value.y = value.y; + attribute_value.z = 0.0f; + } + else { + 
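The tangent-space branch of the normal map node above rebuilds a shading frame from the interpolated normal, the tangent attribute and its sign, then blends the decoded texture colour along tangent, bitangent and normal. A compact sketch of that TBN decode, using local vector helpers instead of Cycles' float3 math and assuming the tangent and sign come from the mesh as in the node:

// Illustrative TBN normal-map decode with plain structs (not Cycles' types).
#include <cmath>
#include <cstdio>

struct V3 { float x, y, z; };
static V3 cross(V3 a, V3 b) {
  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}
static V3 scale(V3 a, float s) { return {a.x * s, a.y * s, a.z * s}; }
static V3 add(V3 a, V3 b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; }
static V3 normalize(V3 a) {
  float l = std::sqrt(a.x * a.x + a.y * a.y + a.z * a.z);
  return l > 0.0f ? scale(a, 1.0f / l) : a;
}

V3 apply_normal_map(V3 color, V3 N, V3 T, float sign) {
  // Map texture colour from [0, 1] to a direction in [-1, 1].
  V3 c = {2.0f * (color.x - 0.5f), 2.0f * (color.y - 0.5f), 2.0f * (color.z - 0.5f)};
  V3 B = scale(cross(N, T), sign);            // bitangent completes the frame
  return normalize(add(add(scale(T, c.x), scale(B, c.y)), scale(N, c.z)));
}

int main() {
  V3 N{0, 0, 1}, T{1, 0, 0};
  V3 out = apply_normal_map({0.5f, 0.5f, 1.0f}, N, T, 1.0f);  // flat map -> unperturbed N
  std::printf("%f %f %f\n", out.x, out.y, out.z);
}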
attribute_value = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + } + } + + if (direction_type == NODE_TANGENT_UVMAP) { + /* UV map */ + if (desc.offset == ATTR_STD_NOT_FOUND) { + stack_store_float3(stack, tangent_offset, zero_float3()); + return; + } + else { + tangent = attribute_value; + } + } + else { + /* radial */ + float3 generated; + + if (desc.offset == ATTR_STD_NOT_FOUND) + generated = sd->P; + else + generated = attribute_value; + + if (axis == NODE_TANGENT_AXIS_X) + tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f)); + else if (axis == NODE_TANGENT_AXIS_Y) + tangent = make_float3(-(generated.z - 0.5f), 0.0f, (generated.x - 0.5f)); + else + tangent = make_float3(-(generated.y - 0.5f), (generated.x - 0.5f), 0.0f); + } + + object_normal_transform(kg, sd, &tangent); + tangent = cross(sd->N, normalize(cross(tangent, sd->N))); + stack_store_float3(stack, tangent_offset, tangent); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h new file mode 100644 index 00000000000..8c95c571815 --- /dev/null +++ b/intern/cycles/kernel/svm/types.h @@ -0,0 +1,601 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Stack */ + +/* SVM stack has a fixed size */ +#define SVM_STACK_SIZE 255 +/* SVM stack offsets with this value indicate that it's not on the stack */ +#define SVM_STACK_INVALID 255 + +#define SVM_BUMP_EVAL_STATE_SIZE 9 + +/* Nodes */ + +typedef enum ShaderNodeType { + NODE_END = 0, + NODE_SHADER_JUMP, + NODE_CLOSURE_BSDF, + NODE_CLOSURE_EMISSION, + NODE_CLOSURE_BACKGROUND, + NODE_CLOSURE_SET_WEIGHT, + NODE_CLOSURE_WEIGHT, + NODE_EMISSION_WEIGHT, + NODE_MIX_CLOSURE, + NODE_JUMP_IF_ZERO, + NODE_JUMP_IF_ONE, + NODE_GEOMETRY, + NODE_CONVERT, + NODE_TEX_COORD, + NODE_VALUE_F, + NODE_VALUE_V, + NODE_ATTR, + NODE_VERTEX_COLOR, + NODE_GEOMETRY_BUMP_DX, + NODE_GEOMETRY_BUMP_DY, + NODE_SET_DISPLACEMENT, + NODE_DISPLACEMENT, + NODE_VECTOR_DISPLACEMENT, + NODE_TEX_IMAGE, + NODE_TEX_IMAGE_BOX, + NODE_TEX_NOISE, + NODE_SET_BUMP, + NODE_ATTR_BUMP_DX, + NODE_ATTR_BUMP_DY, + NODE_VERTEX_COLOR_BUMP_DX, + NODE_VERTEX_COLOR_BUMP_DY, + NODE_TEX_COORD_BUMP_DX, + NODE_TEX_COORD_BUMP_DY, + NODE_CLOSURE_SET_NORMAL, + NODE_ENTER_BUMP_EVAL, + NODE_LEAVE_BUMP_EVAL, + NODE_HSV, + NODE_CLOSURE_HOLDOUT, + NODE_FRESNEL, + NODE_LAYER_WEIGHT, + NODE_CLOSURE_VOLUME, + NODE_PRINCIPLED_VOLUME, + NODE_MATH, + NODE_VECTOR_MATH, + NODE_RGB_RAMP, + NODE_GAMMA, + NODE_BRIGHTCONTRAST, + NODE_LIGHT_PATH, + NODE_OBJECT_INFO, + NODE_PARTICLE_INFO, + NODE_HAIR_INFO, + NODE_TEXTURE_MAPPING, + NODE_MAPPING, + NODE_MIN_MAX, + NODE_CAMERA, + NODE_TEX_ENVIRONMENT, + NODE_TEX_SKY, + NODE_TEX_GRADIENT, + NODE_TEX_VORONOI, + NODE_TEX_MUSGRAVE, + NODE_TEX_WAVE, + NODE_TEX_MAGIC, + NODE_TEX_CHECKER, + NODE_TEX_BRICK, + NODE_TEX_WHITE_NOISE, + NODE_NORMAL, + NODE_LIGHT_FALLOFF, + NODE_IES, + NODE_RGB_CURVES, + NODE_VECTOR_CURVES, + NODE_TANGENT, + 
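The tangent node above ends by projecting the chosen direction into the surface plane with cross(sd->N, normalize(cross(tangent, sd->N))), which removes any component along the normal. A small standalone sketch (local structs, not Cycles' math types) showing that the result is orthogonal to N:

// Illustrative sketch of the tangent orthogonalization used above.
#include <cmath>
#include <cstdio>

struct V3 { float x, y, z; };
static V3 cross(V3 a, V3 b) {
  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}
static float dot(V3 a, V3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
static V3 normalize(V3 a) {
  float l = std::sqrt(dot(a, a));
  return {a.x / l, a.y / l, a.z / l};
}

// cross(N, normalize(cross(T, N))) yields the part of T perpendicular to N.
V3 orthogonalize_tangent(V3 N, V3 T) {
  return cross(N, normalize(cross(T, N)));
}

int main() {
  V3 N{0.0f, 0.0f, 1.0f};
  V3 T{1.0f, 0.0f, 0.5f};                       // not perpendicular to N
  V3 T2 = orthogonalize_tangent(N, T);
  std::printf("dot(N, T2) = %f\n", dot(N, T2)); // ~0: tangent lies in the surface plane
}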
NODE_NORMAL_MAP, + NODE_INVERT, + NODE_MIX, + NODE_SEPARATE_VECTOR, + NODE_COMBINE_VECTOR, + NODE_SEPARATE_HSV, + NODE_COMBINE_HSV, + NODE_VECTOR_ROTATE, + NODE_VECTOR_TRANSFORM, + NODE_WIREFRAME, + NODE_WAVELENGTH, + NODE_BLACKBODY, + NODE_MAP_RANGE, + NODE_CLAMP, + NODE_BEVEL, + NODE_AMBIENT_OCCLUSION, + NODE_TEX_VOXEL, + NODE_AOV_START, + NODE_AOV_COLOR, + NODE_AOV_VALUE, + NODE_FLOAT_CURVE, + /* NOTE: for best OpenCL performance, item definition in the enum must + * match the switch case order in svm.h. */ +} ShaderNodeType; + +typedef enum NodeAttributeOutputType { + NODE_ATTR_OUTPUT_FLOAT3 = 0, + NODE_ATTR_OUTPUT_FLOAT, + NODE_ATTR_OUTPUT_FLOAT_ALPHA, +} NodeAttributeOutputType; + +typedef enum NodeAttributeType { + NODE_ATTR_FLOAT = 0, + NODE_ATTR_FLOAT2, + NODE_ATTR_FLOAT3, + NODE_ATTR_FLOAT4, + NODE_ATTR_RGBA, + NODE_ATTR_MATRIX +} NodeAttributeType; + +typedef enum NodeGeometry { + NODE_GEOM_P = 0, + NODE_GEOM_N, + NODE_GEOM_T, + NODE_GEOM_I, + NODE_GEOM_Ng, + NODE_GEOM_uv +} NodeGeometry; + +typedef enum NodeObjectInfo { + NODE_INFO_OB_LOCATION, + NODE_INFO_OB_COLOR, + NODE_INFO_OB_INDEX, + NODE_INFO_MAT_INDEX, + NODE_INFO_OB_RANDOM +} NodeObjectInfo; + +typedef enum NodeParticleInfo { + NODE_INFO_PAR_INDEX, + NODE_INFO_PAR_RANDOM, + NODE_INFO_PAR_AGE, + NODE_INFO_PAR_LIFETIME, + NODE_INFO_PAR_LOCATION, + NODE_INFO_PAR_ROTATION, + NODE_INFO_PAR_SIZE, + NODE_INFO_PAR_VELOCITY, + NODE_INFO_PAR_ANGULAR_VELOCITY +} NodeParticleInfo; + +typedef enum NodeHairInfo { + NODE_INFO_CURVE_IS_STRAND, + NODE_INFO_CURVE_INTERCEPT, + NODE_INFO_CURVE_LENGTH, + NODE_INFO_CURVE_THICKNESS, + /* Fade for minimum hair width transiency. */ + // NODE_INFO_CURVE_FADE, + NODE_INFO_CURVE_TANGENT_NORMAL, + NODE_INFO_CURVE_RANDOM, +} NodeHairInfo; + +typedef enum NodeLightPath { + NODE_LP_camera = 0, + NODE_LP_shadow, + NODE_LP_diffuse, + NODE_LP_glossy, + NODE_LP_singular, + NODE_LP_reflection, + NODE_LP_transmission, + NODE_LP_volume_scatter, + NODE_LP_backfacing, + NODE_LP_ray_length, + NODE_LP_ray_depth, + NODE_LP_ray_diffuse, + NODE_LP_ray_glossy, + NODE_LP_ray_transparent, + NODE_LP_ray_transmission, +} NodeLightPath; + +typedef enum NodeLightFalloff { + NODE_LIGHT_FALLOFF_QUADRATIC, + NODE_LIGHT_FALLOFF_LINEAR, + NODE_LIGHT_FALLOFF_CONSTANT +} NodeLightFalloff; + +typedef enum NodeTexCoord { + NODE_TEXCO_NORMAL, + NODE_TEXCO_OBJECT, + NODE_TEXCO_CAMERA, + NODE_TEXCO_WINDOW, + NODE_TEXCO_REFLECTION, + NODE_TEXCO_DUPLI_GENERATED, + NODE_TEXCO_DUPLI_UV, + NODE_TEXCO_VOLUME_GENERATED +} NodeTexCoord; + +typedef enum NodeMix { + NODE_MIX_BLEND = 0, + NODE_MIX_ADD, + NODE_MIX_MUL, + NODE_MIX_SUB, + NODE_MIX_SCREEN, + NODE_MIX_DIV, + NODE_MIX_DIFF, + NODE_MIX_DARK, + NODE_MIX_LIGHT, + NODE_MIX_OVERLAY, + NODE_MIX_DODGE, + NODE_MIX_BURN, + NODE_MIX_HUE, + NODE_MIX_SAT, + NODE_MIX_VAL, + NODE_MIX_COLOR, + NODE_MIX_SOFT, + NODE_MIX_LINEAR, + NODE_MIX_CLAMP /* used for the clamp UI option */ +} NodeMix; + +typedef enum NodeMathType { + NODE_MATH_ADD, + NODE_MATH_SUBTRACT, + NODE_MATH_MULTIPLY, + NODE_MATH_DIVIDE, + NODE_MATH_SINE, + NODE_MATH_COSINE, + NODE_MATH_TANGENT, + NODE_MATH_ARCSINE, + NODE_MATH_ARCCOSINE, + NODE_MATH_ARCTANGENT, + NODE_MATH_POWER, + NODE_MATH_LOGARITHM, + NODE_MATH_MINIMUM, + NODE_MATH_MAXIMUM, + NODE_MATH_ROUND, + NODE_MATH_LESS_THAN, + NODE_MATH_GREATER_THAN, + NODE_MATH_MODULO, + NODE_MATH_ABSOLUTE, + NODE_MATH_ARCTAN2, + NODE_MATH_FLOOR, + NODE_MATH_CEIL, + NODE_MATH_FRACTION, + NODE_MATH_SQRT, + NODE_MATH_INV_SQRT, + NODE_MATH_SIGN, + NODE_MATH_EXPONENT, + NODE_MATH_RADIANS, + 
NODE_MATH_DEGREES, + NODE_MATH_SINH, + NODE_MATH_COSH, + NODE_MATH_TANH, + NODE_MATH_TRUNC, + NODE_MATH_SNAP, + NODE_MATH_WRAP, + NODE_MATH_COMPARE, + NODE_MATH_MULTIPLY_ADD, + NODE_MATH_PINGPONG, + NODE_MATH_SMOOTH_MIN, + NODE_MATH_SMOOTH_MAX, +} NodeMathType; + +typedef enum NodeVectorMathType { + NODE_VECTOR_MATH_ADD, + NODE_VECTOR_MATH_SUBTRACT, + NODE_VECTOR_MATH_MULTIPLY, + NODE_VECTOR_MATH_DIVIDE, + + NODE_VECTOR_MATH_CROSS_PRODUCT, + NODE_VECTOR_MATH_PROJECT, + NODE_VECTOR_MATH_REFLECT, + NODE_VECTOR_MATH_DOT_PRODUCT, + + NODE_VECTOR_MATH_DISTANCE, + NODE_VECTOR_MATH_LENGTH, + NODE_VECTOR_MATH_SCALE, + NODE_VECTOR_MATH_NORMALIZE, + + NODE_VECTOR_MATH_SNAP, + NODE_VECTOR_MATH_FLOOR, + NODE_VECTOR_MATH_CEIL, + NODE_VECTOR_MATH_MODULO, + NODE_VECTOR_MATH_FRACTION, + NODE_VECTOR_MATH_ABSOLUTE, + NODE_VECTOR_MATH_MINIMUM, + NODE_VECTOR_MATH_MAXIMUM, + NODE_VECTOR_MATH_WRAP, + NODE_VECTOR_MATH_SINE, + NODE_VECTOR_MATH_COSINE, + NODE_VECTOR_MATH_TANGENT, + NODE_VECTOR_MATH_REFRACT, + NODE_VECTOR_MATH_FACEFORWARD, + NODE_VECTOR_MATH_MULTIPLY_ADD, +} NodeVectorMathType; + +typedef enum NodeClampType { + NODE_CLAMP_MINMAX, + NODE_CLAMP_RANGE, +} NodeClampType; + +typedef enum NodeMapRangeType { + NODE_MAP_RANGE_LINEAR, + NODE_MAP_RANGE_STEPPED, + NODE_MAP_RANGE_SMOOTHSTEP, + NODE_MAP_RANGE_SMOOTHERSTEP, +} NodeMapRangeType; + +typedef enum NodeMappingType { + NODE_MAPPING_TYPE_POINT, + NODE_MAPPING_TYPE_TEXTURE, + NODE_MAPPING_TYPE_VECTOR, + NODE_MAPPING_TYPE_NORMAL +} NodeMappingType; + +typedef enum NodeVectorRotateType { + NODE_VECTOR_ROTATE_TYPE_AXIS, + NODE_VECTOR_ROTATE_TYPE_AXIS_X, + NODE_VECTOR_ROTATE_TYPE_AXIS_Y, + NODE_VECTOR_ROTATE_TYPE_AXIS_Z, + NODE_VECTOR_ROTATE_TYPE_EULER_XYZ, +} NodeVectorRotateType; + +typedef enum NodeVectorTransformType { + NODE_VECTOR_TRANSFORM_TYPE_VECTOR, + NODE_VECTOR_TRANSFORM_TYPE_POINT, + NODE_VECTOR_TRANSFORM_TYPE_NORMAL +} NodeVectorTransformType; + +typedef enum NodeVectorTransformConvertSpace { + NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD, + NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT, + NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA +} NodeVectorTransformConvertSpace; + +typedef enum NodeConvert { + NODE_CONVERT_FV, + NODE_CONVERT_FI, + NODE_CONVERT_CF, + NODE_CONVERT_CI, + NODE_CONVERT_VF, + NODE_CONVERT_VI, + NODE_CONVERT_IF, + NODE_CONVERT_IV +} NodeConvert; + +typedef enum NodeMusgraveType { + NODE_MUSGRAVE_MULTIFRACTAL, + NODE_MUSGRAVE_FBM, + NODE_MUSGRAVE_HYBRID_MULTIFRACTAL, + NODE_MUSGRAVE_RIDGED_MULTIFRACTAL, + NODE_MUSGRAVE_HETERO_TERRAIN +} NodeMusgraveType; + +typedef enum NodeWaveType { NODE_WAVE_BANDS, NODE_WAVE_RINGS } NodeWaveType; + +typedef enum NodeWaveBandsDirection { + NODE_WAVE_BANDS_DIRECTION_X, + NODE_WAVE_BANDS_DIRECTION_Y, + NODE_WAVE_BANDS_DIRECTION_Z, + NODE_WAVE_BANDS_DIRECTION_DIAGONAL +} NodeWaveBandsDirection; + +typedef enum NodeWaveRingsDirection { + NODE_WAVE_RINGS_DIRECTION_X, + NODE_WAVE_RINGS_DIRECTION_Y, + NODE_WAVE_RINGS_DIRECTION_Z, + NODE_WAVE_RINGS_DIRECTION_SPHERICAL +} NodeWaveRingsDirection; + +typedef enum NodeWaveProfile { + NODE_WAVE_PROFILE_SIN, + NODE_WAVE_PROFILE_SAW, + NODE_WAVE_PROFILE_TRI, +} NodeWaveProfile; + +typedef enum NodeSkyType { NODE_SKY_PREETHAM, NODE_SKY_HOSEK, NODE_SKY_NISHITA } NodeSkyType; + +typedef enum NodeGradientType { + NODE_BLEND_LINEAR, + NODE_BLEND_QUADRATIC, + NODE_BLEND_EASING, + NODE_BLEND_DIAGONAL, + NODE_BLEND_RADIAL, + NODE_BLEND_QUADRATIC_SPHERE, + NODE_BLEND_SPHERICAL +} NodeGradientType; + +typedef enum NodeVoronoiDistanceMetric { + NODE_VORONOI_EUCLIDEAN, + 
NODE_VORONOI_MANHATTAN, + NODE_VORONOI_CHEBYCHEV, + NODE_VORONOI_MINKOWSKI, +} NodeVoronoiDistanceMetric; + +typedef enum NodeVoronoiFeature { + NODE_VORONOI_F1, + NODE_VORONOI_F2, + NODE_VORONOI_SMOOTH_F1, + NODE_VORONOI_DISTANCE_TO_EDGE, + NODE_VORONOI_N_SPHERE_RADIUS, +} NodeVoronoiFeature; + +typedef enum NodeBlendWeightType { + NODE_LAYER_WEIGHT_FRESNEL, + NODE_LAYER_WEIGHT_FACING +} NodeBlendWeightType; + +typedef enum NodeTangentDirectionType { + NODE_TANGENT_RADIAL, + NODE_TANGENT_UVMAP +} NodeTangentDirectionType; + +typedef enum NodeTangentAxis { + NODE_TANGENT_AXIS_X, + NODE_TANGENT_AXIS_Y, + NODE_TANGENT_AXIS_Z +} NodeTangentAxis; + +typedef enum NodeNormalMapSpace { + NODE_NORMAL_MAP_TANGENT, + NODE_NORMAL_MAP_OBJECT, + NODE_NORMAL_MAP_WORLD, + NODE_NORMAL_MAP_BLENDER_OBJECT, + NODE_NORMAL_MAP_BLENDER_WORLD, +} NodeNormalMapSpace; + +typedef enum NodeImageProjection { + NODE_IMAGE_PROJ_FLAT = 0, + NODE_IMAGE_PROJ_BOX = 1, + NODE_IMAGE_PROJ_SPHERE = 2, + NODE_IMAGE_PROJ_TUBE = 3, +} NodeImageProjection; + +typedef enum NodeImageFlags { + NODE_IMAGE_COMPRESS_AS_SRGB = 1, + NODE_IMAGE_ALPHA_UNASSOCIATE = 2, +} NodeImageFlags; + +typedef enum NodeEnvironmentProjection { + NODE_ENVIRONMENT_EQUIRECTANGULAR = 0, + NODE_ENVIRONMENT_MIRROR_BALL = 1, +} NodeEnvironmentProjection; + +typedef enum NodeBumpOffset { + NODE_BUMP_OFFSET_CENTER, + NODE_BUMP_OFFSET_DX, + NODE_BUMP_OFFSET_DY, +} NodeBumpOffset; + +typedef enum NodeTexVoxelSpace { + NODE_TEX_VOXEL_SPACE_OBJECT = 0, + NODE_TEX_VOXEL_SPACE_WORLD = 1, +} NodeTexVoxelSpace; + +typedef enum NodeAO { + NODE_AO_ONLY_LOCAL = (1 << 0), + NODE_AO_INSIDE = (1 << 1), + NODE_AO_GLOBAL_RADIUS = (1 << 2), +} NodeAO; + +typedef enum ShaderType { + SHADER_TYPE_SURFACE, + SHADER_TYPE_VOLUME, + SHADER_TYPE_DISPLACEMENT, + SHADER_TYPE_BUMP, +} ShaderType; + +typedef enum NodePrincipledHairParametrization { + NODE_PRINCIPLED_HAIR_REFLECTANCE = 0, + NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION = 1, + NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION = 2, + NODE_PRINCIPLED_HAIR_NUM, +} NodePrincipledHairParametrization; + +/* Closure */ + +typedef enum ClosureType { + /* Special type, flags generic node as a non-BSDF. 
*/ + CLOSURE_NONE_ID, + + CLOSURE_BSDF_ID, + + /* Diffuse */ + CLOSURE_BSDF_DIFFUSE_ID, + CLOSURE_BSDF_OREN_NAYAR_ID, + CLOSURE_BSDF_DIFFUSE_RAMP_ID, + CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID, + CLOSURE_BSDF_PRINCIPLED_SHEEN_ID, + CLOSURE_BSDF_DIFFUSE_TOON_ID, + CLOSURE_BSDF_TRANSLUCENT_ID, + + /* Glossy */ + CLOSURE_BSDF_REFLECTION_ID, + CLOSURE_BSDF_MICROFACET_GGX_ID, + CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID, + CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID, + CLOSURE_BSDF_MICROFACET_BECKMANN_ID, + CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID, + CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID, + CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID, + CLOSURE_BSDF_ASHIKHMIN_VELVET_ID, + CLOSURE_BSDF_PHONG_RAMP_ID, + CLOSURE_BSDF_GLOSSY_TOON_ID, + CLOSURE_BSDF_HAIR_REFLECTION_ID, + + /* Transmission */ + CLOSURE_BSDF_REFRACTION_ID, + CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID, + CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID, + CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID, + CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID, + CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID, + CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID, + CLOSURE_BSDF_SHARP_GLASS_ID, + CLOSURE_BSDF_HAIR_PRINCIPLED_ID, + CLOSURE_BSDF_HAIR_TRANSMISSION_ID, + + /* Special cases */ + CLOSURE_BSDF_TRANSPARENT_ID, + + /* BSSRDF */ + CLOSURE_BSSRDF_BURLEY_ID, + CLOSURE_BSSRDF_RANDOM_WALK_ID, + CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID, + + /* Other */ + CLOSURE_HOLDOUT_ID, + + /* Volume */ + CLOSURE_VOLUME_ID, + CLOSURE_VOLUME_ABSORPTION_ID, + CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, + + CLOSURE_BSDF_PRINCIPLED_ID, + + NBUILTIN_CLOSURES +} ClosureType; + +/* watch this, being lazy with memory usage */ +#define CLOSURE_IS_BSDF(type) (type <= CLOSURE_BSDF_TRANSPARENT_ID) +#define CLOSURE_IS_BSDF_DIFFUSE(type) \ + (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_TRANSLUCENT_ID) +#define CLOSURE_IS_BSDF_GLOSSY(type) \ + ((type >= CLOSURE_BSDF_REFLECTION_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID) || \ + (type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID)) +#define CLOSURE_IS_BSDF_TRANSMISSION(type) \ + (type >= CLOSURE_BSDF_REFRACTION_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID) +#define CLOSURE_IS_BSDF_SINGULAR(type) \ + (type == CLOSURE_BSDF_REFLECTION_ID || type == CLOSURE_BSDF_REFRACTION_ID || \ + type == CLOSURE_BSDF_TRANSPARENT_ID) +#define CLOSURE_IS_BSDF_TRANSPARENT(type) (type == CLOSURE_BSDF_TRANSPARENT_ID) +#define CLOSURE_IS_BSDF_MULTISCATTER(type) \ + (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID || \ + type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) +#define CLOSURE_IS_BSDF_MICROFACET(type) \ + ((type >= CLOSURE_BSDF_MICROFACET_GGX_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID) || \ + (type >= CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID && \ + type <= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) || \ + (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID)) +#define CLOSURE_IS_BSDF_MICROFACET_FRESNEL(type) \ + (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID || \ + type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID || \ + type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID || \ + type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) +#define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) +#define CLOSURE_IS_BSSRDF(type) \ + (type >= CLOSURE_BSSRDF_BURLEY_ID && type <= CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID) +#define CLOSURE_IS_VOLUME(type) \ + (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) +#define CLOSURE_IS_VOLUME_SCATTER(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) +#define 
CLOSURE_IS_VOLUME_ABSORPTION(type) (type == CLOSURE_VOLUME_ABSORPTION_ID) +#define CLOSURE_IS_HOLDOUT(type) (type == CLOSURE_HOLDOUT_ID) +#define CLOSURE_IS_PHASE(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) +#define CLOSURE_IS_GLASS(type) \ + (type >= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID) +#define CLOSURE_IS_PRINCIPLED(type) (type == CLOSURE_BSDF_PRINCIPLED_ID) + +#define CLOSURE_WEIGHT_CUTOFF 1e-5f + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/value.h b/intern/cycles/kernel/svm/value.h new file mode 100644 index 00000000000..cc62f1e2a82 --- /dev/null +++ b/intern/cycles/kernel/svm/value.h @@ -0,0 +1,47 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Value Nodes */ + +ccl_device void svm_node_value_f(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint ivalue, + uint out_offset) +{ + stack_store_float(stack, out_offset, __uint_as_float(ivalue)); +} + +ccl_device int svm_node_value_v(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint out_offset, + int offset) +{ + /* read extra data */ + uint4 node1 = read_node(kg, &offset); + float3 p = make_float3( + __uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w)); + + stack_store_float3(stack, out_offset, p); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/vector_rotate.h b/intern/cycles/kernel/svm/vector_rotate.h new file mode 100644 index 00000000000..2a0d331734c --- /dev/null +++ b/intern/cycles/kernel/svm/vector_rotate.h @@ -0,0 +1,85 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
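The CLOSURE_IS_* macros above classify a closure with one or two range comparisons, which only works because related ClosureType entries are kept contiguous in the enum (hence the "watch this, being lazy with memory usage" note). A toy sketch of the same pattern, with a made-up enum rather than the real ClosureType, to show why the ordering is load-bearing:

// Illustrative sketch of range-based classification over a grouped enum.
#include <cstdio>

enum ToyClosure { TOY_NONE, TOY_DIFFUSE_A, TOY_DIFFUSE_B, TOY_GLOSSY_A, TOY_GLOSSY_B };

// A single comparison pair classifies the whole group; inserting a new
// member outside its group would silently break the test.
#define TOY_IS_DIFFUSE(t) ((t) >= TOY_DIFFUSE_A && (t) <= TOY_DIFFUSE_B)
#define TOY_IS_GLOSSY(t)  ((t) >= TOY_GLOSSY_A && (t) <= TOY_GLOSSY_B)

int main() {
  std::printf("%d %d\n", TOY_IS_DIFFUSE(TOY_DIFFUSE_B), TOY_IS_DIFFUSE(TOY_GLOSSY_A));  // 1 0
}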
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Vector Rotate */ + +ccl_device_noinline void svm_node_vector_rotate(ccl_private ShaderData *sd, + ccl_private float *stack, + uint input_stack_offsets, + uint axis_stack_offsets, + uint result_stack_offset) +{ + uint type, vector_stack_offset, rotation_stack_offset, center_stack_offset, axis_stack_offset, + angle_stack_offset, invert; + + svm_unpack_node_uchar4( + input_stack_offsets, &type, &vector_stack_offset, &rotation_stack_offset, &invert); + svm_unpack_node_uchar3( + axis_stack_offsets, ¢er_stack_offset, &axis_stack_offset, &angle_stack_offset); + + if (stack_valid(result_stack_offset)) { + + float3 vector = stack_load_float3(stack, vector_stack_offset); + float3 center = stack_load_float3(stack, center_stack_offset); + float3 result = make_float3(0.0f, 0.0f, 0.0f); + + if (type == NODE_VECTOR_ROTATE_TYPE_EULER_XYZ) { + float3 rotation = stack_load_float3(stack, rotation_stack_offset); // Default XYZ. + Transform rotationTransform = euler_to_transform(rotation); + if (invert) { + result = transform_direction_transposed(&rotationTransform, vector - center) + center; + } + else { + result = transform_direction(&rotationTransform, vector - center) + center; + } + } + else { + float3 axis; + float axis_length; + switch (type) { + case NODE_VECTOR_ROTATE_TYPE_AXIS_X: + axis = make_float3(1.0f, 0.0f, 0.0f); + axis_length = 1.0f; + break; + case NODE_VECTOR_ROTATE_TYPE_AXIS_Y: + axis = make_float3(0.0f, 1.0f, 0.0f); + axis_length = 1.0f; + break; + case NODE_VECTOR_ROTATE_TYPE_AXIS_Z: + axis = make_float3(0.0f, 0.0f, 1.0f); + axis_length = 1.0f; + break; + default: + axis = stack_load_float3(stack, axis_stack_offset); + axis_length = len(axis); + break; + } + float angle = stack_load_float(stack, angle_stack_offset); + angle = invert ? -angle : angle; + result = (axis_length != 0.0f) ? + rotate_around_axis(vector - center, axis / axis_length, angle) + center : + vector; + } + + stack_store_float3(stack, result_stack_offset, result); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/vector_transform.h b/intern/cycles/kernel/svm/vector_transform.h new file mode 100644 index 00000000000..d7a51078cea --- /dev/null +++ b/intern/cycles/kernel/svm/vector_transform.h @@ -0,0 +1,109 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
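For the axis-angle path of the vector rotate node above, the vector is shifted to the rotation center, rotated about the normalized axis, and shifted back, with inversion handled by negating the angle. A self-contained sketch using Rodrigues' rotation formula in place of Cycles' rotate_around_axis helper (plain structs, not float3):

// Illustrative sketch: rotate a point around an axis through a center.
#include <cmath>
#include <cstdio>

struct V3 { float x, y, z; };
static V3 add(V3 a, V3 b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; }
static V3 sub(V3 a, V3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; }
static V3 mul(V3 a, float s) { return {a.x * s, a.y * s, a.z * s}; }
static float dot(V3 a, V3 b) { return a.x * b.x + a.y * b.y + a.z * b.z; }
static V3 cross(V3 a, V3 b) {
  return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
}

// axis must be unit length; angle in radians.
V3 rotate_about(V3 p, V3 center, V3 axis, float angle) {
  V3 v = sub(p, center);
  float c = std::cos(angle), s = std::sin(angle);
  // Rodrigues: v*cos + (axis x v)*sin + axis*(axis.v)*(1 - cos)
  V3 r = add(add(mul(v, c), mul(cross(axis, v), s)), mul(axis, dot(axis, v) * (1.0f - c)));
  return add(r, center);
}

int main() {
  V3 out = rotate_about({1, 0, 0}, {0, 0, 0}, {0, 0, 1}, 1.5707963f);  // 90 deg about Z
  std::printf("%f %f %f\n", out.x, out.y, out.z);                      // ~(0, 1, 0)
}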
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Vector Transform */ + +ccl_device_noinline void svm_node_vector_transform(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint itype, ifrom, ito; + uint vector_in, vector_out; + + svm_unpack_node_uchar3(node.y, &itype, &ifrom, &ito); + svm_unpack_node_uchar2(node.z, &vector_in, &vector_out); + + float3 in = stack_load_float3(stack, vector_in); + + NodeVectorTransformType type = (NodeVectorTransformType)itype; + NodeVectorTransformConvertSpace from = (NodeVectorTransformConvertSpace)ifrom; + NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito; + + Transform tfm; + bool is_object = (sd->object != OBJECT_NONE); + bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR || + type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL); + + /* From world */ + if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD) { + if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { + tfm = kernel_data.cam.worldtocamera; + if (is_direction) + in = transform_direction(&tfm, in); + else + in = transform_point(&tfm, in); + } + else if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) { + if (is_direction) + object_inverse_dir_transform(kg, sd, &in); + else + object_inverse_position_transform(kg, sd, &in); + } + } + + /* From camera */ + else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { + if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || + to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) { + tfm = kernel_data.cam.cameratoworld; + if (is_direction) + in = transform_direction(&tfm, in); + else + in = transform_point(&tfm, in); + } + if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) { + if (is_direction) + object_inverse_dir_transform(kg, sd, &in); + else + object_inverse_position_transform(kg, sd, &in); + } + } + + /* From object */ + else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) { + if ((to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || + to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) && + is_object) { + if (is_direction) + object_dir_transform(kg, sd, &in); + else + object_position_transform(kg, sd, &in); + } + if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { + tfm = kernel_data.cam.worldtocamera; + if (is_direction) + in = transform_direction(&tfm, in); + else + in = transform_point(&tfm, in); + } + } + + /* Normalize Normal */ + if (type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL) + in = normalize(in); + + /* Output */ + if (stack_valid(vector_out)) { + stack_store_float3(stack, vector_out, in); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/vertex_color.h b/intern/cycles/kernel/svm/vertex_color.h new file mode 100644 index 00000000000..b676a28c0e3 --- /dev/null +++ b/intern/cycles/kernel/svm/vertex_color.h @@ -0,0 +1,82 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
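The vector transform node above picks transform_point or transform_direction (or the object_*_transform helpers) depending on whether the input is a point or a vector/normal, since directions must ignore the translation part of an affine transform. A minimal sketch of that distinction with a hand-rolled 3x4 transform, not Cycles' Transform type:

// Illustrative sketch of why points and directions transform differently.
#include <cstdio>

struct V3 { float x, y, z; };
struct Xform { float m[3][4]; };  // 3x4 affine transform, row-major

static V3 transform_point(const Xform &t, V3 p) {
  return {t.m[0][0] * p.x + t.m[0][1] * p.y + t.m[0][2] * p.z + t.m[0][3],
          t.m[1][0] * p.x + t.m[1][1] * p.y + t.m[1][2] * p.z + t.m[1][3],
          t.m[2][0] * p.x + t.m[2][1] * p.y + t.m[2][2] * p.z + t.m[2][3]};
}
static V3 transform_direction(const Xform &t, V3 d) {
  return {t.m[0][0] * d.x + t.m[0][1] * d.y + t.m[0][2] * d.z,
          t.m[1][0] * d.x + t.m[1][1] * d.y + t.m[1][2] * d.z,
          t.m[2][0] * d.x + t.m[2][1] * d.y + t.m[2][2] * d.z};
}

int main() {
  // Pure translation by (5, 0, 0): moves points, leaves directions unchanged.
  Xform t = {{{1, 0, 0, 5}, {0, 1, 0, 0}, {0, 0, 1, 0}}};
  V3 p = transform_point(t, {1, 2, 3});
  V3 d = transform_direction(t, {1, 2, 3});
  std::printf("point (%g %g %g)  direction (%g %g %g)\n", p.x, p.y, p.z, d.x, d.y, d.z);
}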
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline void svm_node_vertex_color(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint layer_id, + uint color_offset, + uint alpha_offset) +{ + AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); + if (descriptor.offset != ATTR_STD_NOT_FOUND) { + float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, NULL, NULL); + stack_store_float3(stack, color_offset, float4_to_float3(vertex_color)); + stack_store_float(stack, alpha_offset, vertex_color.w); + } + else { + stack_store_float3(stack, color_offset, make_float3(0.0f, 0.0f, 0.0f)); + stack_store_float(stack, alpha_offset, 0.0f); + } +} + +ccl_device_noinline void svm_node_vertex_color_bump_dx(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint layer_id, + uint color_offset, + uint alpha_offset) +{ + AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); + if (descriptor.offset != ATTR_STD_NOT_FOUND) { + float4 dx; + float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, &dx, NULL); + vertex_color += dx; + stack_store_float3(stack, color_offset, float4_to_float3(vertex_color)); + stack_store_float(stack, alpha_offset, vertex_color.w); + } + else { + stack_store_float3(stack, color_offset, make_float3(0.0f, 0.0f, 0.0f)); + stack_store_float(stack, alpha_offset, 0.0f); + } +} + +ccl_device_noinline void svm_node_vertex_color_bump_dy(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint layer_id, + uint color_offset, + uint alpha_offset) +{ + AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); + if (descriptor.offset != ATTR_STD_NOT_FOUND) { + float4 dy; + float4 vertex_color = primitive_surface_attribute_float4(kg, sd, descriptor, NULL, &dy); + vertex_color += dy; + stack_store_float3(stack, color_offset, float4_to_float3(vertex_color)); + stack_store_float(stack, alpha_offset, vertex_color.w); + } + else { + stack_store_float3(stack, color_offset, make_float3(0.0f, 0.0f, 0.0f)); + stack_store_float(stack, alpha_offset, 0.0f); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/voronoi.h b/intern/cycles/kernel/svm/voronoi.h new file mode 100644 index 00000000000..730965b6aed --- /dev/null +++ b/intern/cycles/kernel/svm/voronoi.h @@ -0,0 +1,1164 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* + * Original code is under the MIT License, Copyright (c) 2013 Inigo Quilez. + * + * Smooth Voronoi: + * + * - https://wiki.blender.org/wiki/User:OmarSquircleArt/GSoC2019/Documentation/Smooth_Voronoi + * + * Distance To Edge based on: + * + * - https://www.iquilezles.org/www/articles/voronoilines/voronoilines.htm + * - https://www.shadertoy.com/view/ldl3W8 + * + * With optimization to change -2..2 scan window to -1..1 for better performance, + * as explained in https://www.shadertoy.com/view/llG3zy. 
+ */ + +/* **** 1D Voronoi **** */ + +ccl_device float voronoi_distance_1d(float a, + float b, + NodeVoronoiDistanceMetric metric, + float exponent) +{ + return fabsf(b - a); +} + +ccl_device void voronoi_f1_1d(float w, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float *outW) +{ + float cellPosition = floorf(w); + float localPosition = w - cellPosition; + + float minDistance = 8.0f; + float targetOffset = 0.0f; + float targetPosition = 0.0f; + for (int i = -1; i <= 1; i++) { + float cellOffset = i; + float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_1d(pointPosition, localPosition, metric, exponent); + if (distanceToPoint < minDistance) { + targetOffset = cellOffset; + minDistance = distanceToPoint; + targetPosition = pointPosition; + } + } + *outDistance = minDistance; + *outColor = hash_float_to_float3(cellPosition + targetOffset); + *outW = targetPosition + cellPosition; +} + +ccl_device void voronoi_smooth_f1_1d(float w, + float smoothness, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float *outW) +{ + float cellPosition = floorf(w); + float localPosition = w - cellPosition; + + float smoothDistance = 8.0f; + float smoothPosition = 0.0f; + float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f); + for (int i = -2; i <= 2; i++) { + float cellOffset = i; + float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_1d(pointPosition, localPosition, metric, exponent); + float h = smoothstep( + 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness); + float correctionFactor = smoothness * h * (1.0f - h); + smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor; + correctionFactor /= 1.0f + 3.0f * smoothness; + float3 cellColor = hash_float_to_float3(cellPosition + cellOffset); + smoothColor = mix(smoothColor, cellColor, h) - correctionFactor; + smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor; + } + *outDistance = smoothDistance; + *outColor = smoothColor; + *outW = cellPosition + smoothPosition; +} + +ccl_device void voronoi_f2_1d(float w, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float *outW) +{ + float cellPosition = floorf(w); + float localPosition = w - cellPosition; + + float distanceF1 = 8.0f; + float distanceF2 = 8.0f; + float offsetF1 = 0.0f; + float positionF1 = 0.0f; + float offsetF2 = 0.0f; + float positionF2 = 0.0f; + for (int i = -1; i <= 1; i++) { + float cellOffset = i; + float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_1d(pointPosition, localPosition, metric, exponent); + if (distanceToPoint < distanceF1) { + distanceF2 = distanceF1; + distanceF1 = distanceToPoint; + offsetF2 = offsetF1; + offsetF1 = cellOffset; + positionF2 = positionF1; + positionF1 = pointPosition; + } + else if (distanceToPoint < distanceF2) { + distanceF2 = distanceToPoint; + offsetF2 = cellOffset; + positionF2 = pointPosition; + } + } + *outDistance = distanceF2; + *outColor = hash_float_to_float3(cellPosition + offsetF2); + *outW = positionF2 + cellPosition; +} + 
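The smooth F1 variants above replace the hard minimum of plain F1 with a smooth-minimum accumulation: each candidate distance is folded in through a smoothstep weight h and a correction term smoothness * h * (1 - h). A scalar sketch of just that distance accumulation (the colour and position blends follow the same pattern with an extra normalization), using local smoothstep/mix helpers:

// Illustrative scalar sketch of the smooth-minimum accumulation used by the
// smooth F1 variants above (per the referenced smooth Voronoi write-ups).
#include <algorithm>
#include <cstdio>

static float smoothstep01(float x) {
  x = std::min(std::max(x, 0.0f), 1.0f);
  return x * x * (3.0f - 2.0f * x);
}
static float mix(float a, float b, float t) { return a + (b - a) * t; }

// Fold a new candidate distance into the running smooth minimum.
float smooth_min_accumulate(float smooth_dist, float d, float smoothness) {
  float h = smoothstep01(0.5f + 0.5f * (smooth_dist - d) / smoothness);
  float correction = smoothness * h * (1.0f - h);
  return mix(smooth_dist, d, h) - correction;
}

int main() {
  float smooth_dist = 8.0f;                       // same sentinel as the kernel code
  const float candidates[] = {1.3f, 0.7f, 0.9f};
  for (float d : candidates)
    smooth_dist = smooth_min_accumulate(smooth_dist, d, 0.5f);
  std::printf("smooth F1 distance ~ %f (hard min would be 0.7)\n", smooth_dist);
}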
+ccl_device void voronoi_distance_to_edge_1d(float w, + float randomness, + ccl_private float *outDistance) +{ + float cellPosition = floorf(w); + float localPosition = w - cellPosition; + + float midPointPosition = hash_float_to_float(cellPosition) * randomness; + float leftPointPosition = -1.0f + hash_float_to_float(cellPosition - 1.0f) * randomness; + float rightPointPosition = 1.0f + hash_float_to_float(cellPosition + 1.0f) * randomness; + float distanceToMidLeft = fabsf((midPointPosition + leftPointPosition) / 2.0f - localPosition); + float distanceToMidRight = fabsf((midPointPosition + rightPointPosition) / 2.0f - localPosition); + + *outDistance = min(distanceToMidLeft, distanceToMidRight); +} + +ccl_device void voronoi_n_sphere_radius_1d(float w, float randomness, ccl_private float *outRadius) +{ + float cellPosition = floorf(w); + float localPosition = w - cellPosition; + + float closestPoint = 0.0f; + float closestPointOffset = 0.0f; + float minDistance = 8.0f; + for (int i = -1; i <= 1; i++) { + float cellOffset = i; + float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness; + float distanceToPoint = fabsf(pointPosition - localPosition); + if (distanceToPoint < minDistance) { + minDistance = distanceToPoint; + closestPoint = pointPosition; + closestPointOffset = cellOffset; + } + } + + minDistance = 8.0f; + float closestPointToClosestPoint = 0.0f; + for (int i = -1; i <= 1; i++) { + if (i == 0) { + continue; + } + float cellOffset = i + closestPointOffset; + float pointPosition = cellOffset + hash_float_to_float(cellPosition + cellOffset) * randomness; + float distanceToPoint = fabsf(closestPoint - pointPosition); + if (distanceToPoint < minDistance) { + minDistance = distanceToPoint; + closestPointToClosestPoint = pointPosition; + } + } + *outRadius = fabsf(closestPointToClosestPoint - closestPoint) / 2.0f; +} + +/* **** 2D Voronoi **** */ + +ccl_device float voronoi_distance_2d(float2 a, + float2 b, + NodeVoronoiDistanceMetric metric, + float exponent) +{ + if (metric == NODE_VORONOI_EUCLIDEAN) { + return distance(a, b); + } + else if (metric == NODE_VORONOI_MANHATTAN) { + return fabsf(a.x - b.x) + fabsf(a.y - b.y); + } + else if (metric == NODE_VORONOI_CHEBYCHEV) { + return max(fabsf(a.x - b.x), fabsf(a.y - b.y)); + } + else if (metric == NODE_VORONOI_MINKOWSKI) { + return powf(powf(fabsf(a.x - b.x), exponent) + powf(fabsf(a.y - b.y), exponent), + 1.0f / exponent); + } + else { + return 0.0f; + } +} + +ccl_device void voronoi_f1_2d(float2 coord, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float2 *outPosition) +{ + float2 cellPosition = floor(coord); + float2 localPosition = coord - cellPosition; + + float minDistance = 8.0f; + float2 targetOffset = make_float2(0.0f, 0.0f); + float2 targetPosition = make_float2(0.0f, 0.0f); + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + float2 cellOffset = make_float2(i, j); + float2 pointPosition = cellOffset + + hash_float2_to_float2(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_2d(pointPosition, localPosition, metric, exponent); + if (distanceToPoint < minDistance) { + targetOffset = cellOffset; + minDistance = distanceToPoint; + targetPosition = pointPosition; + } + } + } + *outDistance = minDistance; + *outColor = hash_float2_to_float3(cellPosition + targetOffset); + *outPosition = targetPosition + cellPosition; +} + +ccl_device 
void voronoi_smooth_f1_2d(float2 coord, + float smoothness, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float2 *outPosition) +{ + float2 cellPosition = floor(coord); + float2 localPosition = coord - cellPosition; + + float smoothDistance = 8.0f; + float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f); + float2 smoothPosition = make_float2(0.0f, 0.0f); + for (int j = -2; j <= 2; j++) { + for (int i = -2; i <= 2; i++) { + float2 cellOffset = make_float2(i, j); + float2 pointPosition = cellOffset + + hash_float2_to_float2(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_2d(pointPosition, localPosition, metric, exponent); + float h = smoothstep( + 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness); + float correctionFactor = smoothness * h * (1.0f - h); + smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor; + correctionFactor /= 1.0f + 3.0f * smoothness; + float3 cellColor = hash_float2_to_float3(cellPosition + cellOffset); + smoothColor = mix(smoothColor, cellColor, h) - correctionFactor; + smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor; + } + } + *outDistance = smoothDistance; + *outColor = smoothColor; + *outPosition = cellPosition + smoothPosition; +} + +ccl_device void voronoi_f2_2d(float2 coord, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float2 *outPosition) +{ + float2 cellPosition = floor(coord); + float2 localPosition = coord - cellPosition; + + float distanceF1 = 8.0f; + float distanceF2 = 8.0f; + float2 offsetF1 = make_float2(0.0f, 0.0f); + float2 positionF1 = make_float2(0.0f, 0.0f); + float2 offsetF2 = make_float2(0.0f, 0.0f); + float2 positionF2 = make_float2(0.0f, 0.0f); + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + float2 cellOffset = make_float2(i, j); + float2 pointPosition = cellOffset + + hash_float2_to_float2(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_2d(pointPosition, localPosition, metric, exponent); + if (distanceToPoint < distanceF1) { + distanceF2 = distanceF1; + distanceF1 = distanceToPoint; + offsetF2 = offsetF1; + offsetF1 = cellOffset; + positionF2 = positionF1; + positionF1 = pointPosition; + } + else if (distanceToPoint < distanceF2) { + distanceF2 = distanceToPoint; + offsetF2 = cellOffset; + positionF2 = pointPosition; + } + } + } + *outDistance = distanceF2; + *outColor = hash_float2_to_float3(cellPosition + offsetF2); + *outPosition = positionF2 + cellPosition; +} + +ccl_device void voronoi_distance_to_edge_2d(float2 coord, + float randomness, + ccl_private float *outDistance) +{ + float2 cellPosition = floor(coord); + float2 localPosition = coord - cellPosition; + + float2 vectorToClosest = make_float2(0.0f, 0.0f); + float minDistance = 8.0f; + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + float2 cellOffset = make_float2(i, j); + float2 vectorToPoint = cellOffset + + hash_float2_to_float2(cellPosition + cellOffset) * randomness - + localPosition; + float distanceToPoint = dot(vectorToPoint, vectorToPoint); + if (distanceToPoint < minDistance) { + minDistance = distanceToPoint; + vectorToClosest = vectorToPoint; + } + } + } + + minDistance = 8.0f; + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + float2 cellOffset = make_float2(i, 
j); + float2 vectorToPoint = cellOffset + + hash_float2_to_float2(cellPosition + cellOffset) * randomness - + localPosition; + float2 perpendicularToEdge = vectorToPoint - vectorToClosest; + if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001f) { + float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0f, + normalize(perpendicularToEdge)); + minDistance = min(minDistance, distanceToEdge); + } + } + } + *outDistance = minDistance; +} + +ccl_device void voronoi_n_sphere_radius_2d(float2 coord, + float randomness, + ccl_private float *outRadius) +{ + float2 cellPosition = floor(coord); + float2 localPosition = coord - cellPosition; + + float2 closestPoint = make_float2(0.0f, 0.0f); + float2 closestPointOffset = make_float2(0.0f, 0.0f); + float minDistance = 8.0f; + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + float2 cellOffset = make_float2(i, j); + float2 pointPosition = cellOffset + + hash_float2_to_float2(cellPosition + cellOffset) * randomness; + float distanceToPoint = distance(pointPosition, localPosition); + if (distanceToPoint < minDistance) { + minDistance = distanceToPoint; + closestPoint = pointPosition; + closestPointOffset = cellOffset; + } + } + } + + minDistance = 8.0f; + float2 closestPointToClosestPoint = make_float2(0.0f, 0.0f); + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + if (i == 0 && j == 0) { + continue; + } + float2 cellOffset = make_float2(i, j) + closestPointOffset; + float2 pointPosition = cellOffset + + hash_float2_to_float2(cellPosition + cellOffset) * randomness; + float distanceToPoint = distance(closestPoint, pointPosition); + if (distanceToPoint < minDistance) { + minDistance = distanceToPoint; + closestPointToClosestPoint = pointPosition; + } + } + } + *outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0f; +} + +/* **** 3D Voronoi **** */ + +ccl_device float voronoi_distance_3d(float3 a, + float3 b, + NodeVoronoiDistanceMetric metric, + float exponent) +{ + if (metric == NODE_VORONOI_EUCLIDEAN) { + return distance(a, b); + } + else if (metric == NODE_VORONOI_MANHATTAN) { + return fabsf(a.x - b.x) + fabsf(a.y - b.y) + fabsf(a.z - b.z); + } + else if (metric == NODE_VORONOI_CHEBYCHEV) { + return max(fabsf(a.x - b.x), max(fabsf(a.y - b.y), fabsf(a.z - b.z))); + } + else if (metric == NODE_VORONOI_MINKOWSKI) { + return powf(powf(fabsf(a.x - b.x), exponent) + powf(fabsf(a.y - b.y), exponent) + + powf(fabsf(a.z - b.z), exponent), + 1.0f / exponent); + } + else { + return 0.0f; + } +} + +ccl_device void voronoi_f1_3d(float3 coord, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float3 *outPosition) +{ + float3 cellPosition = floor(coord); + float3 localPosition = coord - cellPosition; + + float minDistance = 8.0f; + float3 targetOffset = make_float3(0.0f, 0.0f, 0.0f); + float3 targetPosition = make_float3(0.0f, 0.0f, 0.0f); + for (int k = -1; k <= 1; k++) { + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + float3 cellOffset = make_float3(i, j, k); + float3 pointPosition = cellOffset + + hash_float3_to_float3(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_3d( + pointPosition, localPosition, metric, exponent); + if (distanceToPoint < minDistance) { + targetOffset = cellOffset; + minDistance = distanceToPoint; + targetPosition = pointPosition; + } + } + } + } + *outDistance = minDistance; + *outColor = 
hash_float3_to_float3(cellPosition + targetOffset); + *outPosition = targetPosition + cellPosition; +} + +ccl_device void voronoi_smooth_f1_3d(float3 coord, + float smoothness, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float3 *outPosition) +{ + float3 cellPosition = floor(coord); + float3 localPosition = coord - cellPosition; + + float smoothDistance = 8.0f; + float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f); + float3 smoothPosition = make_float3(0.0f, 0.0f, 0.0f); + for (int k = -2; k <= 2; k++) { + for (int j = -2; j <= 2; j++) { + for (int i = -2; i <= 2; i++) { + float3 cellOffset = make_float3(i, j, k); + float3 pointPosition = cellOffset + + hash_float3_to_float3(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_3d( + pointPosition, localPosition, metric, exponent); + float h = smoothstep( + 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness); + float correctionFactor = smoothness * h * (1.0f - h); + smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor; + correctionFactor /= 1.0f + 3.0f * smoothness; + float3 cellColor = hash_float3_to_float3(cellPosition + cellOffset); + smoothColor = mix(smoothColor, cellColor, h) - correctionFactor; + smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor; + } + } + } + *outDistance = smoothDistance; + *outColor = smoothColor; + *outPosition = cellPosition + smoothPosition; +} + +ccl_device void voronoi_f2_3d(float3 coord, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float3 *outPosition) +{ + float3 cellPosition = floor(coord); + float3 localPosition = coord - cellPosition; + + float distanceF1 = 8.0f; + float distanceF2 = 8.0f; + float3 offsetF1 = make_float3(0.0f, 0.0f, 0.0f); + float3 positionF1 = make_float3(0.0f, 0.0f, 0.0f); + float3 offsetF2 = make_float3(0.0f, 0.0f, 0.0f); + float3 positionF2 = make_float3(0.0f, 0.0f, 0.0f); + for (int k = -1; k <= 1; k++) { + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + float3 cellOffset = make_float3(i, j, k); + float3 pointPosition = cellOffset + + hash_float3_to_float3(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_3d( + pointPosition, localPosition, metric, exponent); + if (distanceToPoint < distanceF1) { + distanceF2 = distanceF1; + distanceF1 = distanceToPoint; + offsetF2 = offsetF1; + offsetF1 = cellOffset; + positionF2 = positionF1; + positionF1 = pointPosition; + } + else if (distanceToPoint < distanceF2) { + distanceF2 = distanceToPoint; + offsetF2 = cellOffset; + positionF2 = pointPosition; + } + } + } + } + *outDistance = distanceF2; + *outColor = hash_float3_to_float3(cellPosition + offsetF2); + *outPosition = positionF2 + cellPosition; +} + +ccl_device void voronoi_distance_to_edge_3d(float3 coord, + float randomness, + ccl_private float *outDistance) +{ + float3 cellPosition = floor(coord); + float3 localPosition = coord - cellPosition; + + float3 vectorToClosest = make_float3(0.0f, 0.0f, 0.0f); + float minDistance = 8.0f; + for (int k = -1; k <= 1; k++) { + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + float3 cellOffset = make_float3(i, j, k); + float3 vectorToPoint = cellOffset + + hash_float3_to_float3(cellPosition + cellOffset) * randomness - + localPosition; + float distanceToPoint = 
dot(vectorToPoint, vectorToPoint); + if (distanceToPoint < minDistance) { + minDistance = distanceToPoint; + vectorToClosest = vectorToPoint; + } + } + } + } + + minDistance = 8.0f; + for (int k = -1; k <= 1; k++) { + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + float3 cellOffset = make_float3(i, j, k); + float3 vectorToPoint = cellOffset + + hash_float3_to_float3(cellPosition + cellOffset) * randomness - + localPosition; + float3 perpendicularToEdge = vectorToPoint - vectorToClosest; + if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001f) { + float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0f, + normalize(perpendicularToEdge)); + minDistance = min(minDistance, distanceToEdge); + } + } + } + } + *outDistance = minDistance; +} + +ccl_device void voronoi_n_sphere_radius_3d(float3 coord, + float randomness, + ccl_private float *outRadius) +{ + float3 cellPosition = floor(coord); + float3 localPosition = coord - cellPosition; + + float3 closestPoint = make_float3(0.0f, 0.0f, 0.0f); + float3 closestPointOffset = make_float3(0.0f, 0.0f, 0.0f); + float minDistance = 8.0f; + for (int k = -1; k <= 1; k++) { + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + float3 cellOffset = make_float3(i, j, k); + float3 pointPosition = cellOffset + + hash_float3_to_float3(cellPosition + cellOffset) * randomness; + float distanceToPoint = distance(pointPosition, localPosition); + if (distanceToPoint < minDistance) { + minDistance = distanceToPoint; + closestPoint = pointPosition; + closestPointOffset = cellOffset; + } + } + } + } + + minDistance = 8.0f; + float3 closestPointToClosestPoint = make_float3(0.0f, 0.0f, 0.0f); + for (int k = -1; k <= 1; k++) { + for (int j = -1; j <= 1; j++) { + for (int i = -1; i <= 1; i++) { + if (i == 0 && j == 0 && k == 0) { + continue; + } + float3 cellOffset = make_float3(i, j, k) + closestPointOffset; + float3 pointPosition = cellOffset + + hash_float3_to_float3(cellPosition + cellOffset) * randomness; + float distanceToPoint = distance(closestPoint, pointPosition); + if (distanceToPoint < minDistance) { + minDistance = distanceToPoint; + closestPointToClosestPoint = pointPosition; + } + } + } + } + *outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0f; +} + +/* **** 4D Voronoi **** */ + +ccl_device float voronoi_distance_4d(float4 a, + float4 b, + NodeVoronoiDistanceMetric metric, + float exponent) +{ + if (metric == NODE_VORONOI_EUCLIDEAN) { + return distance(a, b); + } + else if (metric == NODE_VORONOI_MANHATTAN) { + return fabsf(a.x - b.x) + fabsf(a.y - b.y) + fabsf(a.z - b.z) + fabsf(a.w - b.w); + } + else if (metric == NODE_VORONOI_CHEBYCHEV) { + return max(fabsf(a.x - b.x), max(fabsf(a.y - b.y), max(fabsf(a.z - b.z), fabsf(a.w - b.w)))); + } + else if (metric == NODE_VORONOI_MINKOWSKI) { + return powf(powf(fabsf(a.x - b.x), exponent) + powf(fabsf(a.y - b.y), exponent) + + powf(fabsf(a.z - b.z), exponent) + powf(fabsf(a.w - b.w), exponent), + 1.0f / exponent); + } + else { + return 0.0f; + } +} + +ccl_device void voronoi_f1_4d(float4 coord, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float4 *outPosition) +{ + float4 cellPosition = floor(coord); + float4 localPosition = coord - cellPosition; + + float minDistance = 8.0f; + float4 targetOffset = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float4 targetPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + for (int u = -1; u <= 1; u++) { + for 
(int k = -1; k <= 1; k++) { + ccl_loop_no_unroll for (int j = -1; j <= 1; j++) + { + for (int i = -1; i <= 1; i++) { + float4 cellOffset = make_float4(i, j, k, u); + float4 pointPosition = cellOffset + + hash_float4_to_float4(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_4d( + pointPosition, localPosition, metric, exponent); + if (distanceToPoint < minDistance) { + targetOffset = cellOffset; + minDistance = distanceToPoint; + targetPosition = pointPosition; + } + } + } + } + } + *outDistance = minDistance; + *outColor = hash_float4_to_float3(cellPosition + targetOffset); + *outPosition = targetPosition + cellPosition; +} + +ccl_device void voronoi_smooth_f1_4d(float4 coord, + float smoothness, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float4 *outPosition) +{ + float4 cellPosition = floor(coord); + float4 localPosition = coord - cellPosition; + + float smoothDistance = 8.0f; + float3 smoothColor = make_float3(0.0f, 0.0f, 0.0f); + float4 smoothPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + for (int u = -2; u <= 2; u++) { + for (int k = -2; k <= 2; k++) { + ccl_loop_no_unroll for (int j = -2; j <= 2; j++) + { + for (int i = -2; i <= 2; i++) { + float4 cellOffset = make_float4(i, j, k, u); + float4 pointPosition = cellOffset + + hash_float4_to_float4(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_4d( + pointPosition, localPosition, metric, exponent); + float h = smoothstep( + 0.0f, 1.0f, 0.5f + 0.5f * (smoothDistance - distanceToPoint) / smoothness); + float correctionFactor = smoothness * h * (1.0f - h); + smoothDistance = mix(smoothDistance, distanceToPoint, h) - correctionFactor; + correctionFactor /= 1.0f + 3.0f * smoothness; + float3 cellColor = hash_float4_to_float3(cellPosition + cellOffset); + smoothColor = mix(smoothColor, cellColor, h) - correctionFactor; + smoothPosition = mix(smoothPosition, pointPosition, h) - correctionFactor; + } + } + } + } + *outDistance = smoothDistance; + *outColor = smoothColor; + *outPosition = cellPosition + smoothPosition; +} + +ccl_device void voronoi_f2_4d(float4 coord, + float exponent, + float randomness, + NodeVoronoiDistanceMetric metric, + ccl_private float *outDistance, + ccl_private float3 *outColor, + ccl_private float4 *outPosition) +{ + float4 cellPosition = floor(coord); + float4 localPosition = coord - cellPosition; + + float distanceF1 = 8.0f; + float distanceF2 = 8.0f; + float4 offsetF1 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float4 positionF1 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float4 offsetF2 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float4 positionF2 = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + for (int u = -1; u <= 1; u++) { + for (int k = -1; k <= 1; k++) { + ccl_loop_no_unroll for (int j = -1; j <= 1; j++) + { + for (int i = -1; i <= 1; i++) { + float4 cellOffset = make_float4(i, j, k, u); + float4 pointPosition = cellOffset + + hash_float4_to_float4(cellPosition + cellOffset) * randomness; + float distanceToPoint = voronoi_distance_4d( + pointPosition, localPosition, metric, exponent); + if (distanceToPoint < distanceF1) { + distanceF2 = distanceF1; + distanceF1 = distanceToPoint; + offsetF2 = offsetF1; + offsetF1 = cellOffset; + positionF2 = positionF1; + positionF1 = pointPosition; + } + else if (distanceToPoint < distanceF2) { + distanceF2 = distanceToPoint; + offsetF2 = cellOffset; + positionF2 = pointPosition; + } + } + } + } + } + 
*outDistance = distanceF2; + *outColor = hash_float4_to_float3(cellPosition + offsetF2); + *outPosition = positionF2 + cellPosition; +} + +ccl_device void voronoi_distance_to_edge_4d(float4 coord, + float randomness, + ccl_private float *outDistance) +{ + float4 cellPosition = floor(coord); + float4 localPosition = coord - cellPosition; + + float4 vectorToClosest = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float minDistance = 8.0f; + for (int u = -1; u <= 1; u++) { + for (int k = -1; k <= 1; k++) { + ccl_loop_no_unroll for (int j = -1; j <= 1; j++) + { + for (int i = -1; i <= 1; i++) { + float4 cellOffset = make_float4(i, j, k, u); + float4 vectorToPoint = cellOffset + + hash_float4_to_float4(cellPosition + cellOffset) * randomness - + localPosition; + float distanceToPoint = dot(vectorToPoint, vectorToPoint); + if (distanceToPoint < minDistance) { + minDistance = distanceToPoint; + vectorToClosest = vectorToPoint; + } + } + } + } + } + + minDistance = 8.0f; + for (int u = -1; u <= 1; u++) { + for (int k = -1; k <= 1; k++) { + ccl_loop_no_unroll for (int j = -1; j <= 1; j++) + { + for (int i = -1; i <= 1; i++) { + float4 cellOffset = make_float4(i, j, k, u); + float4 vectorToPoint = cellOffset + + hash_float4_to_float4(cellPosition + cellOffset) * randomness - + localPosition; + float4 perpendicularToEdge = vectorToPoint - vectorToClosest; + if (dot(perpendicularToEdge, perpendicularToEdge) > 0.0001f) { + float distanceToEdge = dot((vectorToClosest + vectorToPoint) / 2.0f, + normalize(perpendicularToEdge)); + minDistance = min(minDistance, distanceToEdge); + } + } + } + } + } + *outDistance = minDistance; +} + +ccl_device void voronoi_n_sphere_radius_4d(float4 coord, + float randomness, + ccl_private float *outRadius) +{ + float4 cellPosition = floor(coord); + float4 localPosition = coord - cellPosition; + + float4 closestPoint = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float4 closestPointOffset = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float minDistance = 8.0f; + for (int u = -1; u <= 1; u++) { + for (int k = -1; k <= 1; k++) { + ccl_loop_no_unroll for (int j = -1; j <= 1; j++) + { + for (int i = -1; i <= 1; i++) { + float4 cellOffset = make_float4(i, j, k, u); + float4 pointPosition = cellOffset + + hash_float4_to_float4(cellPosition + cellOffset) * randomness; + float distanceToPoint = distance(pointPosition, localPosition); + if (distanceToPoint < minDistance) { + minDistance = distanceToPoint; + closestPoint = pointPosition; + closestPointOffset = cellOffset; + } + } + } + } + } + + minDistance = 8.0f; + float4 closestPointToClosestPoint = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + for (int u = -1; u <= 1; u++) { + for (int k = -1; k <= 1; k++) { + ccl_loop_no_unroll for (int j = -1; j <= 1; j++) + { + for (int i = -1; i <= 1; i++) { + if (i == 0 && j == 0 && k == 0 && u == 0) { + continue; + } + float4 cellOffset = make_float4(i, j, k, u) + closestPointOffset; + float4 pointPosition = cellOffset + + hash_float4_to_float4(cellPosition + cellOffset) * randomness; + float distanceToPoint = distance(closestPoint, pointPosition); + if (distanceToPoint < minDistance) { + minDistance = distanceToPoint; + closestPointToClosestPoint = pointPosition; + } + } + } + } + } + *outRadius = distance(closestPointToClosestPoint, closestPoint) / 2.0f; +} + +template<uint node_feature_mask> +ccl_device_noinline int svm_node_tex_voronoi(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint dimensions, + uint feature, + uint metric, + int offset) +{ + uint4 stack_offsets = read_node(kg, &offset); + uint4
defaults = read_node(kg, &offset); + + uint coord_stack_offset, w_stack_offset, scale_stack_offset, smoothness_stack_offset; + uint exponent_stack_offset, randomness_stack_offset, distance_out_stack_offset, + color_out_stack_offset; + uint position_out_stack_offset, w_out_stack_offset, radius_out_stack_offset; + + svm_unpack_node_uchar4(stack_offsets.x, + &coord_stack_offset, + &w_stack_offset, + &scale_stack_offset, + &smoothness_stack_offset); + svm_unpack_node_uchar4(stack_offsets.y, + &exponent_stack_offset, + &randomness_stack_offset, + &distance_out_stack_offset, + &color_out_stack_offset); + svm_unpack_node_uchar3( + stack_offsets.z, &position_out_stack_offset, &w_out_stack_offset, &radius_out_stack_offset); + + float3 coord = stack_load_float3(stack, coord_stack_offset); + float w = stack_load_float_default(stack, w_stack_offset, stack_offsets.w); + float scale = stack_load_float_default(stack, scale_stack_offset, defaults.x); + float smoothness = stack_load_float_default(stack, smoothness_stack_offset, defaults.y); + float exponent = stack_load_float_default(stack, exponent_stack_offset, defaults.z); + float randomness = stack_load_float_default(stack, randomness_stack_offset, defaults.w); + + NodeVoronoiFeature voronoi_feature = (NodeVoronoiFeature)feature; + NodeVoronoiDistanceMetric voronoi_metric = (NodeVoronoiDistanceMetric)metric; + + float distance_out = 0.0f, w_out = 0.0f, radius_out = 0.0f; + float3 color_out = make_float3(0.0f, 0.0f, 0.0f); + float3 position_out = make_float3(0.0f, 0.0f, 0.0f); + + randomness = clamp(randomness, 0.0f, 1.0f); + smoothness = clamp(smoothness / 2.0f, 0.0f, 0.5f); + + w *= scale; + coord *= scale; + + switch (dimensions) { + case 1: { + switch (voronoi_feature) { + case NODE_VORONOI_F1: + voronoi_f1_1d( + w, exponent, randomness, voronoi_metric, &distance_out, &color_out, &w_out); + break; + case NODE_VORONOI_SMOOTH_F1: + voronoi_smooth_f1_1d(w, + smoothness, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &w_out); + break; + case NODE_VORONOI_F2: + voronoi_f2_1d( + w, exponent, randomness, voronoi_metric, &distance_out, &color_out, &w_out); + break; + case NODE_VORONOI_DISTANCE_TO_EDGE: + voronoi_distance_to_edge_1d(w, randomness, &distance_out); + break; + case NODE_VORONOI_N_SPHERE_RADIUS: + voronoi_n_sphere_radius_1d(w, randomness, &radius_out); + break; + default: + kernel_assert(0); + } + w_out = safe_divide(w_out, scale); + break; + } + case 2: { + float2 coord_2d = make_float2(coord.x, coord.y); + float2 position_out_2d = zero_float2(); + switch (voronoi_feature) { + case NODE_VORONOI_F1: + voronoi_f1_2d(coord_2d, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out_2d); + break; + case NODE_VORONOI_SMOOTH_F1: + IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA) + { + voronoi_smooth_f1_2d(coord_2d, + smoothness, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out_2d); + } + break; + case NODE_VORONOI_F2: + voronoi_f2_2d(coord_2d, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out_2d); + break; + case NODE_VORONOI_DISTANCE_TO_EDGE: + voronoi_distance_to_edge_2d(coord_2d, randomness, &distance_out); + break; + case NODE_VORONOI_N_SPHERE_RADIUS: + voronoi_n_sphere_radius_2d(coord_2d, randomness, &radius_out); + break; + default: + kernel_assert(0); + } + position_out_2d = safe_divide_float2_float(position_out_2d, scale); + position_out = make_float3(position_out_2d.x, position_out_2d.y, 0.0f); + break; + 
} + case 3: { + switch (voronoi_feature) { + case NODE_VORONOI_F1: + voronoi_f1_3d(coord, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out); + break; + case NODE_VORONOI_SMOOTH_F1: + IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA) + { + voronoi_smooth_f1_3d(coord, + smoothness, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out); + } + break; + case NODE_VORONOI_F2: + voronoi_f2_3d(coord, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out); + break; + case NODE_VORONOI_DISTANCE_TO_EDGE: + voronoi_distance_to_edge_3d(coord, randomness, &distance_out); + break; + case NODE_VORONOI_N_SPHERE_RADIUS: + voronoi_n_sphere_radius_3d(coord, randomness, &radius_out); + break; + default: + kernel_assert(0); + } + position_out = safe_divide_float3_float(position_out, scale); + break; + } + + case 4: { + IF_KERNEL_NODES_FEATURE(VORONOI_EXTRA) + { + float4 coord_4d = make_float4(coord.x, coord.y, coord.z, w); + float4 position_out_4d; + switch (voronoi_feature) { + case NODE_VORONOI_F1: + voronoi_f1_4d(coord_4d, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out_4d); + break; + case NODE_VORONOI_SMOOTH_F1: + voronoi_smooth_f1_4d(coord_4d, + smoothness, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out_4d); + break; + case NODE_VORONOI_F2: + voronoi_f2_4d(coord_4d, + exponent, + randomness, + voronoi_metric, + &distance_out, + &color_out, + &position_out_4d); + break; + case NODE_VORONOI_DISTANCE_TO_EDGE: + voronoi_distance_to_edge_4d(coord_4d, randomness, &distance_out); + break; + case NODE_VORONOI_N_SPHERE_RADIUS: + voronoi_n_sphere_radius_4d(coord_4d, randomness, &radius_out); + break; + default: + kernel_assert(0); + } + position_out_4d = safe_divide_float4_float(position_out_4d, scale); + position_out = make_float3(position_out_4d.x, position_out_4d.y, position_out_4d.z); + w_out = position_out_4d.w; + } + break; + } + default: + kernel_assert(0); + } + + if (stack_valid(distance_out_stack_offset)) + stack_store_float(stack, distance_out_stack_offset, distance_out); + if (stack_valid(color_out_stack_offset)) + stack_store_float3(stack, color_out_stack_offset, color_out); + if (stack_valid(position_out_stack_offset)) + stack_store_float3(stack, position_out_stack_offset, position_out); + if (stack_valid(w_out_stack_offset)) + stack_store_float(stack, w_out_stack_offset, w_out); + if (stack_valid(radius_out_stack_offset)) + stack_store_float(stack, radius_out_stack_offset, radius_out); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/voxel.h b/intern/cycles/kernel/svm/voxel.h new file mode 100644 index 00000000000..43947fbc54f --- /dev/null +++ b/intern/cycles/kernel/svm/voxel.h @@ -0,0 +1,55 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* TODO(sergey): Think of making it more generic volume-type attribute + * sampler. + */ +ccl_device_noinline int svm_node_tex_voxel( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) +{ + uint co_offset, density_out_offset, color_out_offset, space; + svm_unpack_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space); +#ifdef __VOLUME__ + int id = node.y; + float3 co = stack_load_float3(stack, co_offset); + if (space == NODE_TEX_VOXEL_SPACE_OBJECT) { + co = volume_normalized_position(kg, sd, co); + } + else { + kernel_assert(space == NODE_TEX_VOXEL_SPACE_WORLD); + Transform tfm; + tfm.x = read_node_float(kg, &offset); + tfm.y = read_node_float(kg, &offset); + tfm.z = read_node_float(kg, &offset); + co = transform_point(&tfm, co); + } + + float4 r = kernel_tex_image_interp_3d(kg, id, co, INTERPOLATION_NONE); +#else + float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#endif + if (stack_valid(density_out_offset)) + stack_store_float(stack, density_out_offset, r.w); + if (stack_valid(color_out_offset)) + stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z)); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/wave.h b/intern/cycles/kernel/svm/wave.h new file mode 100644 index 00000000000..40e71b9d5df --- /dev/null +++ b/intern/cycles/kernel/svm/wave.h @@ -0,0 +1,133 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Wave */ + +ccl_device_noinline_cpu float svm_wave(NodeWaveType type, + NodeWaveBandsDirection bands_dir, + NodeWaveRingsDirection rings_dir, + NodeWaveProfile profile, + float3 p, + float distortion, + float detail, + float dscale, + float droughness, + float phase) +{ + /* Prevent precision issues on unit coordinates. 
*/ + p = (p + 0.000001f) * 0.999999f; + + float n; + + if (type == NODE_WAVE_BANDS) { + if (bands_dir == NODE_WAVE_BANDS_DIRECTION_X) { + n = p.x * 20.0f; + } + else if (bands_dir == NODE_WAVE_BANDS_DIRECTION_Y) { + n = p.y * 20.0f; + } + else if (bands_dir == NODE_WAVE_BANDS_DIRECTION_Z) { + n = p.z * 20.0f; + } + else { /* NODE_WAVE_BANDS_DIRECTION_DIAGONAL */ + n = (p.x + p.y + p.z) * 10.0f; + } + } + else { /* NODE_WAVE_RINGS */ + float3 rp = p; + if (rings_dir == NODE_WAVE_RINGS_DIRECTION_X) { + rp *= make_float3(0.0f, 1.0f, 1.0f); + } + else if (rings_dir == NODE_WAVE_RINGS_DIRECTION_Y) { + rp *= make_float3(1.0f, 0.0f, 1.0f); + } + else if (rings_dir == NODE_WAVE_RINGS_DIRECTION_Z) { + rp *= make_float3(1.0f, 1.0f, 0.0f); + } + /* else: NODE_WAVE_RINGS_DIRECTION_SPHERICAL */ + + n = len(rp) * 20.0f; + } + + n += phase; + + if (distortion != 0.0f) + n += distortion * (fractal_noise_3d(p * dscale, detail, droughness) * 2.0f - 1.0f); + + if (profile == NODE_WAVE_PROFILE_SIN) { + return 0.5f + 0.5f * sinf(n - M_PI_2_F); + } + else if (profile == NODE_WAVE_PROFILE_SAW) { + n /= M_2PI_F; + return n - floorf(n); + } + else { /* NODE_WAVE_PROFILE_TRI */ + n /= M_2PI_F; + return fabsf(n - floorf(n + 0.5f)) * 2.0f; + } +} + +ccl_device_noinline int svm_node_tex_wave( + KernelGlobals kg, ccl_private ShaderData *sd, ccl_private float *stack, uint4 node, int offset) +{ + uint4 node2 = read_node(kg, &offset); + uint4 node3 = read_node(kg, &offset); + + /* RNA properties */ + uint type_offset, bands_dir_offset, rings_dir_offset, profile_offset; + /* Inputs, Outputs */ + uint co_offset, scale_offset, distortion_offset, detail_offset, dscale_offset, droughness_offset, + phase_offset; + uint color_offset, fac_offset; + + svm_unpack_node_uchar4( + node.y, &type_offset, &bands_dir_offset, &rings_dir_offset, &profile_offset); + svm_unpack_node_uchar3(node.z, &co_offset, &scale_offset, &distortion_offset); + svm_unpack_node_uchar4( + node.w, &detail_offset, &dscale_offset, &droughness_offset, &phase_offset); + svm_unpack_node_uchar2(node2.x, &color_offset, &fac_offset); + + float3 co = stack_load_float3(stack, co_offset); + float scale = stack_load_float_default(stack, scale_offset, node2.y); + float distortion = stack_load_float_default(stack, distortion_offset, node2.z); + float detail = stack_load_float_default(stack, detail_offset, node2.w); + float dscale = stack_load_float_default(stack, dscale_offset, node3.x); + float droughness = stack_load_float_default(stack, droughness_offset, node3.y); + float phase = stack_load_float_default(stack, phase_offset, node3.z); + + float f = svm_wave((NodeWaveType)type_offset, + (NodeWaveBandsDirection)bands_dir_offset, + (NodeWaveRingsDirection)rings_dir_offset, + (NodeWaveProfile)profile_offset, + co * scale, + distortion, + detail, + dscale, + droughness, + phase); + + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, f); + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, make_float3(f, f, f)); + return offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/wavelength.h b/intern/cycles/kernel/svm/wavelength.h new file mode 100644 index 00000000000..28fd172abc7 --- /dev/null +++ b/intern/cycles/kernel/svm/wavelength.h @@ -0,0 +1,103 @@ +/* + * Adapted from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2013, Blender Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Wavelength to RGB */ + +ccl_device_noinline void svm_node_wavelength(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint wavelength, + uint color_out) +{ + // CIE colour matching functions xBar, yBar, and zBar for + // wavelengths from 380 through 780 nanometers, every 5 + // nanometers. 
For a wavelength lambda in this range: + // cie_colour_match[(lambda - 380) / 5][0] = xBar + // cie_colour_match[(lambda - 380) / 5][1] = yBar + // cie_colour_match[(lambda - 380) / 5][2] = zBar + const float cie_colour_match[81][3] = { + {0.0014f, 0.0000f, 0.0065f}, {0.0022f, 0.0001f, 0.0105f}, {0.0042f, 0.0001f, 0.0201f}, + {0.0076f, 0.0002f, 0.0362f}, {0.0143f, 0.0004f, 0.0679f}, {0.0232f, 0.0006f, 0.1102f}, + {0.0435f, 0.0012f, 0.2074f}, {0.0776f, 0.0022f, 0.3713f}, {0.1344f, 0.0040f, 0.6456f}, + {0.2148f, 0.0073f, 1.0391f}, {0.2839f, 0.0116f, 1.3856f}, {0.3285f, 0.0168f, 1.6230f}, + {0.3483f, 0.0230f, 1.7471f}, {0.3481f, 0.0298f, 1.7826f}, {0.3362f, 0.0380f, 1.7721f}, + {0.3187f, 0.0480f, 1.7441f}, {0.2908f, 0.0600f, 1.6692f}, {0.2511f, 0.0739f, 1.5281f}, + {0.1954f, 0.0910f, 1.2876f}, {0.1421f, 0.1126f, 1.0419f}, {0.0956f, 0.1390f, 0.8130f}, + {0.0580f, 0.1693f, 0.6162f}, {0.0320f, 0.2080f, 0.4652f}, {0.0147f, 0.2586f, 0.3533f}, + {0.0049f, 0.3230f, 0.2720f}, {0.0024f, 0.4073f, 0.2123f}, {0.0093f, 0.5030f, 0.1582f}, + {0.0291f, 0.6082f, 0.1117f}, {0.0633f, 0.7100f, 0.0782f}, {0.1096f, 0.7932f, 0.0573f}, + {0.1655f, 0.8620f, 0.0422f}, {0.2257f, 0.9149f, 0.0298f}, {0.2904f, 0.9540f, 0.0203f}, + {0.3597f, 0.9803f, 0.0134f}, {0.4334f, 0.9950f, 0.0087f}, {0.5121f, 1.0000f, 0.0057f}, + {0.5945f, 0.9950f, 0.0039f}, {0.6784f, 0.9786f, 0.0027f}, {0.7621f, 0.9520f, 0.0021f}, + {0.8425f, 0.9154f, 0.0018f}, {0.9163f, 0.8700f, 0.0017f}, {0.9786f, 0.8163f, 0.0014f}, + {1.0263f, 0.7570f, 0.0011f}, {1.0567f, 0.6949f, 0.0010f}, {1.0622f, 0.6310f, 0.0008f}, + {1.0456f, 0.5668f, 0.0006f}, {1.0026f, 0.5030f, 0.0003f}, {0.9384f, 0.4412f, 0.0002f}, + {0.8544f, 0.3810f, 0.0002f}, {0.7514f, 0.3210f, 0.0001f}, {0.6424f, 0.2650f, 0.0000f}, + {0.5419f, 0.2170f, 0.0000f}, {0.4479f, 0.1750f, 0.0000f}, {0.3608f, 0.1382f, 0.0000f}, + {0.2835f, 0.1070f, 0.0000f}, {0.2187f, 0.0816f, 0.0000f}, {0.1649f, 0.0610f, 0.0000f}, + {0.1212f, 0.0446f, 0.0000f}, {0.0874f, 0.0320f, 0.0000f}, {0.0636f, 0.0232f, 0.0000f}, + {0.0468f, 0.0170f, 0.0000f}, {0.0329f, 0.0119f, 0.0000f}, {0.0227f, 0.0082f, 0.0000f}, + {0.0158f, 0.0057f, 0.0000f}, {0.0114f, 0.0041f, 0.0000f}, {0.0081f, 0.0029f, 0.0000f}, + {0.0058f, 0.0021f, 0.0000f}, {0.0041f, 0.0015f, 0.0000f}, {0.0029f, 0.0010f, 0.0000f}, + {0.0020f, 0.0007f, 0.0000f}, {0.0014f, 0.0005f, 0.0000f}, {0.0010f, 0.0004f, 0.0000f}, + {0.0007f, 0.0002f, 0.0000f}, {0.0005f, 0.0002f, 0.0000f}, {0.0003f, 0.0001f, 0.0000f}, + {0.0002f, 0.0001f, 0.0000f}, {0.0002f, 0.0001f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, + {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}}; + + float lambda_nm = stack_load_float(stack, wavelength); + float ii = (lambda_nm - 380.0f) * (1.0f / 5.0f); // scaled 0..80 + int i = float_to_int(ii); + float3 color; + + if (i < 0 || i >= 80) { + color = make_float3(0.0f, 0.0f, 0.0f); + } + else { + ii -= i; + ccl_constant float *c = cie_colour_match[i]; + color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii); + } + + color = xyz_to_rgb(kg, color); + color *= 1.0f / 2.52f; // Empirical scale from lg to make all comps <= 1 + + /* Clamp to zero if values are smaller */ + color = max(color, make_float3(0.0f, 0.0f, 0.0f)); + + stack_store_float3(stack, color_out, color); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/white_noise.h b/intern/cycles/kernel/svm/white_noise.h new file mode 100644 index 00000000000..d275a3f7068 --- /dev/null +++ b/intern/cycles/kernel/svm/white_noise.h @@ -0,0 +1,82 @@ +/* + * 
Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +ccl_device_noinline void svm_node_tex_white_noise(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint dimensions, + uint inputs_stack_offsets, + uint ouptuts_stack_offsets) +{ + uint vector_stack_offset, w_stack_offset, value_stack_offset, color_stack_offset; + svm_unpack_node_uchar2(inputs_stack_offsets, &vector_stack_offset, &w_stack_offset); + svm_unpack_node_uchar2(ouptuts_stack_offsets, &value_stack_offset, &color_stack_offset); + + float3 vector = stack_load_float3(stack, vector_stack_offset); + float w = stack_load_float(stack, w_stack_offset); + + if (stack_valid(color_stack_offset)) { + float3 color; + switch (dimensions) { + case 1: + color = hash_float_to_float3(w); + break; + case 2: + color = hash_float2_to_float3(make_float2(vector.x, vector.y)); + break; + case 3: + color = hash_float3_to_float3(vector); + break; + case 4: + color = hash_float4_to_float3(make_float4(vector.x, vector.y, vector.z, w)); + break; + default: + color = make_float3(1.0f, 0.0f, 1.0f); + kernel_assert(0); + break; + } + stack_store_float3(stack, color_stack_offset, color); + } + + if (stack_valid(value_stack_offset)) { + float value; + switch (dimensions) { + case 1: + value = hash_float_to_float(w); + break; + case 2: + value = hash_float2_to_float(make_float2(vector.x, vector.y)); + break; + case 3: + value = hash_float3_to_float(vector); + break; + case 4: + value = hash_float4_to_float(make_float4(vector.x, vector.y, vector.z, w)); + break; + default: + value = 0.0f; + kernel_assert(0); + break; + } + stack_store_float(stack, value_stack_offset, value); + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/wireframe.h b/intern/cycles/kernel/svm/wireframe.h new file mode 100644 index 00000000000..530a9601bce --- /dev/null +++ b/intern/cycles/kernel/svm/wireframe.h @@ -0,0 +1,127 @@ +/* + * Adapted from Open Shading Language with this license: + * + * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al. + * All Rights Reserved. + * + * Modifications Copyright 2013, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Sony Pictures Imageworks nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Wireframe Node */ + +ccl_device_inline float wireframe(KernelGlobals kg, + ccl_private ShaderData *sd, + float size, + int pixel_size, + ccl_private float3 *P) +{ +#ifdef __HAIR__ + if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE) +#else + if (sd->prim != PRIM_NONE) +#endif + { + float3 Co[3]; + float pixelwidth = 1.0f; + + /* Triangles */ + int np = 3; + + if (sd->type & PRIMITIVE_TRIANGLE) + triangle_vertices(kg, sd->prim, Co); + else + motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co); + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_position_transform(kg, sd, &Co[0]); + object_position_transform(kg, sd, &Co[1]); + object_position_transform(kg, sd, &Co[2]); + } + + if (pixel_size) { + // Project the derivatives of P to the viewing plane defined + // by I so we have a measure of how big is a pixel at this point + float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I); + float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I); + // Take the average of both axis' length + pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f; + } + + // Use half the width as the neighbor face will render the + // other half. And take the square for fast comparison + pixelwidth *= 0.5f * size; + pixelwidth *= pixelwidth; + for (int i = 0; i < np; i++) { + int i2 = i ? i - 1 : np - 1; + float3 dir = *P - Co[i]; + float3 edge = Co[i] - Co[i2]; + float3 crs = cross(edge, dir); + // At this point dot(crs, crs) / dot(edge, edge) is + // the square of area / length(edge) == square of the + // distance to the edge. + if (dot(crs, crs) < (dot(edge, edge) * pixelwidth)) + return 1.0f; + } + } + return 0.0f; +} + +ccl_device_noinline void svm_node_wireframe(KernelGlobals kg, + ccl_private ShaderData *sd, + ccl_private float *stack, + uint4 node) +{ + uint in_size = node.y; + uint out_fac = node.z; + uint use_pixel_size, bump_offset; + svm_unpack_node_uchar2(node.w, &use_pixel_size, &bump_offset); + + /* Input Data */ + float size = stack_load_float(stack, in_size); + int pixel_size = (int)use_pixel_size; + + /* Calculate wireframe */ + float f = wireframe(kg, sd, size, pixel_size, &sd->P); + + /* TODO(sergey): Think of faster way to calculate derivatives. 
*/ + if (bump_offset == NODE_BUMP_OFFSET_DX) { + float3 Px = sd->P - sd->dP.dx; + f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx); + } + else if (bump_offset == NODE_BUMP_OFFSET_DY) { + float3 Py = sd->P - sd->dP.dy; + f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy); + } + + if (stack_valid(out_fac)) + stack_store_float(stack, out_fac, f); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/textures.h b/intern/cycles/kernel/textures.h new file mode 100644 index 00000000000..464ecb183cb --- /dev/null +++ b/intern/cycles/kernel/textures.h @@ -0,0 +1,89 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef KERNEL_TEX +# define KERNEL_TEX(type, name) +#endif + +/* BVH2, not used for OptiX or Embree. */ +KERNEL_TEX(float4, __bvh_nodes) +KERNEL_TEX(float4, __bvh_leaf_nodes) +KERNEL_TEX(uint, __prim_type) +KERNEL_TEX(uint, __prim_visibility) +KERNEL_TEX(uint, __prim_index) +KERNEL_TEX(uint, __prim_object) +KERNEL_TEX(uint, __object_node) +KERNEL_TEX(float2, __prim_time) + +/* objects */ +KERNEL_TEX(KernelObject, __objects) +KERNEL_TEX(Transform, __object_motion_pass) +KERNEL_TEX(DecomposedTransform, __object_motion) +KERNEL_TEX(uint, __object_flag) +KERNEL_TEX(float, __object_volume_step) + +/* cameras */ +KERNEL_TEX(DecomposedTransform, __camera_motion) + +/* triangles */ +KERNEL_TEX(uint, __tri_shader) +KERNEL_TEX(float4, __tri_vnormal) +KERNEL_TEX(uint4, __tri_vindex) +KERNEL_TEX(uint, __tri_patch) +KERNEL_TEX(float2, __tri_patch_uv) +KERNEL_TEX(float4, __tri_verts) + +/* curves */ +KERNEL_TEX(KernelCurve, __curves) +KERNEL_TEX(float4, __curve_keys) +KERNEL_TEX(KernelCurveSegment, __curve_segments) + +/* patches */ +KERNEL_TEX(uint, __patches) + +/* attributes */ +KERNEL_TEX(uint4, __attributes_map) +KERNEL_TEX(float, __attributes_float) +KERNEL_TEX(float2, __attributes_float2) +KERNEL_TEX(float4, __attributes_float3) +KERNEL_TEX(uchar4, __attributes_uchar4) + +/* lights */ +KERNEL_TEX(KernelLightDistribution, __light_distribution) +KERNEL_TEX(KernelLight, __lights) +KERNEL_TEX(float2, __light_background_marginal_cdf) +KERNEL_TEX(float2, __light_background_conditional_cdf) + +/* particles */ +KERNEL_TEX(KernelParticle, __particles) + +/* shaders */ +KERNEL_TEX(uint4, __svm_nodes) +KERNEL_TEX(KernelShader, __shaders) + +/* lookup tables */ +KERNEL_TEX(float, __lookup_table) + +/* sobol */ +KERNEL_TEX(float, __sample_pattern_lut) + +/* image textures */ +KERNEL_TEX(TextureInfo, __texture_info) + +/* ies lights */ +KERNEL_TEX(float, __ies) + +#undef KERNEL_TEX diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h new file mode 100644 index 00000000000..4109dd6a486 --- /dev/null +++ b/intern/cycles/kernel/types.h @@ -0,0 +1,1608 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#if !defined(__KERNEL_GPU__) && defined(WITH_EMBREE) +# include <embree3/rtcore.h> +# include <embree3/rtcore_scene.h> +# define __EMBREE__ +#endif + +#include "util/math.h" +#include "util/math_fast.h" +#include "util/math_intersect.h" +#include "util/projection.h" +#include "util/static_assert.h" +#include "util/texture.h" +#include "util/transform.h" + +#include "kernel/svm/types.h" + +#ifndef __KERNEL_GPU__ +# define __KERNEL_CPU__ +#endif + +/* TODO(sergey): This is only to make it possible to include this header + * from outside of the kernel. but this could be done somewhat cleaner? + */ +#ifndef ccl_addr_space +# define ccl_addr_space +#endif + +CCL_NAMESPACE_BEGIN + +/* Constants */ +#define OBJECT_MOTION_PASS_SIZE 2 +#define FILTER_TABLE_SIZE 1024 +#define RAMP_TABLE_SIZE 256 +#define SHUTTER_TABLE_SIZE 256 + +#define BSSRDF_MIN_RADIUS 1e-8f +#define BSSRDF_MAX_HITS 4 +#define BSSRDF_MAX_BOUNCES 256 +#define LOCAL_MAX_HITS 4 + +#define VOLUME_BOUNDS_MAX 1024 + +#define BECKMANN_TABLE_SIZE 256 + +#define SHADER_NONE (~0) +#define OBJECT_NONE (~0) +#define PRIM_NONE (~0) +#define LAMP_NONE (~0) +#define ID_NONE (0.0f) +#define PASS_UNUSED (~0) + +#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024U +#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4U + +#ifdef __KERNEL_CPU__ +# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU +#else +# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU +#endif + +/* Kernel features */ +#define __SOBOL__ +#define __DPDU__ +#define __BACKGROUND__ +#define __CAUSTICS_TRICKS__ +#define __VISIBILITY_FLAG__ +#define __RAY_DIFFERENTIALS__ +#define __CAMERA_CLIPPING__ +#define __INTERSECTION_REFINE__ +#define __CLAMP_SAMPLE__ +#define __PATCH_EVAL__ +#define __SHADOW_CATCHER__ +#define __DENOISING_FEATURES__ +#define __SHADER_RAYTRACE__ +#define __AO__ +#define __PASSES__ +#define __HAIR__ +#define __SVM__ +#define __EMISSION__ +#define __HOLDOUT__ +#define __TRANSPARENT_SHADOWS__ +#define __BACKGROUND_MIS__ +#define __LAMP_MIS__ +#define __CAMERA_MOTION__ +#define __OBJECT_MOTION__ +#define __BAKING__ +#define __PRINCIPLED__ +#define __SUBSURFACE__ +#define __VOLUME__ +#define __CMJ__ +#define __SHADOW_RECORD_ALL__ +#define __BRANCHED_PATH__ + +/* Device specific features */ +#ifdef __KERNEL_CPU__ +# ifdef WITH_OSL +# define __OSL__ +# endif +# define __VOLUME_RECORD_ALL__ +#endif /* __KERNEL_CPU__ */ + +#ifdef __KERNEL_OPTIX__ +# undef __BAKING__ +#endif /* __KERNEL_OPTIX__ */ + +/* Scene-based selective features compilation.
*/ +#ifdef __KERNEL_FEATURES__ +# if !(__KERNEL_FEATURES & KERNEL_FEATURE_CAMERA_MOTION) +# undef __CAMERA_MOTION__ +# endif +# if !(__KERNEL_FEATURES & KERNEL_FEATURE_OBJECT_MOTION) +# undef __OBJECT_MOTION__ +# endif +# if !(__KERNEL_FEATURES & KERNEL_FEATURE_HAIR) +# undef __HAIR__ +# endif +# if !(__KERNEL_FEATURES & KERNEL_FEATURE_VOLUME) +# undef __VOLUME__ +# endif +# if !(__KERNEL_FEATURES & KERNEL_FEATURE_SUBSURFACE) +# undef __SUBSURFACE__ +# endif +# if !(__KERNEL_FEATURES & KERNEL_FEATURE_BAKING) +# undef __BAKING__ +# endif +# if !(__KERNEL_FEATURES & KERNEL_FEATURE_PATCH_EVALUATION) +# undef __PATCH_EVAL__ +# endif +# if !(__KERNEL_FEATURES & KERNEL_FEATURE_TRANSPARENT) +# undef __TRANSPARENT_SHADOWS__ +# endif +# if !(__KERNEL_FEATURES & KERNEL_FEATURE_SHADOW_CATCHER) +# undef __SHADOW_CATCHER__ +# endif +# if !(__KERNEL_FEATURES & KERNEL_FEATURE_PRINCIPLED) +# undef __PRINCIPLED__ +# endif +# if !(__KERNEL_FEATURES & KERNEL_FEATURE_DENOISING) +# undef __DENOISING_FEATURES__ +# endif +#endif + +#ifdef WITH_CYCLES_DEBUG_NAN +# define __KERNEL_DEBUG_NAN__ +#endif + +/* Features that enable others */ + +#if defined(__SUBSURFACE__) || defined(__SHADER_RAYTRACE__) +# define __BVH_LOCAL__ +#endif + +/* Path Tracing + * note we need to keep the u/v pairs at even values */ + +enum PathTraceDimension { + PRNG_FILTER_U = 0, + PRNG_FILTER_V = 1, + PRNG_LENS_U = 2, + PRNG_LENS_V = 3, + PRNG_TIME = 4, + PRNG_UNUSED_0 = 5, + PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */ + PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */ + PRNG_BASE_NUM = 10, + + PRNG_BSDF_U = 0, + PRNG_BSDF_V = 1, + PRNG_LIGHT_U = 2, + PRNG_LIGHT_V = 3, + PRNG_LIGHT_TERMINATE = 4, + PRNG_TERMINATE = 5, + PRNG_PHASE_CHANNEL = 6, + PRNG_SCATTER_DISTANCE = 7, + PRNG_BOUNCE_NUM = 8, + + PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */ + PRNG_BEVEL_V = 7, +}; + +enum SamplingPattern { + SAMPLING_PATTERN_SOBOL = 0, + SAMPLING_PATTERN_PMJ = 1, + + SAMPLING_NUM_PATTERNS, +}; + +/* These flags values correspond to `raytypes` in `osl.cpp`, so keep them in sync! */ + +enum PathRayFlag { + /* -------------------------------------------------------------------- + * Ray visibility. + * + * NOTE: Recalculated after a surface bounce. + */ + + PATH_RAY_CAMERA = (1U << 0U), + PATH_RAY_REFLECT = (1U << 1U), + PATH_RAY_TRANSMIT = (1U << 2U), + PATH_RAY_DIFFUSE = (1U << 3U), + PATH_RAY_GLOSSY = (1U << 4U), + PATH_RAY_SINGULAR = (1U << 5U), + PATH_RAY_TRANSPARENT = (1U << 6U), + PATH_RAY_VOLUME_SCATTER = (1U << 7U), + + /* Shadow ray visibility. */ + PATH_RAY_SHADOW_OPAQUE = (1U << 8U), + PATH_RAY_SHADOW_TRANSPARENT = (1U << 9U), + PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE | PATH_RAY_SHADOW_TRANSPARENT), + + /* Special flag to tag unaligned BVH nodes. + * Only set and used in BVH nodes to distinguish how to interpret bounding box information stored + * in the node (either it should be intersected as AABB or as OBBU). */ + PATH_RAY_NODE_UNALIGNED = (1U << 10U), + + /* Subset of flags used for ray visibility for intersection. + * + * NOTE: SHADOW_CATCHER macros below assume there are no more than + * 16 visibility bits. */ + PATH_RAY_ALL_VISIBILITY = ((1U << 11U) - 1U), + + /* -------------------------------------------------------------------- + * Path flags. + */ + + /* Don't apply multiple importance sampling weights to emission from + * lamp or surface hits, because they were not direct light sampled. 
*/ + PATH_RAY_MIS_SKIP = (1U << 11U), + + /* Diffuse bounce earlier in the path, skip SSS to improve performance + * and avoid branching twice with disk sampling SSS. */ + PATH_RAY_DIFFUSE_ANCESTOR = (1U << 12U), + + /* Single pass has been written. */ + PATH_RAY_SINGLE_PASS_DONE = (1U << 13U), + + /* Zero background alpha, for camera or transparent glass rays. */ + PATH_RAY_TRANSPARENT_BACKGROUND = (1U << 14U), + + /* Terminate ray immediately at next bounce. */ + PATH_RAY_TERMINATE_ON_NEXT_SURFACE = (1U << 15U), + PATH_RAY_TERMINATE_IN_NEXT_VOLUME = (1U << 16U), + + /* Ray is to be terminated, but continue with transparent bounces and + * emission as long as we encounter them. This is required to make the + * MIS between direct and indirect light rays match, as shadow rays go + * through transparent surfaces to reach emission too. */ + PATH_RAY_TERMINATE_AFTER_TRANSPARENT = (1U << 17U), + + /* Terminate ray immediately after volume shading. */ + PATH_RAY_TERMINATE_AFTER_VOLUME = (1U << 18U), + + /* Ray is to be terminated. */ + PATH_RAY_TERMINATE = (PATH_RAY_TERMINATE_ON_NEXT_SURFACE | PATH_RAY_TERMINATE_IN_NEXT_VOLUME | + PATH_RAY_TERMINATE_AFTER_TRANSPARENT | PATH_RAY_TERMINATE_AFTER_VOLUME), + + /* Path and shader is being evaluated for direct lighting emission. */ + PATH_RAY_EMISSION = (1U << 19U), + + /* Perform subsurface scattering. */ + PATH_RAY_SUBSURFACE_RANDOM_WALK = (1U << 20U), + PATH_RAY_SUBSURFACE_DISK = (1U << 21U), + PATH_RAY_SUBSURFACE_USE_FRESNEL = (1U << 22U), + PATH_RAY_SUBSURFACE = (PATH_RAY_SUBSURFACE_RANDOM_WALK | PATH_RAY_SUBSURFACE_DISK | + PATH_RAY_SUBSURFACE_USE_FRESNEL), + + /* Contribute to denoising features. */ + PATH_RAY_DENOISING_FEATURES = (1U << 23U), + + /* Render pass categories. */ + PATH_RAY_REFLECT_PASS = (1U << 24U), + PATH_RAY_TRANSMISSION_PASS = (1U << 25U), + PATH_RAY_VOLUME_PASS = (1U << 26U), + PATH_RAY_ANY_PASS = (PATH_RAY_REFLECT_PASS | PATH_RAY_TRANSMISSION_PASS | PATH_RAY_VOLUME_PASS), + + /* Shadow ray is for a light or surface, or AO. */ + PATH_RAY_SHADOW_FOR_LIGHT = (1U << 27U), + PATH_RAY_SHADOW_FOR_AO = (1U << 28U), + + /* A shadow catcher object was hit and the path was split into two. */ + PATH_RAY_SHADOW_CATCHER_HIT = (1U << 29U), + + /* A shadow catcher object was hit and this path traces only shadow catchers, writing them into + * their dedicated pass for later division. + * + * NOTE: Is not covered with `PATH_RAY_ANY_PASS` because shadow catcher does special handling + * which is separate from the light passes. */ + PATH_RAY_SHADOW_CATCHER_PASS = (1U << 30U), + + /* Path is evaluating background for an approximate shadow catcher with non-transparent film. */ + PATH_RAY_SHADOW_CATCHER_BACKGROUND = (1U << 31U), +}; + +/* Configure ray visibility bits for rays and objects respectively, + * to make shadow catchers work. + * + * On shadow catcher paths we want to ignore any intersections with non-catchers, + * whereas on regular paths we want to intersect all objects. */ + +#define SHADOW_CATCHER_VISIBILITY_SHIFT(visibility) ((visibility) << 16) + +#define SHADOW_CATCHER_PATH_VISIBILITY(path_flag, visibility) \ + (((path_flag)&PATH_RAY_SHADOW_CATCHER_PASS) ? SHADOW_CATCHER_VISIBILITY_SHIFT(visibility) : \ + (visibility)) + +#define SHADOW_CATCHER_OBJECT_VISIBILITY(is_shadow_catcher, visibility) \ + (((is_shadow_catcher) ? 
SHADOW_CATCHER_VISIBILITY_SHIFT(visibility) : 0) | (visibility)) + +/* Closure Label */ + +typedef enum ClosureLabel { + LABEL_NONE = 0, + LABEL_TRANSMIT = 1, + LABEL_REFLECT = 2, + LABEL_DIFFUSE = 4, + LABEL_GLOSSY = 8, + LABEL_SINGULAR = 16, + LABEL_TRANSPARENT = 32, + LABEL_VOLUME_SCATTER = 64, + LABEL_TRANSMIT_TRANSPARENT = 128, + LABEL_SUBSURFACE_SCATTER = 256, +} ClosureLabel; + +/* Render Passes */ + +#define PASS_NAME_JOIN(a, b) a##_##b +#define PASSMASK(pass) (1 << ((PASS_NAME_JOIN(PASS, pass)) % 32)) + +// NOTE: Keep in sync with `Pass::get_type_enum()`. +typedef enum PassType { + PASS_NONE = 0, + + /* Light Passes */ + PASS_COMBINED = 1, + PASS_EMISSION, + PASS_BACKGROUND, + PASS_AO, + PASS_SHADOW, + PASS_DIFFUSE, + PASS_DIFFUSE_DIRECT, + PASS_DIFFUSE_INDIRECT, + PASS_GLOSSY, + PASS_GLOSSY_DIRECT, + PASS_GLOSSY_INDIRECT, + PASS_TRANSMISSION, + PASS_TRANSMISSION_DIRECT, + PASS_TRANSMISSION_INDIRECT, + PASS_VOLUME, + PASS_VOLUME_DIRECT, + PASS_VOLUME_INDIRECT, + PASS_CATEGORY_LIGHT_END = 31, + + /* Data passes */ + PASS_DEPTH = 32, + PASS_POSITION, + PASS_NORMAL, + PASS_ROUGHNESS, + PASS_UV, + PASS_OBJECT_ID, + PASS_MATERIAL_ID, + PASS_MOTION, + PASS_MOTION_WEIGHT, + PASS_CRYPTOMATTE, + PASS_AOV_COLOR, + PASS_AOV_VALUE, + PASS_ADAPTIVE_AUX_BUFFER, + PASS_SAMPLE_COUNT, + PASS_DIFFUSE_COLOR, + PASS_GLOSSY_COLOR, + PASS_TRANSMISSION_COLOR, + /* No Scatter color since it's tricky to define what it would even mean. */ + PASS_MIST, + PASS_DENOISING_NORMAL, + PASS_DENOISING_ALBEDO, + PASS_DENOISING_DEPTH, + + /* PASS_SHADOW_CATCHER accumulates contribution of shadow catcher object which is not affected by + * any other object. The pass accessor will divide the combined pass by the shadow catcher. The + * result of this division is then to be multiplied with the backdrop. The alpha channel of this + * pass contains number of samples which contributed to the color components of the pass. + * + * PASS_SHADOW_CATCHER_SAMPLE_COUNT contains number of samples for which the path split + * happened. + * + * PASS_SHADOW_CATCHER_MATTE contains pass which contains non-catcher objects. This pass is to be + * alpha-overed onto the backdrop (after multiplication). 
*/ + PASS_SHADOW_CATCHER, + PASS_SHADOW_CATCHER_SAMPLE_COUNT, + PASS_SHADOW_CATCHER_MATTE, + + PASS_CATEGORY_DATA_END = 63, + + PASS_BAKE_PRIMITIVE, + PASS_BAKE_DIFFERENTIAL, + PASS_CATEGORY_BAKE_END = 95, + + PASS_NUM, +} PassType; + +#define PASS_ANY (~0) + +typedef enum CryptomatteType { + CRYPT_NONE = 0, + CRYPT_OBJECT = (1 << 0), + CRYPT_MATERIAL = (1 << 1), + CRYPT_ASSET = (1 << 2), + CRYPT_ACCURATE = (1 << 3), +} CryptomatteType; + +typedef struct BsdfEval { + float3 diffuse; + float3 glossy; +} BsdfEval; + +/* Shader Flag */ + +typedef enum ShaderFlag { + SHADER_SMOOTH_NORMAL = (1 << 31), + SHADER_CAST_SHADOW = (1 << 30), + SHADER_AREA_LIGHT = (1 << 29), + SHADER_USE_MIS = (1 << 28), + SHADER_EXCLUDE_DIFFUSE = (1 << 27), + SHADER_EXCLUDE_GLOSSY = (1 << 26), + SHADER_EXCLUDE_TRANSMIT = (1 << 25), + SHADER_EXCLUDE_CAMERA = (1 << 24), + SHADER_EXCLUDE_SCATTER = (1 << 23), + SHADER_EXCLUDE_SHADOW_CATCHER = (1 << 22), + SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE | SHADER_EXCLUDE_GLOSSY | SHADER_EXCLUDE_TRANSMIT | + SHADER_EXCLUDE_CAMERA | SHADER_EXCLUDE_SCATTER | + SHADER_EXCLUDE_SHADOW_CATCHER), + + SHADER_MASK = ~(SHADER_SMOOTH_NORMAL | SHADER_CAST_SHADOW | SHADER_AREA_LIGHT | SHADER_USE_MIS | + SHADER_EXCLUDE_ANY) +} ShaderFlag; + +/* Light Type */ + +typedef enum LightType { + LIGHT_POINT, + LIGHT_DISTANT, + LIGHT_BACKGROUND, + LIGHT_AREA, + LIGHT_SPOT, + LIGHT_TRIANGLE +} LightType; + +/* Camera Type */ + +enum CameraType { CAMERA_PERSPECTIVE, CAMERA_ORTHOGRAPHIC, CAMERA_PANORAMA }; + +/* Panorama Type */ + +enum PanoramaType { + PANORAMA_EQUIRECTANGULAR = 0, + PANORAMA_FISHEYE_EQUIDISTANT = 1, + PANORAMA_FISHEYE_EQUISOLID = 2, + PANORAMA_MIRRORBALL = 3, + + PANORAMA_NUM_TYPES, +}; + +/* Differential */ + +typedef struct differential3 { + float3 dx; + float3 dy; +} differential3; + +typedef struct differential { + float dx; + float dy; +} differential; + +/* Ray */ + +typedef struct Ray { + float3 P; /* origin */ + float3 D; /* direction */ + float t; /* length of the ray */ + float time; /* time (for motion blur) */ + +#ifdef __RAY_DIFFERENTIALS__ + float dP; + float dD; +#endif +} Ray; + +/* Intersection */ + +typedef struct Intersection { + float t, u, v; + int prim; + int object; + int type; +} Intersection; + +/* Primitives */ + +typedef enum PrimitiveType { + PRIMITIVE_NONE = 0, + PRIMITIVE_TRIANGLE = (1 << 0), + PRIMITIVE_MOTION_TRIANGLE = (1 << 1), + PRIMITIVE_CURVE_THICK = (1 << 2), + PRIMITIVE_MOTION_CURVE_THICK = (1 << 3), + PRIMITIVE_CURVE_RIBBON = (1 << 4), + PRIMITIVE_MOTION_CURVE_RIBBON = (1 << 5), + PRIMITIVE_VOLUME = (1 << 6), + PRIMITIVE_LAMP = (1 << 7), + + PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE | PRIMITIVE_MOTION_TRIANGLE), + PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE_THICK | PRIMITIVE_MOTION_CURVE_THICK | + PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON), + PRIMITIVE_ALL_VOLUME = (PRIMITIVE_VOLUME), + PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE | PRIMITIVE_MOTION_CURVE_THICK | + PRIMITIVE_MOTION_CURVE_RIBBON), + PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE | PRIMITIVE_ALL_CURVE | PRIMITIVE_ALL_VOLUME | + PRIMITIVE_LAMP), + + PRIMITIVE_NUM = 8, +} PrimitiveType; + +#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << PRIMITIVE_NUM) | (type)) +#define PRIMITIVE_UNPACK_SEGMENT(type) (type >> PRIMITIVE_NUM) + +typedef enum CurveShapeType { + CURVE_RIBBON = 0, + CURVE_THICK = 1, + + CURVE_NUM_SHAPE_TYPES, +} CurveShapeType; + +/* Attributes */ + +typedef enum AttributePrimitive { + ATTR_PRIM_GEOMETRY = 0, + ATTR_PRIM_SUBD, + + ATTR_PRIM_TYPES +} 
AttributePrimitive;
+
+typedef enum AttributeElement {
+ ATTR_ELEMENT_NONE = 0,
+ ATTR_ELEMENT_OBJECT = (1 << 0),
+ ATTR_ELEMENT_MESH = (1 << 1),
+ ATTR_ELEMENT_FACE = (1 << 2),
+ ATTR_ELEMENT_VERTEX = (1 << 3),
+ ATTR_ELEMENT_VERTEX_MOTION = (1 << 4),
+ ATTR_ELEMENT_CORNER = (1 << 5),
+ ATTR_ELEMENT_CORNER_BYTE = (1 << 6),
+ ATTR_ELEMENT_CURVE = (1 << 7),
+ ATTR_ELEMENT_CURVE_KEY = (1 << 8),
+ ATTR_ELEMENT_CURVE_KEY_MOTION = (1 << 9),
+ ATTR_ELEMENT_VOXEL = (1 << 10)
+} AttributeElement;
+
+typedef enum AttributeStandard {
+ ATTR_STD_NONE = 0,
+ ATTR_STD_VERTEX_NORMAL,
+ ATTR_STD_FACE_NORMAL,
+ ATTR_STD_UV,
+ ATTR_STD_UV_TANGENT,
+ ATTR_STD_UV_TANGENT_SIGN,
+ ATTR_STD_VERTEX_COLOR,
+ ATTR_STD_GENERATED,
+ ATTR_STD_GENERATED_TRANSFORM,
+ ATTR_STD_POSITION_UNDEFORMED,
+ ATTR_STD_POSITION_UNDISPLACED,
+ ATTR_STD_MOTION_VERTEX_POSITION,
+ ATTR_STD_MOTION_VERTEX_NORMAL,
+ ATTR_STD_PARTICLE,
+ ATTR_STD_CURVE_INTERCEPT,
+ ATTR_STD_CURVE_LENGTH,
+ ATTR_STD_CURVE_RANDOM,
+ ATTR_STD_PTEX_FACE_ID,
+ ATTR_STD_PTEX_UV,
+ ATTR_STD_VOLUME_DENSITY,
+ ATTR_STD_VOLUME_COLOR,
+ ATTR_STD_VOLUME_FLAME,
+ ATTR_STD_VOLUME_HEAT,
+ ATTR_STD_VOLUME_TEMPERATURE,
+ ATTR_STD_VOLUME_VELOCITY,
+ ATTR_STD_POINTINESS,
+ ATTR_STD_RANDOM_PER_ISLAND,
+ ATTR_STD_SHADOW_TRANSPARENCY,
+ ATTR_STD_NUM,
+
+ ATTR_STD_NOT_FOUND = ~0
+} AttributeStandard;
+
+typedef enum AttributeFlag {
+ ATTR_FINAL_SIZE = (1 << 0),
+ ATTR_SUBDIVIDED = (1 << 1),
+} AttributeFlag;
+
+typedef struct AttributeDescriptor {
+ AttributeElement element;
+ NodeAttributeType type;
+ uint flags; /* see enum AttributeFlag */
+ int offset;
+} AttributeDescriptor;
+
+/* Closure data */
+
+#ifndef __MAX_CLOSURE__
+# define MAX_CLOSURE 64
+#else
+# define MAX_CLOSURE __MAX_CLOSURE__
+#endif
+
+#ifndef __MAX_VOLUME_STACK_SIZE__
+# define MAX_VOLUME_STACK_SIZE 32
+#else
+# define MAX_VOLUME_STACK_SIZE __MAX_VOLUME_STACK_SIZE__
+#endif
+
+#define MAX_VOLUME_CLOSURE 8
+
+/* This struct is the base class for all closures. The common members are
+ * duplicated in all derived classes since we don't have C++ in the kernel
+ * yet, and because it lets us lay out the members to minimize padding. The
+ * weight member is located at the beginning of the struct for this reason.
+ *
+ * ShaderClosure has a fixed size, and any extra space must be allocated
+ * with closure_alloc_extra().
+ *
+ * We pad the struct to align to 16 bytes. All shader closures are assumed
+ * to fit in this struct size. CPU sizes are a bit larger because float3 is
+ * padded to be 16 bytes, while it's only 12 bytes on the GPU. */
+
+#define SHADER_CLOSURE_BASE \
+ float3 weight; \
+ ClosureType type; \
+ float sample_weight; \
+ float3 N
+
+typedef struct ccl_align(16) ShaderClosure
+{
+ SHADER_CLOSURE_BASE;
+
+#ifdef __KERNEL_CPU__
+ float pad[2];
+#endif
+ float data[10];
+}
+ShaderClosure;
+
+/* Shader Data
+ *
+ * Main shader state at a point on the surface or in a volume. All coordinates
+ * are in world space.
+ */
+
+enum ShaderDataFlag {
+ /* Runtime flags. */
+
+ /* Set when ray hits backside of surface. */
+ SD_BACKFACING = (1 << 0),
+ /* Shader has non-zero emission. */
+ SD_EMISSION = (1 << 1),
+ /* Shader has BSDF closure. */
+ SD_BSDF = (1 << 2),
+ /* Shader has non-singular BSDF closure. */
+ SD_BSDF_HAS_EVAL = (1 << 3),
+ /* Shader has BSSRDF closure. */
+ SD_BSSRDF = (1 << 4),
+ /* Shader has holdout closure. */
+ SD_HOLDOUT = (1 << 5),
+ /* Shader has non-zero volume extinction. */
+ SD_EXTINCTION = (1 << 6),
+ /* Shader has volume phase (scatter) closure.
*/ + SD_SCATTER = (1 << 7), + /* Shader has transparent closure. */ + SD_TRANSPARENT = (1 << 9), + /* BSDF requires LCG for evaluation. */ + SD_BSDF_NEEDS_LCG = (1 << 10), + + SD_CLOSURE_FLAGS = (SD_EMISSION | SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSSRDF | SD_HOLDOUT | + SD_EXTINCTION | SD_SCATTER | SD_BSDF_NEEDS_LCG), + + /* Shader flags. */ + + /* direct light sample */ + SD_USE_MIS = (1 << 16), + /* Has transparent shadow. */ + SD_HAS_TRANSPARENT_SHADOW = (1 << 17), + /* Has volume shader. */ + SD_HAS_VOLUME = (1 << 18), + /* Has only volume shader, no surface. */ + SD_HAS_ONLY_VOLUME = (1 << 19), + /* Has heterogeneous volume. */ + SD_HETEROGENEOUS_VOLUME = (1 << 20), + /* BSSRDF normal uses bump. */ + SD_HAS_BSSRDF_BUMP = (1 << 21), + /* Use equiangular volume sampling */ + SD_VOLUME_EQUIANGULAR = (1 << 22), + /* Use multiple importance volume sampling. */ + SD_VOLUME_MIS = (1 << 23), + /* Use cubic interpolation for voxels. */ + SD_VOLUME_CUBIC = (1 << 24), + /* Has data connected to the displacement input or uses bump map. */ + SD_HAS_BUMP = (1 << 25), + /* Has true displacement. */ + SD_HAS_DISPLACEMENT = (1 << 26), + /* Has constant emission (value stored in __shaders) */ + SD_HAS_CONSTANT_EMISSION = (1 << 27), + /* Needs to access attributes for volume rendering */ + SD_NEED_VOLUME_ATTRIBUTES = (1 << 28), + /* Shader has emission */ + SD_HAS_EMISSION = (1 << 29), + /* Shader has raytracing */ + SD_HAS_RAYTRACE = (1 << 30), + + SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME | + SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR | + SD_VOLUME_MIS | SD_VOLUME_CUBIC | SD_HAS_BUMP | SD_HAS_DISPLACEMENT | + SD_HAS_CONSTANT_EMISSION | SD_NEED_VOLUME_ATTRIBUTES | SD_HAS_EMISSION | + SD_HAS_RAYTRACE) +}; + +/* Object flags. */ +enum ShaderDataObjectFlag { + /* Holdout for camera rays. */ + SD_OBJECT_HOLDOUT_MASK = (1 << 0), + /* Has object motion blur. */ + SD_OBJECT_MOTION = (1 << 1), + /* Vertices have transform applied. */ + SD_OBJECT_TRANSFORM_APPLIED = (1 << 2), + /* Vertices have negative scale applied. */ + SD_OBJECT_NEGATIVE_SCALE_APPLIED = (1 << 3), + /* Object has a volume shader. */ + SD_OBJECT_HAS_VOLUME = (1 << 4), + /* Object intersects AABB of an object with volume shader. */ + SD_OBJECT_INTERSECTS_VOLUME = (1 << 5), + /* Has position for motion vertices. 
*/ + SD_OBJECT_HAS_VERTEX_MOTION = (1 << 6), + /* object is used to catch shadows */ + SD_OBJECT_SHADOW_CATCHER = (1 << 7), + /* object has volume attributes */ + SD_OBJECT_HAS_VOLUME_ATTRIBUTES = (1 << 8), + + SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK | SD_OBJECT_MOTION | SD_OBJECT_TRANSFORM_APPLIED | + SD_OBJECT_NEGATIVE_SCALE_APPLIED | SD_OBJECT_HAS_VOLUME | + SD_OBJECT_INTERSECTS_VOLUME | SD_OBJECT_SHADOW_CATCHER | + SD_OBJECT_HAS_VOLUME_ATTRIBUTES) +}; + +typedef struct ccl_align(16) ShaderData +{ + /* position */ + float3 P; + /* smooth normal for shading */ + float3 N; + /* true geometric normal */ + float3 Ng; + /* view/incoming direction */ + float3 I; + /* shader id */ + int shader; + /* booleans describing shader, see ShaderDataFlag */ + int flag; + /* booleans describing object of the shader, see ShaderDataObjectFlag */ + int object_flag; + + /* primitive id if there is one, ~0 otherwise */ + int prim; + + /* combined type and curve segment for hair */ + int type; + + /* parametric coordinates + * - barycentric weights for triangles */ + float u; + float v; + /* object id if there is one, ~0 otherwise */ + int object; + /* lamp id if there is one, ~0 otherwise */ + int lamp; + + /* motion blur sample time */ + float time; + + /* length of the ray being shaded */ + float ray_length; + +#ifdef __RAY_DIFFERENTIALS__ + /* differential of P. these are orthogonal to Ng, not N */ + differential3 dP; + /* differential of I */ + differential3 dI; + /* differential of u, v */ + differential du; + differential dv; +#endif +#ifdef __DPDU__ + /* differential of P w.r.t. parametric coordinates. note that dPdu is + * not readily suitable as a tangent for shading on triangles. */ + float3 dPdu; + float3 dPdv; +#endif + +#ifdef __OBJECT_MOTION__ + /* Object <-> world space transformations for motion blur, cached to avoid + * re-interpolating them constantly for shading. */ + Transform ob_tfm_motion; + Transform ob_itfm_motion; +#endif + + /* ray start position, only set for backgrounds */ + float3 ray_P; + float ray_dP; + +#ifdef __OSL__ + const struct KernelGlobalsCPU *osl_globals; + const struct IntegratorStateCPU *osl_path_state; + const struct IntegratorShadowStateCPU *osl_shadow_path_state; +#endif + + /* LCG state for closures that require additional random numbers. */ + uint lcg_state; + + /* Closure data, we store a fixed array of closures */ + int num_closure; + int num_closure_left; + float3 svm_closure_weight; + + /* Closure weights summed directly, so we can evaluate + * emission and shadow transparency with MAX_CLOSURE 0. */ + float3 closure_emission_background; + float3 closure_transparent_extinction; + + /* At the end so we can adjust size in ShaderDataTinyStorage. */ + struct ShaderClosure closure[MAX_CLOSURE]; +} +ShaderData; + +/* ShaderDataTinyStorage needs the same alignment as ShaderData, or else + * the pointer cast in AS_SHADER_DATA invokes undefined behavior. */ +typedef struct ccl_align(16) ShaderDataTinyStorage +{ + char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE]; +} +ShaderDataTinyStorage; +#define AS_SHADER_DATA(shader_data_tiny_storage) \ + ((ccl_private ShaderData *)shader_data_tiny_storage) + +/* Compact volume closures storage. + * + * Used for decoupled direct/indirect light closure storage. 
*/ + +typedef struct ShaderVolumeClosure { + float3 weight; + float sample_weight; + float g; +} ShaderVolumeClosure; + +typedef struct ShaderVolumePhases { + ShaderVolumeClosure closure[MAX_VOLUME_CLOSURE]; + int num_closure; +} ShaderVolumePhases; + +/* Volume Stack */ + +#ifdef __VOLUME__ +typedef struct VolumeStack { + int object; + int shader; +} VolumeStack; +#endif + +/* Struct to gather multiple nearby intersections. */ +typedef struct LocalIntersection { + int num_hits; + struct Intersection hits[LOCAL_MAX_HITS]; + float3 Ng[LOCAL_MAX_HITS]; +} LocalIntersection; + +/* Constant Kernel Data + * + * These structs are passed from CPU to various devices, and the struct layout + * must match exactly. Structs are padded to ensure 16 byte alignment, and we + * do not use float3 because its size may not be the same on all devices. */ + +typedef struct KernelCamera { + /* type */ + int type; + + /* panorama */ + int panorama_type; + float fisheye_fov; + float fisheye_lens; + float4 equirectangular_range; + + /* stereo */ + float interocular_offset; + float convergence_distance; + float pole_merge_angle_from; + float pole_merge_angle_to; + + /* matrices */ + Transform cameratoworld; + ProjectionTransform rastertocamera; + + /* differentials */ + float4 dx; + float4 dy; + + /* depth of field */ + float aperturesize; + float blades; + float bladesrotation; + float focaldistance; + + /* motion blur */ + float shuttertime; + int num_motion_steps, have_perspective_motion; + + /* clipping */ + float nearclip; + float cliplength; + + /* sensor size */ + float sensorwidth; + float sensorheight; + + /* render size */ + float width, height; + int pad1; + + /* anamorphic lens bokeh */ + float inv_aperture_ratio; + + int is_inside_volume; + + /* more matrices */ + ProjectionTransform screentoworld; + ProjectionTransform rastertoworld; + ProjectionTransform ndctoworld; + ProjectionTransform worldtoscreen; + ProjectionTransform worldtoraster; + ProjectionTransform worldtondc; + Transform worldtocamera; + + /* Stores changes in the projection matrix. Use for camera zoom motion + * blur and motion pass output for perspective camera. */ + ProjectionTransform perspective_pre; + ProjectionTransform perspective_post; + + /* Transforms for motion pass. 
*/ + Transform motion_pass_pre; + Transform motion_pass_post; + + int shutter_table_offset; + + /* Rolling shutter */ + int rolling_shutter_type; + float rolling_shutter_duration; + + int pad; +} KernelCamera; +static_assert_align(KernelCamera, 16); + +typedef struct KernelFilm { + float exposure; + int pass_flag; + + int light_pass_flag; + int pass_stride; + + int pass_combined; + int pass_depth; + int pass_position; + int pass_normal; + int pass_roughness; + int pass_motion; + + int pass_motion_weight; + int pass_uv; + int pass_object_id; + int pass_material_id; + + int pass_diffuse_color; + int pass_glossy_color; + int pass_transmission_color; + + int pass_diffuse_indirect; + int pass_glossy_indirect; + int pass_transmission_indirect; + int pass_volume_indirect; + + int pass_diffuse_direct; + int pass_glossy_direct; + int pass_transmission_direct; + int pass_volume_direct; + + int pass_emission; + int pass_background; + int pass_ao; + float pass_alpha_threshold; + + int pass_shadow; + float pass_shadow_scale; + + int pass_shadow_catcher; + int pass_shadow_catcher_sample_count; + int pass_shadow_catcher_matte; + + int filter_table_offset; + + int cryptomatte_passes; + int cryptomatte_depth; + int pass_cryptomatte; + + int pass_adaptive_aux_buffer; + int pass_sample_count; + + int pass_mist; + float mist_start; + float mist_inv_depth; + float mist_falloff; + + int pass_denoising_normal; + int pass_denoising_albedo; + int pass_denoising_depth; + + int pass_aov_color; + int pass_aov_value; + + /* XYZ to rendering color space transform. float4 instead of float3 to + * ensure consistent padding/alignment across devices. */ + float4 xyz_to_r; + float4 xyz_to_g; + float4 xyz_to_b; + float4 rgb_to_y; + + int pass_bake_primitive; + int pass_bake_differential; + + int use_approximate_shadow_catcher; + + int pad1, pad2; +} KernelFilm; +static_assert_align(KernelFilm, 16); + +typedef struct KernelFilmConvert { + int pass_offset; + int pass_stride; + + int pass_use_exposure; + int pass_use_filter; + + int pass_divide; + int pass_indirect; + + int pass_combined; + int pass_sample_count; + int pass_adaptive_aux_buffer; + int pass_motion_weight; + int pass_shadow_catcher; + int pass_shadow_catcher_sample_count; + int pass_shadow_catcher_matte; + int pass_background; + + float scale; + float exposure; + float scale_exposure; + + int use_approximate_shadow_catcher; + int use_approximate_shadow_catcher_background; + int show_active_pixels; + + /* Number of components to write to. */ + int num_components; + + /* Number of floats per pixel. When zero is the same as `num_components`. + * NOTE: Is ignored for half4 destination. */ + int pixel_stride; + + int is_denoised; + + /* Padding. */ + int pad1; +} KernelFilmConvert; +static_assert_align(KernelFilmConvert, 16); + +typedef struct KernelBackground { + /* only shader index */ + int surface_shader; + int volume_shader; + float volume_step_size; + int transparent; + float transparent_roughness_squared_threshold; + + /* portal sampling */ + float portal_weight; + int num_portals; + int portal_offset; + + /* sun sampling */ + float sun_weight; + /* xyz store direction, w the angle. float4 instead of float3 is used + * to ensure consistent padding/alignment across devices. 
*/ + float4 sun; + + /* map sampling */ + float map_weight; + int map_res_x; + int map_res_y; + + int use_mis; + + /* Padding */ + int pad1, pad2, pad3; +} KernelBackground; +static_assert_align(KernelBackground, 16); + +typedef struct KernelIntegrator { + /* emission */ + int use_direct_light; + int num_distribution; + int num_all_lights; + float pdf_triangles; + float pdf_lights; + float light_inv_rr_threshold; + + /* bounces */ + int min_bounce; + int max_bounce; + + int max_diffuse_bounce; + int max_glossy_bounce; + int max_transmission_bounce; + int max_volume_bounce; + + /* AO bounces */ + int ao_bounces; + float ao_bounces_distance; + float ao_bounces_factor; + float ao_additive_factor; + + /* transparent */ + int transparent_min_bounce; + int transparent_max_bounce; + int transparent_shadows; + + /* caustics */ + int caustics_reflective; + int caustics_refractive; + float filter_glossy; + + /* seed */ + int seed; + + /* clamp */ + float sample_clamp_direct; + float sample_clamp_indirect; + + /* mis */ + int use_lamp_mis; + + /* sampler */ + int sampling_pattern; + + /* volume render */ + int use_volumes; + int volume_max_steps; + float volume_step_rate; + + int has_shadow_catcher; + + /* padding */ + int pad1; +} KernelIntegrator; +static_assert_align(KernelIntegrator, 16); + +typedef enum KernelBVHLayout { + BVH_LAYOUT_NONE = 0, + + BVH_LAYOUT_BVH2 = (1 << 0), + BVH_LAYOUT_EMBREE = (1 << 1), + BVH_LAYOUT_OPTIX = (1 << 2), + BVH_LAYOUT_MULTI_OPTIX = (1 << 3), + BVH_LAYOUT_MULTI_OPTIX_EMBREE = (1 << 4), + + /* Default BVH layout to use for CPU. */ + BVH_LAYOUT_AUTO = BVH_LAYOUT_EMBREE, + BVH_LAYOUT_ALL = BVH_LAYOUT_BVH2 | BVH_LAYOUT_EMBREE | BVH_LAYOUT_OPTIX, +} KernelBVHLayout; + +typedef struct KernelBVH { + /* Own BVH */ + int root; + int have_motion; + int have_curves; + int bvh_layout; + int use_bvh_steps; + int curve_subdivisions; + + /* Custom BVH */ +#ifdef __KERNEL_OPTIX__ + OptixTraversableHandle scene; +#else +# ifdef __EMBREE__ + RTCScene scene; +# ifndef __KERNEL_64_BIT__ + int pad2; +# endif +# else + int scene, pad2; +# endif +#endif +} KernelBVH; +static_assert_align(KernelBVH, 16); + +typedef struct KernelTables { + int beckmann_offset; + int pad1, pad2, pad3; +} KernelTables; +static_assert_align(KernelTables, 16); + +typedef struct KernelBake { + int use; + int object_index; + int tri_offset; + int pad1; +} KernelBake; +static_assert_align(KernelBake, 16); + +typedef struct KernelData { + uint kernel_features; + uint max_closures; + uint max_shaders; + uint volume_stack_size; + + KernelCamera cam; + KernelFilm film; + KernelBackground background; + KernelIntegrator integrator; + KernelBVH bvh; + KernelTables tables; + KernelBake bake; +} KernelData; +static_assert_align(KernelData, 16); + +/* Kernel data structures. 
*/ + +typedef struct KernelObject { + Transform tfm; + Transform itfm; + + float volume_density; + float pass_id; + float random_number; + float color[3]; + int particle_index; + + float dupli_generated[3]; + float dupli_uv[2]; + + int numkeys; + int numsteps; + int numverts; + + uint patch_map_offset; + uint attribute_map_offset; + uint motion_offset; + + float cryptomatte_object; + float cryptomatte_asset; + + float shadow_terminator_shading_offset; + float shadow_terminator_geometry_offset; + + float ao_distance; + + uint visibility; + int primitive_type; +} KernelObject; +static_assert_align(KernelObject, 16); + +typedef struct KernelCurve { + int shader_id; + int first_key; + int num_keys; + int type; +} KernelCurve; +static_assert_align(KernelCurve, 16); + +typedef struct KernelCurveSegment { + int prim; + int type; +} KernelCurveSegment; +static_assert_align(KernelCurveSegment, 8); + +typedef struct KernelSpotLight { + float radius; + float invarea; + float spot_angle; + float spot_smooth; + float dir[3]; + float pad; +} KernelSpotLight; + +/* PointLight is SpotLight with only radius and invarea being used. */ + +typedef struct KernelAreaLight { + float axisu[3]; + float invarea; + float axisv[3]; + float tan_spread; + float dir[3]; + float normalize_spread; +} KernelAreaLight; + +typedef struct KernelDistantLight { + float radius; + float cosangle; + float invarea; + float pad; +} KernelDistantLight; + +typedef struct KernelLight { + int type; + float co[3]; + int shader_id; + float max_bounces; + float random; + float strength[3]; + float pad1, pad2; + Transform tfm; + Transform itfm; + union { + KernelSpotLight spot; + KernelAreaLight area; + KernelDistantLight distant; + }; +} KernelLight; +static_assert_align(KernelLight, 16); + +typedef struct KernelLightDistribution { + float totarea; + int prim; + union { + struct { + int shader_flag; + int object_id; + } mesh_light; + struct { + float pad; + float size; + } lamp; + }; +} KernelLightDistribution; +static_assert_align(KernelLightDistribution, 16); + +typedef struct KernelParticle { + int index; + float age; + float lifetime; + float size; + float4 rotation; + /* Only xyz are used of the following. float4 instead of float3 are used + * to ensure consistent padding/alignment across devices. */ + float4 location; + float4 velocity; + float4 angular_velocity; +} KernelParticle; +static_assert_align(KernelParticle, 16); + +typedef struct KernelShader { + float constant_emission[3]; + float cryptomatte_id; + int flags; + int pass_id; + int pad2, pad3; +} KernelShader; +static_assert_align(KernelShader, 16); + +/* Patches */ + +#define PATCH_MAX_CONTROL_VERTS 16 + +/* Patch map node flags */ + +#define PATCH_MAP_NODE_IS_SET (1 << 30) +#define PATCH_MAP_NODE_IS_LEAF (1u << 31) +#define PATCH_MAP_NODE_INDEX_MASK (~(PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF)) + +/* Work Tiles */ + +typedef struct KernelWorkTile { + uint x, y, w, h; + + uint start_sample; + uint num_samples; + + int offset; + uint stride; + + /* Precalculated parameters used by init_from_camera kernel on GPU. */ + int path_index_offset; + int work_size; +} KernelWorkTile; + +/* Shader Evaluation. + * + * Position on a primitive on an object at which we want to evaluate the + * shader for e.g. mesh displacement or light importance map. */ + +typedef struct KernelShaderEvalInput { + int object; + int prim; + float u, v; +} KernelShaderEvalInput; +static_assert_align(KernelShaderEvalInput, 16); + +/* Pre-computed sample table sizes for PMJ02 sampler. 
*/ +#define NUM_PMJ_DIVISIONS 32 +#define NUM_PMJ_SAMPLES ((NUM_PMJ_DIVISIONS) * (NUM_PMJ_DIVISIONS)) +#define NUM_PMJ_PATTERNS 1 + +/* Device kernels. + * + * Identifier for kernels that can be executed in device queues. + * + * Some implementation details. + * + * If the kernel uses shared CUDA memory, `CUDADeviceQueue::enqueue` is to be modified. + * The path iteration kernels are handled in `PathTraceWorkGPU::enqueue_path_iteration`. */ + +typedef enum DeviceKernel { + DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA = 0, + DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK, + DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND, + DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, + DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, + DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL, + + DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY, + DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY, + DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY, + DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY, + DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY, + DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY, + DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES, + DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY, + DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY, + DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES, + DEVICE_KERNEL_INTEGRATOR_RESET, + DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS, + + DEVICE_KERNEL_SHADER_EVAL_DISPLACE, + DEVICE_KERNEL_SHADER_EVAL_BACKGROUND, + DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY, + +#define DECLARE_FILM_CONVERT_KERNEL(variant) \ + DEVICE_KERNEL_FILM_CONVERT_##variant, DEVICE_KERNEL_FILM_CONVERT_##variant##_HALF_RGBA + + DECLARE_FILM_CONVERT_KERNEL(DEPTH), + DECLARE_FILM_CONVERT_KERNEL(MIST), + DECLARE_FILM_CONVERT_KERNEL(SAMPLE_COUNT), + DECLARE_FILM_CONVERT_KERNEL(FLOAT), + DECLARE_FILM_CONVERT_KERNEL(LIGHT_PATH), + DECLARE_FILM_CONVERT_KERNEL(FLOAT3), + DECLARE_FILM_CONVERT_KERNEL(MOTION), + DECLARE_FILM_CONVERT_KERNEL(CRYPTOMATTE), + DECLARE_FILM_CONVERT_KERNEL(SHADOW_CATCHER), + DECLARE_FILM_CONVERT_KERNEL(SHADOW_CATCHER_MATTE_WITH_SHADOW), + DECLARE_FILM_CONVERT_KERNEL(COMBINED), + DECLARE_FILM_CONVERT_KERNEL(FLOAT4), + +#undef DECLARE_FILM_CONVERT_KERNEL + + DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK, + DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X, + DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y, + + DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, + DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, + DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, + DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, + + DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS, + + DEVICE_KERNEL_PREFIX_SUM, + + DEVICE_KERNEL_NUM, +} DeviceKernel; + +enum { + DEVICE_KERNEL_INTEGRATOR_NUM = DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL + 1, +}; + +/* Kernel Features */ + +enum KernelFeatureFlag : unsigned int { + /* Shader nodes. 
*/ + KERNEL_FEATURE_NODE_BSDF = (1U << 0U), + KERNEL_FEATURE_NODE_EMISSION = (1U << 1U), + KERNEL_FEATURE_NODE_VOLUME = (1U << 2U), + KERNEL_FEATURE_NODE_HAIR = (1U << 3U), + KERNEL_FEATURE_NODE_BUMP = (1U << 4U), + KERNEL_FEATURE_NODE_BUMP_STATE = (1U << 5U), + KERNEL_FEATURE_NODE_VORONOI_EXTRA = (1U << 6U), + KERNEL_FEATURE_NODE_RAYTRACE = (1U << 7U), + KERNEL_FEATURE_NODE_AOV = (1U << 8U), + KERNEL_FEATURE_NODE_LIGHT_PATH = (1U << 9U), + + /* Use denoising kernels and output denoising passes. */ + KERNEL_FEATURE_DENOISING = (1U << 10U), + + /* Use path tracing kernels. */ + KERNEL_FEATURE_PATH_TRACING = (1U << 11U), + + /* BVH/sampling kernel features. */ + KERNEL_FEATURE_HAIR = (1U << 12U), + KERNEL_FEATURE_HAIR_THICK = (1U << 13U), + KERNEL_FEATURE_OBJECT_MOTION = (1U << 14U), + KERNEL_FEATURE_CAMERA_MOTION = (1U << 15U), + + /* Denotes whether baking functionality is needed. */ + KERNEL_FEATURE_BAKING = (1U << 16U), + + /* Use subsurface scattering materials. */ + KERNEL_FEATURE_SUBSURFACE = (1U << 17U), + + /* Use volume materials. */ + KERNEL_FEATURE_VOLUME = (1U << 18U), + + /* Use OpenSubdiv patch evaluation */ + KERNEL_FEATURE_PATCH_EVALUATION = (1U << 19U), + + /* Use Transparent shadows */ + KERNEL_FEATURE_TRANSPARENT = (1U << 20U), + + /* Use shadow catcher. */ + KERNEL_FEATURE_SHADOW_CATCHER = (1U << 21U), + + /* Per-uber shader usage flags. */ + KERNEL_FEATURE_PRINCIPLED = (1U << 22U), + + /* Light render passes. */ + KERNEL_FEATURE_LIGHT_PASSES = (1U << 23U), + + /* Shadow render pass. */ + KERNEL_FEATURE_SHADOW_PASS = (1U << 24U), + + /* AO. */ + KERNEL_FEATURE_AO_PASS = (1U << 25U), + KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U), + KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE), +}; + +/* Shader node feature mask, to specialize shader evaluation for kernels. */ + +#define KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT \ + (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VORONOI_EXTRA | \ + KERNEL_FEATURE_NODE_LIGHT_PATH) +#define KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW \ + (KERNEL_FEATURE_NODE_BSDF | KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | \ + KERNEL_FEATURE_NODE_HAIR | KERNEL_FEATURE_NODE_BUMP | KERNEL_FEATURE_NODE_BUMP_STATE | \ + KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_LIGHT_PATH) +#define KERNEL_FEATURE_NODE_MASK_SURFACE \ + (KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW | KERNEL_FEATURE_NODE_RAYTRACE | \ + KERNEL_FEATURE_NODE_AOV | KERNEL_FEATURE_NODE_LIGHT_PATH) +#define KERNEL_FEATURE_NODE_MASK_VOLUME \ + (KERNEL_FEATURE_NODE_EMISSION | KERNEL_FEATURE_NODE_VOLUME | \ + KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_LIGHT_PATH) +#define KERNEL_FEATURE_NODE_MASK_DISPLACEMENT \ + (KERNEL_FEATURE_NODE_VORONOI_EXTRA | KERNEL_FEATURE_NODE_BUMP | KERNEL_FEATURE_NODE_BUMP_STATE) +#define KERNEL_FEATURE_NODE_MASK_BUMP KERNEL_FEATURE_NODE_MASK_DISPLACEMENT + +/* Must be constexpr on the CPU to avoid compile errors because the state types + * are different depending on the main, shadow or null path. For GPU we don't have + * C++17 everywhere so can't use it. 
*/ +#ifdef __KERNEL_CPU__ +# define IF_KERNEL_NODES_FEATURE(feature) \ + if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) +#else +# define IF_KERNEL_NODES_FEATURE(feature) \ + if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) +#endif + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/color.h b/intern/cycles/kernel/util/color.h new file mode 100644 index 00000000000..6d17647c9f8 --- /dev/null +++ b/intern/cycles/kernel/util/color.h @@ -0,0 +1,35 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "util/color.h" + +CCL_NAMESPACE_BEGIN + +ccl_device float3 xyz_to_rgb(KernelGlobals kg, float3 xyz) +{ + return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz), + dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz), + dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz)); +} + +ccl_device float linear_rgb_to_gray(KernelGlobals kg, float3 c) +{ + return dot(c, float4_to_float3(kernel_data.film.rgb_to_y)); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/differential.h b/intern/cycles/kernel/util/differential.h new file mode 100644 index 00000000000..17187083019 --- /dev/null +++ b/intern/cycles/kernel/util/differential.h @@ -0,0 +1,166 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* See "Tracing Ray Differentials", Homan Igehy, 1999. 
*/ + +ccl_device void differential_transfer(ccl_private differential3 *surface_dP, + const differential3 ray_dP, + float3 ray_D, + const differential3 ray_dD, + float3 surface_Ng, + float ray_t) +{ + /* ray differential transfer through homogeneous medium, to + * compute dPdx/dy at a shading point from the incoming ray */ + + float3 tmp = ray_D / dot(ray_D, surface_Ng); + float3 tmpx = ray_dP.dx + ray_t * ray_dD.dx; + float3 tmpy = ray_dP.dy + ray_t * ray_dD.dy; + + surface_dP->dx = tmpx - dot(tmpx, surface_Ng) * tmp; + surface_dP->dy = tmpy - dot(tmpy, surface_Ng) * tmp; +} + +ccl_device void differential_incoming(ccl_private differential3 *dI, const differential3 dD) +{ + /* compute dIdx/dy at a shading point, we just need to negate the + * differential of the ray direction */ + + dI->dx = -dD.dx; + dI->dy = -dD.dy; +} + +ccl_device void differential_dudv(ccl_private differential *du, + ccl_private differential *dv, + float3 dPdu, + float3 dPdv, + differential3 dP, + float3 Ng) +{ + /* now we have dPdx/dy from the ray differential transfer, and dPdu/dv + * from the primitive, we can compute dudx/dy and dvdx/dy. these are + * mainly used for differentials of arbitrary mesh attributes. */ + + /* find most stable axis to project to 2D */ + float xn = fabsf(Ng.x); + float yn = fabsf(Ng.y); + float zn = fabsf(Ng.z); + + if (zn < xn || zn < yn) { + if (yn < xn || yn < zn) { + dPdu.x = dPdu.y; + dPdv.x = dPdv.y; + dP.dx.x = dP.dx.y; + dP.dy.x = dP.dy.y; + } + + dPdu.y = dPdu.z; + dPdv.y = dPdv.z; + dP.dx.y = dP.dx.z; + dP.dy.y = dP.dy.z; + } + + /* using Cramer's rule, we solve for dudx and dvdx in a 2x2 linear system, + * and the same for dudy and dvdy. the denominator is the same for both + * solutions, so we compute it only once. + * + * dP.dx = dPdu * dudx + dPdv * dvdx; + * dP.dy = dPdu * dudy + dPdv * dvdy; */ + + float det = (dPdu.x * dPdv.y - dPdv.x * dPdu.y); + + if (det != 0.0f) + det = 1.0f / det; + + du->dx = (dP.dx.x * dPdv.y - dP.dx.y * dPdv.x) * det; + dv->dx = (dP.dx.y * dPdu.x - dP.dx.x * dPdu.y) * det; + + du->dy = (dP.dy.x * dPdv.y - dP.dy.y * dPdv.x) * det; + dv->dy = (dP.dy.y * dPdu.x - dP.dy.x * dPdu.y) * det; +} + +ccl_device differential differential_zero() +{ + differential d; + d.dx = 0.0f; + d.dy = 0.0f; + + return d; +} + +ccl_device differential3 differential3_zero() +{ + differential3 d; + d.dx = zero_float3(); + d.dy = zero_float3(); + + return d; +} + +/* Compact ray differentials that are just a scale to reduce memory usage and + * access cost in GPU. + * + * See above for more accurate reference implementations. + * + * TODO: also store the more compact version in ShaderData and recompute where + * needed? 
*/ + +ccl_device_forceinline float differential_zero_compact() +{ + return 0.0f; +} + +ccl_device_forceinline float differential_make_compact(const differential3 D) +{ + return 0.5f * (len(D.dx) + len(D.dy)); +} + +ccl_device_forceinline void differential_transfer_compact(ccl_private differential3 *surface_dP, + const float ray_dP, + const float3 /* ray_D */, + const float ray_dD, + const float3 surface_Ng, + const float ray_t) +{ + /* ray differential transfer through homogeneous medium, to + * compute dPdx/dy at a shading point from the incoming ray */ + float scale = ray_dP + ray_t * ray_dD; + + float3 dx, dy; + make_orthonormals(surface_Ng, &dx, &dy); + surface_dP->dx = dx * scale; + surface_dP->dy = dy * scale; +} + +ccl_device_forceinline void differential_incoming_compact(ccl_private differential3 *dI, + const float3 D, + const float dD) +{ + /* compute dIdx/dy at a shading point, we just need to negate the + * differential of the ray direction */ + + float3 dx, dy; + make_orthonormals(D, &dx, &dy); + + dI->dx = dD * dx; + dI->dy = dD * dy; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/lookup_table.h b/intern/cycles/kernel/util/lookup_table.h new file mode 100644 index 00000000000..2c26e668d7b --- /dev/null +++ b/intern/cycles/kernel/util/lookup_table.h @@ -0,0 +1,56 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* Interpolated lookup table access */ + +ccl_device float lookup_table_read(KernelGlobals kg, float x, int offset, int size) +{ + x = saturate(x) * (size - 1); + + int index = min(float_to_int(x), size - 1); + int nindex = min(index + 1, size - 1); + float t = x - index; + + float data0 = kernel_tex_fetch(__lookup_table, index + offset); + if (t == 0.0f) + return data0; + + float data1 = kernel_tex_fetch(__lookup_table, nindex + offset); + return (1.0f - t) * data0 + t * data1; +} + +ccl_device float lookup_table_read_2D( + KernelGlobals kg, float x, float y, int offset, int xsize, int ysize) +{ + y = saturate(y) * (ysize - 1); + + int index = min(float_to_int(y), ysize - 1); + int nindex = min(index + 1, ysize - 1); + float t = y - index; + + float data0 = lookup_table_read(kg, x, offset + xsize * index, xsize); + if (t == 0.0f) + return data0; + + float data1 = lookup_table_read(kg, x, offset + xsize * nindex, xsize); + return (1.0f - t) * data0 + t * data1; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/profiling.h b/intern/cycles/kernel/util/profiling.h new file mode 100644 index 00000000000..12ce441ccbf --- /dev/null +++ b/intern/cycles/kernel/util/profiling.h @@ -0,0 +1,40 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifdef __KERNEL_CPU__ +# include "util/profiling.h" +#endif + +CCL_NAMESPACE_BEGIN + +#ifdef __KERNEL_CPU__ +# define PROFILING_INIT(kg, event) \ + ProfilingHelper profiling_helper((ProfilingState *)&kg->profiler, event) +# define PROFILING_EVENT(event) profiling_helper.set_event(event) +# define PROFILING_INIT_FOR_SHADER(kg, event) \ + ProfilingWithShaderHelper profiling_helper((ProfilingState *)&kg->profiler, event) +# define PROFILING_SHADER(object, shader) \ + profiling_helper.set_shader(object, (shader)&SHADER_MASK); +#else +# define PROFILING_INIT(kg, event) +# define PROFILING_EVENT(event) +# define PROFILING_INIT_FOR_SHADER(kg, event) +# define PROFILING_SHADER(object, shader) +#endif /* __KERNEL_CPU__ */ + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/util_color.h b/intern/cycles/kernel/util/util_color.h deleted file mode 100644 index 0d7bfecd5f3..00000000000 --- a/intern/cycles/kernel/util/util_color.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "util/util_color.h" - -CCL_NAMESPACE_BEGIN - -ccl_device float3 xyz_to_rgb(KernelGlobals kg, float3 xyz) -{ - return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz), - dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz), - dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz)); -} - -ccl_device float linear_rgb_to_gray(KernelGlobals kg, float3 c) -{ - return dot(c, float4_to_float3(kernel_data.film.rgb_to_y)); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/util_differential.h b/intern/cycles/kernel/util/util_differential.h deleted file mode 100644 index 17187083019..00000000000 --- a/intern/cycles/kernel/util/util_differential.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* See "Tracing Ray Differentials", Homan Igehy, 1999. 
*/ - -ccl_device void differential_transfer(ccl_private differential3 *surface_dP, - const differential3 ray_dP, - float3 ray_D, - const differential3 ray_dD, - float3 surface_Ng, - float ray_t) -{ - /* ray differential transfer through homogeneous medium, to - * compute dPdx/dy at a shading point from the incoming ray */ - - float3 tmp = ray_D / dot(ray_D, surface_Ng); - float3 tmpx = ray_dP.dx + ray_t * ray_dD.dx; - float3 tmpy = ray_dP.dy + ray_t * ray_dD.dy; - - surface_dP->dx = tmpx - dot(tmpx, surface_Ng) * tmp; - surface_dP->dy = tmpy - dot(tmpy, surface_Ng) * tmp; -} - -ccl_device void differential_incoming(ccl_private differential3 *dI, const differential3 dD) -{ - /* compute dIdx/dy at a shading point, we just need to negate the - * differential of the ray direction */ - - dI->dx = -dD.dx; - dI->dy = -dD.dy; -} - -ccl_device void differential_dudv(ccl_private differential *du, - ccl_private differential *dv, - float3 dPdu, - float3 dPdv, - differential3 dP, - float3 Ng) -{ - /* now we have dPdx/dy from the ray differential transfer, and dPdu/dv - * from the primitive, we can compute dudx/dy and dvdx/dy. these are - * mainly used for differentials of arbitrary mesh attributes. */ - - /* find most stable axis to project to 2D */ - float xn = fabsf(Ng.x); - float yn = fabsf(Ng.y); - float zn = fabsf(Ng.z); - - if (zn < xn || zn < yn) { - if (yn < xn || yn < zn) { - dPdu.x = dPdu.y; - dPdv.x = dPdv.y; - dP.dx.x = dP.dx.y; - dP.dy.x = dP.dy.y; - } - - dPdu.y = dPdu.z; - dPdv.y = dPdv.z; - dP.dx.y = dP.dx.z; - dP.dy.y = dP.dy.z; - } - - /* using Cramer's rule, we solve for dudx and dvdx in a 2x2 linear system, - * and the same for dudy and dvdy. the denominator is the same for both - * solutions, so we compute it only once. - * - * dP.dx = dPdu * dudx + dPdv * dvdx; - * dP.dy = dPdu * dudy + dPdv * dvdy; */ - - float det = (dPdu.x * dPdv.y - dPdv.x * dPdu.y); - - if (det != 0.0f) - det = 1.0f / det; - - du->dx = (dP.dx.x * dPdv.y - dP.dx.y * dPdv.x) * det; - dv->dx = (dP.dx.y * dPdu.x - dP.dx.x * dPdu.y) * det; - - du->dy = (dP.dy.x * dPdv.y - dP.dy.y * dPdv.x) * det; - dv->dy = (dP.dy.y * dPdu.x - dP.dy.x * dPdu.y) * det; -} - -ccl_device differential differential_zero() -{ - differential d; - d.dx = 0.0f; - d.dy = 0.0f; - - return d; -} - -ccl_device differential3 differential3_zero() -{ - differential3 d; - d.dx = zero_float3(); - d.dy = zero_float3(); - - return d; -} - -/* Compact ray differentials that are just a scale to reduce memory usage and - * access cost in GPU. - * - * See above for more accurate reference implementations. - * - * TODO: also store the more compact version in ShaderData and recompute where - * needed? 
*/ - -ccl_device_forceinline float differential_zero_compact() -{ - return 0.0f; -} - -ccl_device_forceinline float differential_make_compact(const differential3 D) -{ - return 0.5f * (len(D.dx) + len(D.dy)); -} - -ccl_device_forceinline void differential_transfer_compact(ccl_private differential3 *surface_dP, - const float ray_dP, - const float3 /* ray_D */, - const float ray_dD, - const float3 surface_Ng, - const float ray_t) -{ - /* ray differential transfer through homogeneous medium, to - * compute dPdx/dy at a shading point from the incoming ray */ - float scale = ray_dP + ray_t * ray_dD; - - float3 dx, dy; - make_orthonormals(surface_Ng, &dx, &dy); - surface_dP->dx = dx * scale; - surface_dP->dy = dy * scale; -} - -ccl_device_forceinline void differential_incoming_compact(ccl_private differential3 *dI, - const float3 D, - const float dD) -{ - /* compute dIdx/dy at a shading point, we just need to negate the - * differential of the ray direction */ - - float3 dx, dy; - make_orthonormals(D, &dx, &dy); - - dI->dx = dD * dx; - dI->dy = dD * dy; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/util_lookup_table.h b/intern/cycles/kernel/util/util_lookup_table.h deleted file mode 100644 index 2c26e668d7b..00000000000 --- a/intern/cycles/kernel/util/util_lookup_table.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -CCL_NAMESPACE_BEGIN - -/* Interpolated lookup table access */ - -ccl_device float lookup_table_read(KernelGlobals kg, float x, int offset, int size) -{ - x = saturate(x) * (size - 1); - - int index = min(float_to_int(x), size - 1); - int nindex = min(index + 1, size - 1); - float t = x - index; - - float data0 = kernel_tex_fetch(__lookup_table, index + offset); - if (t == 0.0f) - return data0; - - float data1 = kernel_tex_fetch(__lookup_table, nindex + offset); - return (1.0f - t) * data0 + t * data1; -} - -ccl_device float lookup_table_read_2D( - KernelGlobals kg, float x, float y, int offset, int xsize, int ysize) -{ - y = saturate(y) * (ysize - 1); - - int index = min(float_to_int(y), ysize - 1); - int nindex = min(index + 1, ysize - 1); - float t = y - index; - - float data0 = lookup_table_read(kg, x, offset + xsize * index, xsize); - if (t == 0.0f) - return data0; - - float data1 = lookup_table_read(kg, x, offset + xsize * nindex, xsize); - return (1.0f - t) * data0 + t * data1; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/util/util_profiling.h b/intern/cycles/kernel/util/util_profiling.h deleted file mode 100644 index db8644005ea..00000000000 --- a/intern/cycles/kernel/util/util_profiling.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#ifdef __KERNEL_CPU__ -# include "util/util_profiling.h" -#endif - -CCL_NAMESPACE_BEGIN - -#ifdef __KERNEL_CPU__ -# define PROFILING_INIT(kg, event) \ - ProfilingHelper profiling_helper((ProfilingState *)&kg->profiler, event) -# define PROFILING_EVENT(event) profiling_helper.set_event(event) -# define PROFILING_INIT_FOR_SHADER(kg, event) \ - ProfilingWithShaderHelper profiling_helper((ProfilingState *)&kg->profiler, event) -# define PROFILING_SHADER(object, shader) \ - profiling_helper.set_shader(object, (shader)&SHADER_MASK); -#else -# define PROFILING_INIT(kg, event) -# define PROFILING_EVENT(event) -# define PROFILING_INIT_FOR_SHADER(kg, event) -# define PROFILING_SHADER(object, shader) -#endif /* __KERNEL_CPU__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/scene/alembic.cpp b/intern/cycles/scene/alembic.cpp index 07a969e88d3..39b5f467736 100644 --- a/intern/cycles/scene/alembic.cpp +++ b/intern/cycles/scene/alembic.cpp @@ -24,11 +24,11 @@ #include "scene/scene.h" #include "scene/shader.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_progress.h" -#include "util/util_transform.h" -#include "util/util_vector.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/progress.h" +#include "util/transform.h" +#include "util/vector.h" #ifdef WITH_ALEMBIC diff --git a/intern/cycles/scene/alembic.h b/intern/cycles/scene/alembic.h index 9aeef273910..77aafd0ab32 100644 --- a/intern/cycles/scene/alembic.h +++ b/intern/cycles/scene/alembic.h @@ -19,9 +19,9 @@ #include "graph/node.h" #include "scene/attribute.h" #include "scene/procedural.h" -#include "util/util_set.h" -#include "util/util_transform.h" -#include "util/util_vector.h" +#include "util/set.h" +#include "util/transform.h" +#include "util/vector.h" #ifdef WITH_ALEMBIC diff --git a/intern/cycles/scene/alembic_read.cpp b/intern/cycles/scene/alembic_read.cpp index 1ce64d9ee41..35f4854127a 100644 --- a/intern/cycles/scene/alembic_read.cpp +++ b/intern/cycles/scene/alembic_read.cpp @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include "scene/alembic.h" #include "scene/alembic_read.h" +#include "scene/alembic.h" #include "scene/mesh.h" -#include "util/util_color.h" -#include "util/util_progress.h" +#include "util/color.h" +#include "util/progress.h" #ifdef WITH_ALEMBIC diff --git a/intern/cycles/scene/alembic_read.h b/intern/cycles/scene/alembic_read.h index 9cc8622a1ba..6b656b59481 100644 --- a/intern/cycles/scene/alembic_read.h +++ b/intern/cycles/scene/alembic_read.h @@ -21,7 +21,7 @@ # include # include -# include "util/util_vector.h" +# include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/attribute.cpp b/intern/cycles/scene/attribute.cpp index 0c440fb9fd1..3401eea307f 100644 --- a/intern/cycles/scene/attribute.cpp +++ b/intern/cycles/scene/attribute.cpp @@ -19,9 +19,9 @@ #include "scene/image.h" #include "scene/mesh.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_transform.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/transform.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/attribute.h b/intern/cycles/scene/attribute.h index 9af3dcaee26..4a25a900c14 100644 --- a/intern/cycles/scene/attribute.h +++ b/intern/cycles/scene/attribute.h @@ -19,13 +19,13 @@ #include "scene/image.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "util/util_list.h" -#include "util/util_param.h" -#include "util/util_set.h" -#include "util/util_types.h" -#include "util/util_vector.h" +#include "util/list.h" +#include "util/param.h" +#include "util/set.h" +#include "util/types.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/background.cpp b/intern/cycles/scene/background.cpp index db5be885538..72dccc6f9a8 100644 --- a/intern/cycles/scene/background.cpp +++ b/intern/cycles/scene/background.cpp @@ -23,10 +23,10 @@ #include "scene/shader_nodes.h" #include "scene/stats.h" -#include "util/util_foreach.h" -#include "util/util_math.h" -#include "util/util_time.h" -#include "util/util_types.h" +#include "util/foreach.h" +#include "util/math.h" +#include "util/time.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/background.h b/intern/cycles/scene/background.h index 2f7ef0f7737..31f15d09749 100644 --- a/intern/cycles/scene/background.h +++ b/intern/cycles/scene/background.h @@ -19,7 +19,7 @@ #include "graph/node.h" -#include "util/util_types.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/bake.cpp b/intern/cycles/scene/bake.cpp index 86c5c4c02af..90c9e0e4ae8 100644 --- a/intern/cycles/scene/bake.cpp +++ b/intern/cycles/scene/bake.cpp @@ -22,7 +22,7 @@ #include "scene/stats.h" #include "session/buffers.h" -#include "util/util_foreach.h" +#include "util/foreach.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/bake.h b/intern/cycles/scene/bake.h index 044383d2d43..370cc20ae4f 100644 --- a/intern/cycles/scene/bake.h +++ b/intern/cycles/scene/bake.h @@ -20,8 +20,8 @@ #include "device/device.h" #include "scene/scene.h" -#include "util/util_progress.h" -#include "util/util_vector.h" +#include "util/progress.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/camera.cpp b/intern/cycles/scene/camera.cpp index 1e78f8dd36f..5877b82ead5 100644 --- a/intern/cycles/scene/camera.cpp +++ b/intern/cycles/scene/camera.cpp @@ -23,13 +23,13 @@ #include "device/device.h" -#include "util/util_foreach.h" -#include "util/util_function.h" -#include "util/util_logging.h" -#include "util/util_math_cdf.h" -#include 
"util/util_task.h" -#include "util/util_time.h" -#include "util/util_vector.h" +#include "util/foreach.h" +#include "util/function.h" +#include "util/log.h" +#include "util/math_cdf.h" +#include "util/task.h" +#include "util/time.h" +#include "util/vector.h" /* needed for calculating differentials */ #include "kernel/device/cpu/compat.h" diff --git a/intern/cycles/scene/camera.h b/intern/cycles/scene/camera.h index cb8ecac1a7e..58e39599267 100644 --- a/intern/cycles/scene/camera.h +++ b/intern/cycles/scene/camera.h @@ -17,15 +17,15 @@ #ifndef __CAMERA_H__ #define __CAMERA_H__ -#include "kernel/kernel_types.h" +#include "kernel/types.h" #include "graph/node.h" -#include "util/util_array.h" -#include "util/util_boundbox.h" -#include "util/util_projection.h" -#include "util/util_transform.h" -#include "util/util_types.h" +#include "util/array.h" +#include "util/boundbox.h" +#include "util/projection.h" +#include "util/transform.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/colorspace.cpp b/intern/cycles/scene/colorspace.cpp index 8584a6f5dd7..c1a308fcbaa 100644 --- a/intern/cycles/scene/colorspace.cpp +++ b/intern/cycles/scene/colorspace.cpp @@ -16,13 +16,13 @@ #include "scene/colorspace.h" -#include "util/util_color.h" -#include "util/util_half.h" -#include "util/util_image.h" -#include "util/util_logging.h" -#include "util/util_math.h" -#include "util/util_thread.h" -#include "util/util_vector.h" +#include "util/color.h" +#include "util/half.h" +#include "util/image.h" +#include "util/log.h" +#include "util/math.h" +#include "util/thread.h" +#include "util/vector.h" #ifdef WITH_OCIO # include diff --git a/intern/cycles/scene/colorspace.h b/intern/cycles/scene/colorspace.h index 51d0b121cc0..7f7bc604f07 100644 --- a/intern/cycles/scene/colorspace.h +++ b/intern/cycles/scene/colorspace.h @@ -17,8 +17,8 @@ #ifndef __COLORSPACE_H__ #define __COLORSPACE_H__ -#include "util/util_map.h" -#include "util/util_param.h" +#include "util/map.h" +#include "util/param.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/constant_fold.cpp b/intern/cycles/scene/constant_fold.cpp index b2a17198c93..ca065e3f678 100644 --- a/intern/cycles/scene/constant_fold.cpp +++ b/intern/cycles/scene/constant_fold.cpp @@ -17,8 +17,8 @@ #include "scene/constant_fold.h" #include "scene/shader_graph.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" +#include "util/foreach.h" +#include "util/log.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/constant_fold.h b/intern/cycles/scene/constant_fold.h index fec4123c361..36b249920d0 100644 --- a/intern/cycles/scene/constant_fold.h +++ b/intern/cycles/scene/constant_fold.h @@ -17,8 +17,8 @@ #ifndef __CONSTANT_FOLD_H__ #define __CONSTANT_FOLD_H__ -#include "kernel/svm/svm_types.h" -#include "util/util_types.h" +#include "kernel/svm/types.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/curves.cpp b/intern/cycles/scene/curves.cpp index 6e45905d367..7863ce6c666 100644 --- a/intern/cycles/scene/curves.cpp +++ b/intern/cycles/scene/curves.cpp @@ -20,10 +20,10 @@ #include "scene/object.h" #include "scene/scene.h" -#include "util/util_foreach.h" -#include "util/util_map.h" -#include "util/util_progress.h" -#include "util/util_vector.h" +#include "util/foreach.h" +#include "util/map.h" +#include "util/progress.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/curves.h b/intern/cycles/scene/curves.h index 9b0e2a29977..3076f4291f5 100644 --- 
a/intern/cycles/scene/curves.h +++ b/intern/cycles/scene/curves.h @@ -17,8 +17,8 @@ #ifndef __CURVES_H__ #define __CURVES_H__ -#include "util/util_array.h" -#include "util/util_types.h" +#include "util/array.h" +#include "util/types.h" #include "scene/hair.h" diff --git a/intern/cycles/scene/film.cpp b/intern/cycles/scene/film.cpp index 3f91e0321b2..b6480fa64f1 100644 --- a/intern/cycles/scene/film.cpp +++ b/intern/cycles/scene/film.cpp @@ -26,11 +26,11 @@ #include "scene/stats.h" #include "scene/tables.h" -#include "util/util_algorithm.h" -#include "util/util_foreach.h" -#include "util/util_math.h" -#include "util/util_math_cdf.h" -#include "util/util_time.h" +#include "util/algorithm.h" +#include "util/foreach.h" +#include "util/math.h" +#include "util/math_cdf.h" +#include "util/time.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/film.h b/intern/cycles/scene/film.h index ede0d6298a6..5207c5e62b5 100644 --- a/intern/cycles/scene/film.h +++ b/intern/cycles/scene/film.h @@ -18,10 +18,10 @@ #define __FILM_H__ #include "scene/pass.h" -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/string.h" +#include "util/vector.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" #include "graph/node.h" diff --git a/intern/cycles/scene/geometry.cpp b/intern/cycles/scene/geometry.cpp index 9fe34ac6e99..5141e1f8358 100644 --- a/intern/cycles/scene/geometry.cpp +++ b/intern/cycles/scene/geometry.cpp @@ -32,15 +32,15 @@ #include "scene/stats.h" #include "scene/volume.h" -#include "subd/subd_patch_table.h" -#include "subd/subd_split.h" +#include "subd/patch_table.h" +#include "subd/split.h" -#include "kernel/osl/osl_globals.h" +#include "kernel/osl/globals.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_progress.h" -#include "util/util_task.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/progress.h" +#include "util/task.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/geometry.h b/intern/cycles/scene/geometry.h index 8133132229e..335bcdcd0b7 100644 --- a/intern/cycles/scene/geometry.h +++ b/intern/cycles/scene/geometry.h @@ -19,15 +19,15 @@ #include "graph/node.h" -#include "bvh/bvh_params.h" +#include "bvh/params.h" #include "scene/attribute.h" -#include "util/util_boundbox.h" -#include "util/util_set.h" -#include "util/util_transform.h" -#include "util/util_types.h" -#include "util/util_vector.h" +#include "util/boundbox.h" +#include "util/set.h" +#include "util/transform.h" +#include "util/types.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/hair.cpp b/intern/cycles/scene/hair.cpp index 2390da5bf88..2951a609ae9 100644 --- a/intern/cycles/scene/hair.cpp +++ b/intern/cycles/scene/hair.cpp @@ -23,7 +23,7 @@ #include "integrator/shader_eval.h" -#include "util/util_progress.h" +#include "util/progress.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/image.cpp b/intern/cycles/scene/image.cpp index ac85cf5185e..80091e01b8c 100644 --- a/intern/cycles/scene/image.cpp +++ b/intern/cycles/scene/image.cpp @@ -22,15 +22,15 @@ #include "scene/scene.h" #include "scene/stats.h" -#include "util/util_foreach.h" -#include "util/util_image.h" -#include "util/util_image_impl.h" -#include "util/util_logging.h" -#include "util/util_path.h" -#include "util/util_progress.h" -#include "util/util_task.h" -#include "util/util_texture.h" -#include "util/util_unique_ptr.h" +#include "util/foreach.h" +#include "util/image.h" +#include "util/image_impl.h" +#include "util/log.h" 
+#include "util/path.h" +#include "util/progress.h" +#include "util/task.h" +#include "util/texture.h" +#include "util/unique_ptr.h" #ifdef WITH_OSL # include diff --git a/intern/cycles/scene/image.h b/intern/cycles/scene/image.h index 0c0bbff170a..6447b028ebf 100644 --- a/intern/cycles/scene/image.h +++ b/intern/cycles/scene/image.h @@ -17,15 +17,15 @@ #ifndef __IMAGE_H__ #define __IMAGE_H__ -#include "device/device_memory.h" +#include "device/memory.h" #include "scene/colorspace.h" -#include "util/util_string.h" -#include "util/util_thread.h" -#include "util/util_transform.h" -#include "util/util_unique_ptr.h" -#include "util/util_vector.h" +#include "util/string.h" +#include "util/thread.h" +#include "util/transform.h" +#include "util/unique_ptr.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/image_oiio.cpp b/intern/cycles/scene/image_oiio.cpp index 256a7aeb7d4..feafae035a1 100644 --- a/intern/cycles/scene/image_oiio.cpp +++ b/intern/cycles/scene/image_oiio.cpp @@ -16,9 +16,9 @@ #include "scene/image_oiio.h" -#include "util/util_image.h" -#include "util/util_logging.h" -#include "util/util_path.h" +#include "util/image.h" +#include "util/log.h" +#include "util/path.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/image_sky.cpp b/intern/cycles/scene/image_sky.cpp index cd8b8d0d991..4f0877aeb99 100644 --- a/intern/cycles/scene/image_sky.cpp +++ b/intern/cycles/scene/image_sky.cpp @@ -18,10 +18,10 @@ #include "sky_model.h" -#include "util/util_image.h" -#include "util/util_logging.h" -#include "util/util_path.h" -#include "util/util_task.h" +#include "util/image.h" +#include "util/log.h" +#include "util/path.h" +#include "util/task.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/image_vdb.cpp b/intern/cycles/scene/image_vdb.cpp index 466df82dd73..d3315670390 100644 --- a/intern/cycles/scene/image_vdb.cpp +++ b/intern/cycles/scene/image_vdb.cpp @@ -16,8 +16,8 @@ #include "scene/image_vdb.h" -#include "util/util_logging.h" -#include "util/util_openvdb.h" +#include "util/log.h" +#include "util/openvdb.h" #ifdef WITH_OPENVDB # include diff --git a/intern/cycles/scene/integrator.cpp b/intern/cycles/scene/integrator.cpp index 5bf82898958..3e795b30e7f 100644 --- a/intern/cycles/scene/integrator.cpp +++ b/intern/cycles/scene/integrator.cpp @@ -27,13 +27,13 @@ #include "scene/sobol.h" #include "scene/stats.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "util/util_foreach.h" -#include "util/util_hash.h" -#include "util/util_logging.h" -#include "util/util_task.h" -#include "util/util_time.h" +#include "util/foreach.h" +#include "util/hash.h" +#include "util/log.h" +#include "util/task.h" +#include "util/time.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/integrator.h b/intern/cycles/scene/integrator.h index 468971986c5..c380203f4f3 100644 --- a/intern/cycles/scene/integrator.h +++ b/intern/cycles/scene/integrator.h @@ -17,9 +17,9 @@ #ifndef __INTEGRATOR_H__ #define __INTEGRATOR_H__ -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "device/device_denoise.h" /* For the parameters and type enum. */ +#include "device/denoise.h" /* For the parameters and type enum. 
*/ #include "graph/node.h" #include "integrator/adaptive_sampling.h" diff --git a/intern/cycles/scene/jitter.h b/intern/cycles/scene/jitter.h index ed34c7a4f4d..756e4a1de78 100644 --- a/intern/cycles/scene/jitter.h +++ b/intern/cycles/scene/jitter.h @@ -17,7 +17,7 @@ #ifndef __JITTER_H__ #define __JITTER_H__ -#include "util/util_types.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/light.cpp b/intern/cycles/scene/light.cpp index 26f208d58e5..83e531f42ef 100644 --- a/intern/cycles/scene/light.cpp +++ b/intern/cycles/scene/light.cpp @@ -30,12 +30,12 @@ #include "integrator/shader_eval.h" -#include "util/util_foreach.h" -#include "util/util_hash.h" -#include "util/util_logging.h" -#include "util/util_path.h" -#include "util/util_progress.h" -#include "util/util_task.h" +#include "util/foreach.h" +#include "util/hash.h" +#include "util/log.h" +#include "util/path.h" +#include "util/progress.h" +#include "util/task.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/light.h b/intern/cycles/scene/light.h index 9820508d3a5..97ec9792860 100644 --- a/intern/cycles/scene/light.h +++ b/intern/cycles/scene/light.h @@ -17,7 +17,7 @@ #ifndef __LIGHT_H__ #define __LIGHT_H__ -#include "kernel/kernel_types.h" +#include "kernel/types.h" #include "graph/node.h" @@ -25,10 +25,10 @@ * the right Node::set overload as it does not know that Shader is a Node */ #include "scene/shader.h" -#include "util/util_ies.h" -#include "util/util_thread.h" -#include "util/util_types.h" -#include "util/util_vector.h" +#include "util/ies.h" +#include "util/thread.h" +#include "util/types.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/mesh.cpp b/intern/cycles/scene/mesh.cpp index 1ed0eb4c30a..f47dab30869 100644 --- a/intern/cycles/scene/mesh.cpp +++ b/intern/cycles/scene/mesh.cpp @@ -14,8 +14,8 @@ * limitations under the License. 
*/ +#include "bvh/build.h" #include "bvh/bvh.h" -#include "bvh/bvh_build.h" #include "device/device.h" @@ -25,13 +25,13 @@ #include "scene/scene.h" #include "scene/shader_graph.h" -#include "subd/subd_patch_table.h" -#include "subd/subd_split.h" +#include "subd/patch_table.h" +#include "subd/split.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_progress.h" -#include "util/util_set.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/progress.h" +#include "util/set.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/mesh.h b/intern/cycles/scene/mesh.h index bc549fa55fa..d13b3003164 100644 --- a/intern/cycles/scene/mesh.h +++ b/intern/cycles/scene/mesh.h @@ -19,19 +19,19 @@ #include "graph/node.h" -#include "bvh/bvh_params.h" +#include "bvh/params.h" #include "scene/attribute.h" #include "scene/geometry.h" #include "scene/shader.h" -#include "util/util_array.h" -#include "util/util_boundbox.h" -#include "util/util_list.h" -#include "util/util_map.h" -#include "util/util_param.h" -#include "util/util_set.h" -#include "util/util_types.h" -#include "util/util_vector.h" +#include "util/array.h" +#include "util/boundbox.h" +#include "util/list.h" +#include "util/map.h" +#include "util/param.h" +#include "util/set.h" +#include "util/types.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/mesh_displace.cpp b/intern/cycles/scene/mesh_displace.cpp index c673d79f8fe..e69c2d1c3be 100644 --- a/intern/cycles/scene/mesh_displace.cpp +++ b/intern/cycles/scene/mesh_displace.cpp @@ -23,10 +23,10 @@ #include "scene/scene.h" #include "scene/shader.h" -#include "util/util_foreach.h" -#include "util/util_map.h" -#include "util/util_progress.h" -#include "util/util_set.h" +#include "util/foreach.h" +#include "util/map.h" +#include "util/progress.h" +#include "util/set.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/mesh_subdivision.cpp b/intern/cycles/scene/mesh_subdivision.cpp index 2b27d4b3b2a..a0c0bc68f8b 100644 --- a/intern/cycles/scene/mesh_subdivision.cpp +++ b/intern/cycles/scene/mesh_subdivision.cpp @@ -18,13 +18,13 @@ #include "scene/camera.h" #include "scene/mesh.h" -#include "subd/subd_patch.h" -#include "subd/subd_patch_table.h" -#include "subd/subd_split.h" +#include "subd/patch.h" +#include "subd/patch_table.h" +#include "subd/split.h" -#include "util/util_algorithm.h" -#include "util/util_foreach.h" -#include "util/util_hash.h" +#include "util/algorithm.h" +#include "util/foreach.h" +#include "util/hash.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/object.cpp b/intern/cycles/scene/object.cpp index 8b0cc752049..69a2365f17c 100644 --- a/intern/cycles/scene/object.cpp +++ b/intern/cycles/scene/object.cpp @@ -27,16 +27,16 @@ #include "scene/stats.h" #include "scene/volume.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_map.h" -#include "util/util_murmurhash.h" -#include "util/util_progress.h" -#include "util/util_set.h" -#include "util/util_task.h" -#include "util/util_vector.h" - -#include "subd/subd_patch_table.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/map.h" +#include "util/murmurhash.h" +#include "util/progress.h" +#include "util/set.h" +#include "util/task.h" +#include "util/vector.h" + +#include "subd/patch_table.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/object.h b/intern/cycles/scene/object.h index d3909bb2b03..f6dc57ee8b9 100644 --- a/intern/cycles/scene/object.h +++ b/intern/cycles/scene/object.h @@ -25,13 
+25,13 @@ #include "scene/particles.h" #include "scene/scene.h" -#include "util/util_array.h" -#include "util/util_boundbox.h" -#include "util/util_param.h" -#include "util/util_thread.h" -#include "util/util_transform.h" -#include "util/util_types.h" -#include "util/util_vector.h" +#include "util/array.h" +#include "util/boundbox.h" +#include "util/param.h" +#include "util/thread.h" +#include "util/transform.h" +#include "util/types.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/osl.cpp b/intern/cycles/scene/osl.cpp index c8ab83ab781..09626cb48bb 100644 --- a/intern/cycles/scene/osl.cpp +++ b/intern/cycles/scene/osl.cpp @@ -28,17 +28,17 @@ #ifdef WITH_OSL -# include "kernel/osl/osl_globals.h" -# include "kernel/osl/osl_services.h" -# include "kernel/osl/osl_shader.h" - -# include "util/util_aligned_malloc.h" -# include "util/util_foreach.h" -# include "util/util_logging.h" -# include "util/util_md5.h" -# include "util/util_path.h" -# include "util/util_progress.h" -# include "util/util_projection.h" +# include "kernel/osl/globals.h" +# include "kernel/osl/services.h" +# include "kernel/osl/shader.h" + +# include "util/aligned_malloc.h" +# include "util/foreach.h" +# include "util/log.h" +# include "util/md5.h" +# include "util/path.h" +# include "util/progress.h" +# include "util/projection.h" #endif diff --git a/intern/cycles/scene/osl.h b/intern/cycles/scene/osl.h index 4161fe6ed67..d54040e1047 100644 --- a/intern/cycles/scene/osl.h +++ b/intern/cycles/scene/osl.h @@ -17,10 +17,10 @@ #ifndef __OSL_H__ #define __OSL_H__ -#include "util/util_array.h" -#include "util/util_set.h" -#include "util/util_string.h" -#include "util/util_thread.h" +#include "util/array.h" +#include "util/set.h" +#include "util/string.h" +#include "util/thread.h" #include "scene/shader.h" #include "scene/shader_graph.h" diff --git a/intern/cycles/scene/particles.cpp b/intern/cycles/scene/particles.cpp index 8041c57ba02..92381171082 100644 --- a/intern/cycles/scene/particles.cpp +++ b/intern/cycles/scene/particles.cpp @@ -19,12 +19,12 @@ #include "scene/scene.h" #include "scene/stats.h" -#include "util/util_foreach.h" -#include "util/util_hash.h" -#include "util/util_logging.h" -#include "util/util_map.h" -#include "util/util_progress.h" -#include "util/util_vector.h" +#include "util/foreach.h" +#include "util/hash.h" +#include "util/log.h" +#include "util/map.h" +#include "util/progress.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/particles.h b/intern/cycles/scene/particles.h index 8b59756f148..b958d12e4e3 100644 --- a/intern/cycles/scene/particles.h +++ b/intern/cycles/scene/particles.h @@ -17,8 +17,8 @@ #ifndef __PARTICLES_H__ #define __PARTICLES_H__ -#include "util/util_array.h" -#include "util/util_types.h" +#include "util/array.h" +#include "util/types.h" #include "graph/node.h" diff --git a/intern/cycles/scene/pass.cpp b/intern/cycles/scene/pass.cpp index ee770ac8e58..791101e0940 100644 --- a/intern/cycles/scene/pass.cpp +++ b/intern/cycles/scene/pass.cpp @@ -16,8 +16,8 @@ #include "scene/pass.h" -#include "util/util_algorithm.h" -#include "util/util_logging.h" +#include "util/algorithm.h" +#include "util/log.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/pass.h b/intern/cycles/scene/pass.h index 82230c62cb0..7da07cfa562 100644 --- a/intern/cycles/scene/pass.h +++ b/intern/cycles/scene/pass.h @@ -18,10 +18,10 @@ #include // NOLINT -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/string.h" +#include 
"util/vector.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" #include "graph/node.h" diff --git a/intern/cycles/scene/procedural.cpp b/intern/cycles/scene/procedural.cpp index abfc6c62ad4..f038c8b1023 100644 --- a/intern/cycles/scene/procedural.cpp +++ b/intern/cycles/scene/procedural.cpp @@ -14,13 +14,12 @@ * limitations under the License. */ -#include "procedural.h" - +#include "scene/procedural.h" #include "scene/scene.h" #include "scene/stats.h" -#include "util/util_foreach.h" -#include "util/util_progress.h" +#include "util/foreach.h" +#include "util/progress.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp index bc5737ec126..8cde4873eab 100644 --- a/intern/cycles/scene/scene.cpp +++ b/intern/cycles/scene/scene.cpp @@ -38,10 +38,10 @@ #include "scene/volume.h" #include "session/session.h" -#include "util/util_foreach.h" -#include "util/util_guarded_allocator.h" -#include "util/util_logging.h" -#include "util/util_progress.h" +#include "util/foreach.h" +#include "util/guarded_allocator.h" +#include "util/log.h" +#include "util/progress.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/scene.h b/intern/cycles/scene/scene.h index 9b2502c9361..fa7fc54602a 100644 --- a/intern/cycles/scene/scene.h +++ b/intern/cycles/scene/scene.h @@ -17,22 +17,22 @@ #ifndef __SCENE_H__ #define __SCENE_H__ -#include "bvh/bvh_params.h" +#include "bvh/params.h" #include "scene/film.h" #include "scene/image.h" #include "scene/shader.h" #include "device/device.h" -#include "device/device_memory.h" - -#include "util/util_param.h" -#include "util/util_string.h" -#include "util/util_system.h" -#include "util/util_texture.h" -#include "util/util_thread.h" -#include "util/util_types.h" -#include "util/util_vector.h" +#include "device/memory.h" + +#include "util/param.h" +#include "util/string.h" +#include "util/system.h" +#include "util/texture.h" +#include "util/thread.h" +#include "util/types.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/shader.cpp b/intern/cycles/scene/shader.cpp index 6b464a77401..0b286aba9cf 100644 --- a/intern/cycles/scene/shader.cpp +++ b/intern/cycles/scene/shader.cpp @@ -32,10 +32,10 @@ #include "scene/svm.h" #include "scene/tables.h" -#include "util/util_foreach.h" -#include "util/util_murmurhash.h" -#include "util/util_task.h" -#include "util/util_transform.h" +#include "util/foreach.h" +#include "util/murmurhash.h" +#include "util/task.h" +#include "util/transform.h" #ifdef WITH_OCIO # include diff --git a/intern/cycles/scene/shader.h b/intern/cycles/scene/shader.h index 7ef3bda15d7..e9d26412ae8 100644 --- a/intern/cycles/scene/shader.h +++ b/intern/cycles/scene/shader.h @@ -19,20 +19,20 @@ #ifdef WITH_OSL /* So no context pollution happens from indirectly included windows.h */ -# include "util/util_windows.h" +# include "util/windows.h" # include #endif -#include "kernel/kernel_types.h" +#include "kernel/types.h" #include "scene/attribute.h" #include "graph/node.h" -#include "util/util_map.h" -#include "util/util_param.h" -#include "util/util_string.h" -#include "util/util_thread.h" -#include "util/util_types.h" +#include "util/map.h" +#include "util/param.h" +#include "util/string.h" +#include "util/thread.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/shader_graph.cpp b/intern/cycles/scene/shader_graph.cpp index 116d8335ef8..f99dfa141f6 100644 --- a/intern/cycles/scene/shader_graph.cpp +++ b/intern/cycles/scene/shader_graph.cpp @@ -21,11 +21,11 
@@ #include "scene/shader.h" #include "scene/shader_nodes.h" -#include "util/util_algorithm.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_md5.h" -#include "util/util_queue.h" +#include "util/algorithm.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/md5.h" +#include "util/queue.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/shader_graph.h b/intern/cycles/scene/shader_graph.h index 3584754fad1..8b525a7ec0b 100644 --- a/intern/cycles/scene/shader_graph.h +++ b/intern/cycles/scene/shader_graph.h @@ -20,14 +20,14 @@ #include "graph/node.h" #include "graph/node_type.h" -#include "kernel/kernel_types.h" - -#include "util/util_list.h" -#include "util/util_map.h" -#include "util/util_param.h" -#include "util/util_set.h" -#include "util/util_types.h" -#include "util/util_vector.h" +#include "kernel/types.h" + +#include "util/list.h" +#include "util/map.h" +#include "util/param.h" +#include "util/set.h" +#include "util/types.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/shader_nodes.cpp b/intern/cycles/scene/shader_nodes.cpp index d7fc7ae1c27..14d051350fb 100644 --- a/intern/cycles/scene/shader_nodes.cpp +++ b/intern/cycles/scene/shader_nodes.cpp @@ -29,15 +29,15 @@ #include "sky_model.h" -#include "util/util_color.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_transform.h" - -#include "kernel/svm/svm_color_util.h" -#include "kernel/svm/svm_mapping_util.h" -#include "kernel/svm/svm_math_util.h" -#include "kernel/svm/svm_ramp_util.h" +#include "util/color.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/transform.h" + +#include "kernel/svm/color_util.h" +#include "kernel/svm/mapping_util.h" +#include "kernel/svm/math_util.h" +#include "kernel/svm/ramp_util.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/shader_nodes.h b/intern/cycles/scene/shader_nodes.h index b8a439fa9b9..64a2b1c7843 100644 --- a/intern/cycles/scene/shader_nodes.h +++ b/intern/cycles/scene/shader_nodes.h @@ -21,8 +21,8 @@ #include "scene/image.h" #include "scene/shader_graph.h" -#include "util/util_array.h" -#include "util/util_string.h" +#include "util/array.h" +#include "util/string.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/sobol.cpp b/intern/cycles/scene/sobol.cpp index 397c28814ca..09d10c3660e 100644 --- a/intern/cycles/scene/sobol.cpp +++ b/intern/cycles/scene/sobol.cpp @@ -45,13 +45,13 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "util/util_types.h" +#include "util/types.h" #include "scene/sobol.h" CCL_NAMESPACE_BEGIN -#include "sobol.tables" +#include "scene/sobol.tables" void sobol_generate_direction_vectors(uint vectors[][SOBOL_BITS], int dimensions) { diff --git a/intern/cycles/scene/sobol.h b/intern/cycles/scene/sobol.h index d38857d2b35..86b2a1616b8 100644 --- a/intern/cycles/scene/sobol.h +++ b/intern/cycles/scene/sobol.h @@ -17,7 +17,7 @@ #ifndef __SOBOL_H__ #define __SOBOL_H__ -#include "util/util_types.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/stats.cpp b/intern/cycles/scene/stats.cpp index 5c3cff232f4..e2b00d16593 100644 --- a/intern/cycles/scene/stats.cpp +++ b/intern/cycles/scene/stats.cpp @@ -16,9 +16,9 @@ #include "scene/stats.h" #include "scene/object.h" -#include "util/util_algorithm.h" -#include "util/util_foreach.h" -#include "util/util_string.h" +#include "util/algorithm.h" +#include "util/foreach.h" +#include "util/string.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/stats.h b/intern/cycles/scene/stats.h index ffcc4d55235..d9095acc4c9 100644 --- a/intern/cycles/scene/stats.h +++ b/intern/cycles/scene/stats.h @@ -19,9 +19,9 @@ #include "scene/scene.h" -#include "util/util_stats.h" -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/stats.h" +#include "util/string.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/svm.cpp b/intern/cycles/scene/svm.cpp index b0b7fb605d1..6da0df302ad 100644 --- a/intern/cycles/scene/svm.cpp +++ b/intern/cycles/scene/svm.cpp @@ -26,10 +26,10 @@ #include "scene/stats.h" #include "scene/svm.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_progress.h" -#include "util/util_task.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/progress.h" +#include "util/task.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/svm.h b/intern/cycles/scene/svm.h index a3d215218fa..edfd71040e4 100644 --- a/intern/cycles/scene/svm.h +++ b/intern/cycles/scene/svm.h @@ -21,10 +21,10 @@ #include "scene/shader.h" #include "scene/shader_graph.h" -#include "util/util_array.h" -#include "util/util_set.h" -#include "util/util_string.h" -#include "util/util_thread.h" +#include "util/array.h" +#include "util/set.h" +#include "util/string.h" +#include "util/thread.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/tables.cpp b/intern/cycles/scene/tables.cpp index 39edc5d89cd..3544fea67d6 100644 --- a/intern/cycles/scene/tables.cpp +++ b/intern/cycles/scene/tables.cpp @@ -19,8 +19,8 @@ #include "scene/scene.h" #include "scene/stats.h" -#include "util/util_logging.h" -#include "util/util_time.h" +#include "util/log.h" +#include "util/time.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/tables.h b/intern/cycles/scene/tables.h index de538e2af78..3e52544d1fb 100644 --- a/intern/cycles/scene/tables.h +++ b/intern/cycles/scene/tables.h @@ -17,8 +17,8 @@ #ifndef __TABLES_H__ #define __TABLES_H__ -#include "util/util_list.h" -#include "util/util_vector.h" +#include "util/list.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/scene/volume.cpp b/intern/cycles/scene/volume.cpp index 757388a4491..509d0ecedf7 100644 --- a/intern/cycles/scene/volume.cpp +++ b/intern/cycles/scene/volume.cpp @@ -25,12 +25,12 @@ # include #endif -#include "util/util_foreach.h" -#include "util/util_hash.h" -#include "util/util_logging.h" -#include "util/util_openvdb.h" -#include "util/util_progress.h" -#include 
"util/util_types.h" +#include "util/foreach.h" +#include "util/hash.h" +#include "util/log.h" +#include "util/openvdb.h" +#include "util/progress.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/session/buffers.cpp b/intern/cycles/session/buffers.cpp index 439c0f826ea..51d9c1e5d8f 100644 --- a/intern/cycles/session/buffers.cpp +++ b/intern/cycles/session/buffers.cpp @@ -19,11 +19,11 @@ #include "device/device.h" #include "session/buffers.h" -#include "util/util_foreach.h" -#include "util/util_hash.h" -#include "util/util_math.h" -#include "util/util_time.h" -#include "util/util_types.h" +#include "util/foreach.h" +#include "util/hash.h" +#include "util/math.h" +#include "util/time.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/session/buffers.h b/intern/cycles/session/buffers.h index 4c261430bb6..67022bb5b6b 100644 --- a/intern/cycles/session/buffers.h +++ b/intern/cycles/session/buffers.h @@ -17,16 +17,16 @@ #ifndef __BUFFERS_H__ #define __BUFFERS_H__ -#include "device/device_memory.h" +#include "device/memory.h" #include "graph/node.h" #include "scene/pass.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "util/util_half.h" -#include "util/util_string.h" -#include "util/util_thread.h" -#include "util/util_types.h" +#include "util/half.h" +#include "util/string.h" +#include "util/thread.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/session/display_driver.h b/intern/cycles/session/display_driver.h index 85f305034d7..77f89326fd0 100644 --- a/intern/cycles/session/display_driver.h +++ b/intern/cycles/session/display_driver.h @@ -16,8 +16,8 @@ #pragma once -#include "util/util_half.h" -#include "util/util_types.h" +#include "util/half.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/session/merge.cpp b/intern/cycles/session/merge.cpp index 97e9c75d5f7..5890c15f48c 100644 --- a/intern/cycles/session/merge.cpp +++ b/intern/cycles/session/merge.cpp @@ -16,11 +16,11 @@ #include "session/merge.h" -#include "util/util_array.h" -#include "util/util_map.h" -#include "util/util_system.h" -#include "util/util_time.h" -#include "util/util_unique_ptr.h" +#include "util/array.h" +#include "util/map.h" +#include "util/system.h" +#include "util/time.h" +#include "util/unique_ptr.h" #include #include diff --git a/intern/cycles/session/merge.h b/intern/cycles/session/merge.h index 87e5d2d4723..be03a69b27a 100644 --- a/intern/cycles/session/merge.h +++ b/intern/cycles/session/merge.h @@ -17,8 +17,8 @@ #ifndef __MERGE_H__ #define __MERGE_H__ -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/string.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/session/output_driver.h b/intern/cycles/session/output_driver.h index b7e980d71d4..95e15ed875b 100644 --- a/intern/cycles/session/output_driver.h +++ b/intern/cycles/session/output_driver.h @@ -16,9 +16,9 @@ #pragma once -#include "util/util_math.h" -#include "util/util_string.h" -#include "util/util_types.h" +#include "util/math.h" +#include "util/string.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/session/session.cpp b/intern/cycles/session/session.cpp index f8fc892f127..b228939689c 100644 --- a/intern/cycles/session/session.cpp +++ b/intern/cycles/session/session.cpp @@ -35,12 +35,12 @@ #include "session/output_driver.h" #include "session/session.h" -#include "util/util_foreach.h" -#include "util/util_function.h" -#include "util/util_logging.h" 
-#include "util/util_math.h" -#include "util/util_task.h" -#include "util/util_time.h" +#include "util/foreach.h" +#include "util/function.h" +#include "util/log.h" +#include "util/math.h" +#include "util/task.h" +#include "util/time.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/session/session.h b/intern/cycles/session/session.h index 5aa6df79ef1..1ec0c6e9bb1 100644 --- a/intern/cycles/session/session.h +++ b/intern/cycles/session/session.h @@ -24,11 +24,11 @@ #include "session/buffers.h" #include "session/tile.h" -#include "util/util_progress.h" -#include "util/util_stats.h" -#include "util/util_thread.h" -#include "util/util_unique_ptr.h" -#include "util/util_vector.h" +#include "util/progress.h" +#include "util/stats.h" +#include "util/thread.h" +#include "util/unique_ptr.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/session/tile.cpp b/intern/cycles/session/tile.cpp index 59332530596..816bf4d5fa0 100644 --- a/intern/cycles/session/tile.cpp +++ b/intern/cycles/session/tile.cpp @@ -23,13 +23,13 @@ #include "scene/film.h" #include "scene/integrator.h" #include "scene/scene.h" -#include "util/util_algorithm.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_path.h" -#include "util/util_string.h" -#include "util/util_system.h" -#include "util/util_types.h" +#include "util/algorithm.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/path.h" +#include "util/string.h" +#include "util/system.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/session/tile.h b/intern/cycles/session/tile.h index 37a02081a53..eace148eb0a 100644 --- a/intern/cycles/session/tile.h +++ b/intern/cycles/session/tile.h @@ -17,9 +17,9 @@ #pragma once #include "session/buffers.h" -#include "util/util_image.h" -#include "util/util_string.h" -#include "util/util_unique_ptr.h" +#include "util/image.h" +#include "util/string.h" +#include "util/unique_ptr.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/subd/CMakeLists.txt b/intern/cycles/subd/CMakeLists.txt index c697ddb9891..4bf5503dc4b 100644 --- a/intern/cycles/subd/CMakeLists.txt +++ b/intern/cycles/subd/CMakeLists.txt @@ -21,18 +21,18 @@ set(INC_SYS ) set(SRC - subd_dice.cpp - subd_patch.cpp - subd_split.cpp - subd_patch_table.cpp + dice.cpp + patch.cpp + split.cpp + patch_table.cpp ) set(SRC_HEADERS - subd_dice.h - subd_patch.h - subd_patch_table.h - subd_split.h - subd_subpatch.h + dice.h + patch.h + patch_table.h + split.h + subpatch.h ) set(LIB diff --git a/intern/cycles/subd/dice.cpp b/intern/cycles/subd/dice.cpp new file mode 100644 index 00000000000..461fa0bcd9c --- /dev/null +++ b/intern/cycles/subd/dice.cpp @@ -0,0 +1,283 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "scene/camera.h" +#include "scene/mesh.h" + +#include "subd/dice.h" +#include "subd/patch.h" + +CCL_NAMESPACE_BEGIN + +/* EdgeDice Base */ + +EdgeDice::EdgeDice(const SubdParams ¶ms_) : params(params_) +{ + mesh_P = NULL; + mesh_N = NULL; + vert_offset = 0; + + params.mesh->attributes.add(ATTR_STD_VERTEX_NORMAL); + + if (params.ptex) { + params.mesh->attributes.add(ATTR_STD_PTEX_UV); + params.mesh->attributes.add(ATTR_STD_PTEX_FACE_ID); + } +} + +void EdgeDice::reserve(int num_verts, int num_triangles) +{ + Mesh *mesh = params.mesh; + + vert_offset = mesh->get_verts().size(); + tri_offset = mesh->num_triangles(); + + mesh->resize_mesh(mesh->get_verts().size() + num_verts, mesh->num_triangles()); + mesh->reserve_mesh(mesh->get_verts().size() + num_verts, mesh->num_triangles() + num_triangles); + + Attribute *attr_vN = mesh->attributes.add(ATTR_STD_VERTEX_NORMAL); + + mesh_P = mesh->verts.data() + vert_offset; + mesh_N = attr_vN->data_float3() + vert_offset; + + params.mesh->num_subd_verts += num_verts; +} + +void EdgeDice::set_vert(Patch *patch, int index, float2 uv) +{ + float3 P, N; + + patch->eval(&P, NULL, NULL, &N, uv.x, uv.y); + + assert(index < params.mesh->verts.size()); + + mesh_P[index] = P; + mesh_N[index] = N; + params.mesh->vert_patch_uv[index + vert_offset] = make_float2(uv.x, uv.y); +} + +void EdgeDice::add_triangle(Patch *patch, int v0, int v1, int v2) +{ + Mesh *mesh = params.mesh; + + mesh->add_triangle(v0 + vert_offset, v1 + vert_offset, v2 + vert_offset, patch->shader, true); + params.mesh->triangle_patch[params.mesh->num_triangles() - 1] = patch->patch_index; + + tri_offset++; +} + +void EdgeDice::stitch_triangles(Subpatch &sub, int edge) +{ + int Mu = max(sub.edge_u0.T, sub.edge_u1.T); + int Mv = max(sub.edge_v0.T, sub.edge_v1.T); + Mu = max(Mu, 2); + Mv = max(Mv, 2); + + int outer_T = sub.edges[edge].T; + int inner_T = ((edge % 2) == 0) ? Mv - 2 : Mu - 2; + + if (inner_T < 0 || outer_T < 0) + return; // XXX avoid crashes for Mu or Mv == 1, missing polygons + + /* stitch together two arrays of verts with triangles. at each step, + * we compare using the next verts on both sides, to find the split + * direction with the smallest diagonal, and use that in order to keep + * the triangle shape reasonable. 
*/ + for (size_t i = 0, j = 0; i < inner_T || j < outer_T;) { + int v0, v1, v2; + + v0 = sub.get_vert_along_grid_edge(edge, i); + v1 = sub.get_vert_along_edge(edge, j); + + if (j == outer_T) { + v2 = sub.get_vert_along_grid_edge(edge, ++i); + } + else if (i == inner_T) { + v2 = sub.get_vert_along_edge(edge, ++j); + } + else { + /* length of diagonals */ + float len1 = len_squared(mesh_P[sub.get_vert_along_grid_edge(edge, i)] - + mesh_P[sub.get_vert_along_edge(edge, j + 1)]); + float len2 = len_squared(mesh_P[sub.get_vert_along_edge(edge, j)] - + mesh_P[sub.get_vert_along_grid_edge(edge, i + 1)]); + + /* use smallest diagonal */ + if (len1 < len2) + v2 = sub.get_vert_along_edge(edge, ++j); + else + v2 = sub.get_vert_along_grid_edge(edge, ++i); + } + + add_triangle(sub.patch, v1, v0, v2); + } +} + +/* QuadDice */ + +QuadDice::QuadDice(const SubdParams ¶ms_) : EdgeDice(params_) +{ +} + +float2 QuadDice::map_uv(Subpatch &sub, float u, float v) +{ + /* map UV from subpatch to patch parametric coordinates */ + float2 d0 = interp(sub.c00, sub.c01, v); + float2 d1 = interp(sub.c10, sub.c11, v); + return interp(d0, d1, u); +} + +float3 QuadDice::eval_projected(Subpatch &sub, float u, float v) +{ + float2 uv = map_uv(sub, u, v); + float3 P; + + sub.patch->eval(&P, NULL, NULL, NULL, uv.x, uv.y); + if (params.camera) + P = transform_perspective(¶ms.camera->worldtoraster, P); + + return P; +} + +void QuadDice::set_vert(Subpatch &sub, int index, float u, float v) +{ + EdgeDice::set_vert(sub.patch, index, map_uv(sub, u, v)); +} + +void QuadDice::set_side(Subpatch &sub, int edge) +{ + int t = sub.edges[edge].T; + + /* set verts on the edge of the patch */ + for (int i = 0; i < t; i++) { + float f = i / (float)t; + + float u, v; + switch (edge) { + case 0: + u = 0; + v = f; + break; + case 1: + u = f; + v = 1; + break; + case 2: + u = 1; + v = 1.0f - f; + break; + case 3: + default: + u = 1.0f - f; + v = 0; + break; + } + + set_vert(sub, sub.get_vert_along_edge(edge, i), u, v); + } +} + +float QuadDice::quad_area(const float3 &a, const float3 &b, const float3 &c, const float3 &d) +{ + return triangle_area(a, b, d) + triangle_area(a, d, c); +} + +float QuadDice::scale_factor(Subpatch &sub, int Mu, int Mv) +{ + /* estimate area as 4x largest of 4 quads */ + float3 P[3][3]; + + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + P[i][j] = eval_projected(sub, i * 0.5f, j * 0.5f); + + float A1 = quad_area(P[0][0], P[1][0], P[0][1], P[1][1]); + float A2 = quad_area(P[1][0], P[2][0], P[1][1], P[2][1]); + float A3 = quad_area(P[0][1], P[1][1], P[0][2], P[1][2]); + float A4 = quad_area(P[1][1], P[2][1], P[1][2], P[2][2]); + float Apatch = max(A1, max(A2, max(A3, A4))) * 4.0f; + + /* solve for scaling factor */ + float Atri = params.dicing_rate * params.dicing_rate * 0.5f; + float Ntris = Apatch / Atri; + + // XXX does the -sqrt solution matter + // XXX max(D, 0.0) is highly suspicious, need to test cases + // where D goes negative + float N = 0.5f * (Ntris - (sub.edge_u0.T + sub.edge_u1.T + sub.edge_v0.T + sub.edge_v1.T)); + float D = 4.0f * N * Mu * Mv + (Mu + Mv) * (Mu + Mv); + float S = (Mu + Mv + sqrtf(max(D, 0.0f))) / (2 * Mu * Mv); + + return S; +} + +void QuadDice::add_grid(Subpatch &sub, int Mu, int Mv, int offset) +{ + /* create inner grid */ + float du = 1.0f / (float)Mu; + float dv = 1.0f / (float)Mv; + + for (int j = 1; j < Mv; j++) { + for (int i = 1; i < Mu; i++) { + float u = i * du; + float v = j * dv; + + set_vert(sub, offset + (i - 1) + (j - 1) * (Mu - 1), u, v); + + if (i < Mu - 1 && j < 
Mv - 1) { + int i1 = offset + (i - 1) + (j - 1) * (Mu - 1); + int i2 = offset + i + (j - 1) * (Mu - 1); + int i3 = offset + i + j * (Mu - 1); + int i4 = offset + (i - 1) + j * (Mu - 1); + + add_triangle(sub.patch, i1, i2, i3); + add_triangle(sub.patch, i1, i3, i4); + } + } + } +} + +void QuadDice::dice(Subpatch &sub) +{ + /* compute inner grid size with scale factor */ + int Mu = max(sub.edge_u0.T, sub.edge_u1.T); + int Mv = max(sub.edge_v0.T, sub.edge_v1.T); + +#if 0 /* Doesn't work very well, especially at grazing angles. */ + float S = scale_factor(sub, ef, Mu, Mv); +#else + float S = 1.0f; +#endif + + Mu = max((int)ceilf(S * Mu), 2); // XXX handle 0 & 1? + Mv = max((int)ceilf(S * Mv), 2); // XXX handle 0 & 1? + + /* inner grid */ + add_grid(sub, Mu, Mv, sub.inner_grid_vert_offset); + + /* sides */ + set_side(sub, 0); + set_side(sub, 1); + set_side(sub, 2); + set_side(sub, 3); + + stitch_triangles(sub, 0); + stitch_triangles(sub, 1); + stitch_triangles(sub, 2); + stitch_triangles(sub, 3); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/subd/dice.h b/intern/cycles/subd/dice.h new file mode 100644 index 00000000000..7510aae775c --- /dev/null +++ b/intern/cycles/subd/dice.h @@ -0,0 +1,103 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SUBD_DICE_H__ +#define __SUBD_DICE_H__ + +/* DX11 like EdgeDice implementation, with different tessellation factors for + * each edge for watertight tessellation, with subpatch remapping to work with + * DiagSplit. For more algorithm details, see the DiagSplit paper or the + * ARB_tessellation_shader OpenGL extension, Section 2.X.2. 
*/ + +#include "util/types.h" +#include "util/vector.h" + +#include "subd/subpatch.h" + +CCL_NAMESPACE_BEGIN + +class Camera; +class Mesh; +class Patch; + +struct SubdParams { + Mesh *mesh; + bool ptex; + + int test_steps; + int split_threshold; + float dicing_rate; + int max_level; + Camera *camera; + Transform objecttoworld; + + SubdParams(Mesh *mesh_, bool ptex_ = false) + { + mesh = mesh_; + ptex = ptex_; + + test_steps = 3; + split_threshold = 1; + dicing_rate = 1.0f; + max_level = 12; + camera = NULL; + } +}; + +/* EdgeDice Base */ + +class EdgeDice { + public: + SubdParams params; + float3 *mesh_P; + float3 *mesh_N; + size_t vert_offset; + size_t tri_offset; + + explicit EdgeDice(const SubdParams ¶ms); + + void reserve(int num_verts, int num_triangles); + + void set_vert(Patch *patch, int index, float2 uv); + void add_triangle(Patch *patch, int v0, int v1, int v2); + + void stitch_triangles(Subpatch &sub, int edge); +}; + +/* Quad EdgeDice */ + +class QuadDice : public EdgeDice { + public: + explicit QuadDice(const SubdParams ¶ms); + + float3 eval_projected(Subpatch &sub, float u, float v); + + float2 map_uv(Subpatch &sub, float u, float v); + void set_vert(Subpatch &sub, int index, float u, float v); + + void add_grid(Subpatch &sub, int Mu, int Mv, int offset); + + void set_side(Subpatch &sub, int edge); + + float quad_area(const float3 &a, const float3 &b, const float3 &c, const float3 &d); + float scale_factor(Subpatch &sub, int Mu, int Mv); + + void dice(Subpatch &sub); +}; + +CCL_NAMESPACE_END + +#endif /* __SUBD_DICE_H__ */ diff --git a/intern/cycles/subd/patch.cpp b/intern/cycles/subd/patch.cpp new file mode 100644 index 00000000000..4d73f334c1b --- /dev/null +++ b/intern/cycles/subd/patch.cpp @@ -0,0 +1,121 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Parts adapted from code in the public domain in NVidia Mesh Tools. 
*/ + +#include "scene/mesh.h" + +#include "subd/patch.h" + +#include "util/math.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* De Casteljau Evaluation */ + +static void decasteljau_cubic(float3 *P, float3 *dt, float t, const float3 cp[4]) +{ + float3 d0 = cp[0] + t * (cp[1] - cp[0]); + float3 d1 = cp[1] + t * (cp[2] - cp[1]); + float3 d2 = cp[2] + t * (cp[3] - cp[2]); + + d0 += t * (d1 - d0); + d1 += t * (d2 - d1); + + *P = d0 + t * (d1 - d0); + if (dt) + *dt = d1 - d0; +} + +static void decasteljau_bicubic( + float3 *P, float3 *du, float3 *dv, const float3 cp[16], float u, float v) +{ + float3 ucp[4], utn[4]; + + /* interpolate over u */ + decasteljau_cubic(ucp + 0, utn + 0, u, cp); + decasteljau_cubic(ucp + 1, utn + 1, u, cp + 4); + decasteljau_cubic(ucp + 2, utn + 2, u, cp + 8); + decasteljau_cubic(ucp + 3, utn + 3, u, cp + 12); + + /* interpolate over v */ + decasteljau_cubic(P, dv, v, ucp); + if (du) + decasteljau_cubic(du, NULL, v, utn); +} + +/* Linear Quad Patch */ + +void LinearQuadPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v) +{ + float3 d0 = interp(hull[0], hull[1], u); + float3 d1 = interp(hull[2], hull[3], u); + + *P = interp(d0, d1, v); + + if (dPdu && dPdv) { + *dPdu = interp(hull[1] - hull[0], hull[3] - hull[2], v); + *dPdv = interp(hull[2] - hull[0], hull[3] - hull[1], u); + } + + if (N) { + *N = normalize( + interp(interp(normals[0], normals[1], u), interp(normals[2], normals[3], u), v)); + } +} + +BoundBox LinearQuadPatch::bound() +{ + BoundBox bbox = BoundBox::empty; + + for (int i = 0; i < 4; i++) + bbox.grow(hull[i]); + + return bbox; +} + +/* Bicubic Patch */ + +void BicubicPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v) +{ + if (N) { + float3 dPdu_, dPdv_; + decasteljau_bicubic(P, &dPdu_, &dPdv_, hull, u, v); + + if (dPdu && dPdv) { + *dPdu = dPdu_; + *dPdv = dPdv_; + } + + *N = normalize(cross(dPdu_, dPdv_)); + } + else { + decasteljau_bicubic(P, dPdu, dPdv, hull, u, v); + } +} + +BoundBox BicubicPatch::bound() +{ + BoundBox bbox = BoundBox::empty; + + for (int i = 0; i < 16; i++) + bbox.grow(hull[i]); + + return bbox; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/subd/patch.h b/intern/cycles/subd/patch.h new file mode 100644 index 00000000000..ad4dc1bd8e9 --- /dev/null +++ b/intern/cycles/subd/patch.h @@ -0,0 +1,63 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SUBD_PATCH_H__ +#define __SUBD_PATCH_H__ + +#include "util/boundbox.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +class Patch { + public: + Patch() : patch_index(0), shader(0), from_ngon(false) + { + } + + virtual ~Patch() = default; + + virtual void eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v) = 0; + + int patch_index; + int shader; + bool from_ngon; +}; + +/* Linear Quad Patch */ + +class LinearQuadPatch : public Patch { + public: + float3 hull[4]; + float3 normals[4]; + + void eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v); + BoundBox bound(); +}; + +/* Bicubic Patch */ + +class BicubicPatch : public Patch { + public: + float3 hull[16]; + + void eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v); + BoundBox bound(); +}; + +CCL_NAMESPACE_END + +#endif /* __SUBD_PATCH_H__ */ diff --git a/intern/cycles/subd/patch_table.cpp b/intern/cycles/subd/patch_table.cpp new file mode 100644 index 00000000000..d215dfaa1dd --- /dev/null +++ b/intern/cycles/subd/patch_table.cpp @@ -0,0 +1,295 @@ +/* + * Based on code from OpenSubdiv released under this license: + * + * Copyright 2014 DreamWorks Animation LLC. + * + * Licensed under the Apache License, Version 2.0 (the "Apache License") + * with the following modification; you may not use this file except in + * compliance with the Apache License and the following modification to it: + * Section 6. Trademarks. is deleted and replaced with: + * + * 6. Trademarks. This License does not grant permission to use the trade + * names, trademarks, service marks, or product names of the Licensor + * and its affiliates, except as required to comply with Section 4(c) of + * the License and to reproduce the content of the NOTICE file. + * + * You may obtain a copy of the Apache License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Apache License with the above modification is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the Apache License for the specific + * language governing permissions and limitations under the Apache License. + */ + +#include "subd/patch_table.h" +#include "kernel/types.h" + +#include "util/math.h" + +#ifdef WITH_OPENSUBDIV +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifdef WITH_OPENSUBDIV + +using namespace OpenSubdiv; + +/* functions for building patch maps */ + +struct PatchMapQuadNode { + /* sets all the children to point to the patch of index */ + void set_child(int index) + { + for (int i = 0; i < 4; i++) { + children[i] = index | PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF; + } + } + + /* sets the child in quadrant to point to the node or patch of the given index */ + void set_child(unsigned char quadrant, int index, bool is_leaf = true) + { + assert(quadrant < 4); + children[quadrant] = index | PATCH_MAP_NODE_IS_SET | (is_leaf ? 
PATCH_MAP_NODE_IS_LEAF : 0); + } + + uint children[4]; +}; + +template static int resolve_quadrant(T &median, T &u, T &v) +{ + int quadrant = -1; + + if (u < median) { + if (v < median) { + quadrant = 0; + } + else { + quadrant = 1; + v -= median; + } + } + else { + if (v < median) { + quadrant = 3; + } + else { + quadrant = 2; + v -= median; + } + u -= median; + } + + return quadrant; +} + +static void build_patch_map(PackedPatchTable &table, + OpenSubdiv::Far::PatchTable *patch_table, + int offset) +{ + int num_faces = 0; + + for (int array = 0; array < table.num_arrays; array++) { + Far::ConstPatchParamArray params = patch_table->GetPatchParams(array); + + for (int j = 0; j < patch_table->GetNumPatches(array); j++) { + num_faces = max(num_faces, (int)params[j].GetFaceId()); + } + } + num_faces++; + + vector quadtree; + quadtree.reserve(num_faces + table.num_patches); + quadtree.resize(num_faces); + + /* adjust offsets to make indices relative to the table */ + int handle_index = -(table.num_patches * PATCH_HANDLE_SIZE); + offset += table.total_size(); + + /* populate the quadtree from the FarPatchArrays sub-patches */ + for (int array = 0; array < table.num_arrays; array++) { + Far::ConstPatchParamArray params = patch_table->GetPatchParams(array); + + for (int i = 0; i < patch_table->GetNumPatches(array); + i++, handle_index += PATCH_HANDLE_SIZE) { + const Far::PatchParam ¶m = params[i]; + unsigned short depth = param.GetDepth(); + + PatchMapQuadNode *node = &quadtree[params[i].GetFaceId()]; + + if (depth == (param.NonQuadRoot() ? 1 : 0)) { + /* special case : regular BSpline face w/ no sub-patches */ + node->set_child(handle_index + offset); + continue; + } + + int u = param.GetU(); + int v = param.GetV(); + int pdepth = param.NonQuadRoot() ? depth - 2 : depth - 1; + int half = 1 << pdepth; + + for (int j = 0; j < depth; j++) { + int delta = half >> 1; + + int quadrant = resolve_quadrant(half, u, v); + assert(quadrant >= 0); + + half = delta; + + if (j == pdepth) { + /* we have reached the depth of the sub-patch : add a leaf */ + assert(!(node->children[quadrant] & PATCH_MAP_NODE_IS_SET)); + node->set_child(quadrant, handle_index + offset, true); + break; + } + else { + /* travel down the child node of the corresponding quadrant */ + if (!(node->children[quadrant] & PATCH_MAP_NODE_IS_SET)) { + /* create a new branch in the quadrant */ + quadtree.push_back(PatchMapQuadNode()); + + int idx = (int)quadtree.size() - 1; + node->set_child(quadrant, idx * 4 + offset, false); + + node = &quadtree[idx]; + } + else { + /* travel down an existing branch */ + uint idx = node->children[quadrant] & PATCH_MAP_NODE_INDEX_MASK; + node = &(quadtree[(idx - offset) / 4]); + } + } + } + } + } + + /* copy into table */ + assert(table.table.size() == table.total_size()); + uint map_offset = table.total_size(); + + table.num_nodes = quadtree.size() * 4; + table.table.resize(table.total_size()); + + uint *data = &table.table[map_offset]; + + for (int i = 0; i < quadtree.size(); i++) { + for (int j = 0; j < 4; j++) { + assert(quadtree[i].children[j] & PATCH_MAP_NODE_IS_SET); + *(data++) = quadtree[i].children[j]; + } + } +} + +#endif + +/* packed patch table functions */ + +size_t PackedPatchTable::total_size() +{ + return num_arrays * PATCH_ARRAY_SIZE + num_indices + + num_patches * (PATCH_PARAM_SIZE + PATCH_HANDLE_SIZE) + num_nodes * PATCH_NODE_SIZE; +} + +void PackedPatchTable::pack(Far::PatchTable *patch_table, int offset) +{ + num_arrays = 0; + num_patches = 0; + num_indices = 0; + num_nodes = 0; + +#ifdef 
WITH_OPENSUBDIV + num_arrays = patch_table->GetNumPatchArrays(); + + for (int i = 0; i < num_arrays; i++) { + int patches = patch_table->GetNumPatches(i); + int num_control = patch_table->GetPatchArrayDescriptor(i).GetNumControlVertices(); + + num_patches += patches; + num_indices += patches * num_control; + } + + table.resize(total_size()); + uint *data = table.data(); + + uint *array = data; + uint *index = array + num_arrays * PATCH_ARRAY_SIZE; + uint *param = index + num_indices; + uint *handle = param + num_patches * PATCH_PARAM_SIZE; + + uint current_param = 0; + + for (int i = 0; i < num_arrays; i++) { + *(array++) = patch_table->GetPatchArrayDescriptor(i).GetType(); + *(array++) = patch_table->GetNumPatches(i); + *(array++) = (index - data) + offset; + *(array++) = (param - data) + offset; + + Far::ConstIndexArray indices = patch_table->GetPatchArrayVertices(i); + + for (int j = 0; j < indices.size(); j++) { + *(index++) = indices[j]; + } + + const Far::PatchParamTable ¶m_table = patch_table->GetPatchParamTable(); + + int num_control = patch_table->GetPatchArrayDescriptor(i).GetNumControlVertices(); + int patches = patch_table->GetNumPatches(i); + + for (int j = 0; j < patches; j++, current_param++) { + *(param++) = param_table[current_param].field0; + *(param++) = param_table[current_param].field1; + + *(handle++) = (array - data) - PATCH_ARRAY_SIZE + offset; + *(handle++) = (param - data) - PATCH_PARAM_SIZE + offset; + *(handle++) = j * num_control; + } + } + + build_patch_map(*this, patch_table, offset); +#else + (void)patch_table; + (void)offset; +#endif +} + +void PackedPatchTable::copy_adjusting_offsets(uint *dest, int doffset) +{ + uint *src = table.data(); + + /* arrays */ + for (int i = 0; i < num_arrays; i++) { + *(dest++) = *(src++); + *(dest++) = *(src++); + *(dest++) = *(src++) + doffset; + *(dest++) = *(src++) + doffset; + } + + /* indices */ + for (int i = 0; i < num_indices; i++) { + *(dest++) = *(src++); + } + + /* params */ + for (int i = 0; i < num_patches; i++) { + *(dest++) = *(src++); + *(dest++) = *(src++); + } + + /* handles */ + for (int i = 0; i < num_patches; i++) { + *(dest++) = *(src++) + doffset; + *(dest++) = *(src++) + doffset; + *(dest++) = *(src++); + } + + /* nodes */ + for (int i = 0; i < num_nodes; i++) { + *(dest++) = *(src++) + doffset; + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/subd/patch_table.h b/intern/cycles/subd/patch_table.h new file mode 100644 index 00000000000..b5fd5923f31 --- /dev/null +++ b/intern/cycles/subd/patch_table.h @@ -0,0 +1,64 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SUBD_PATCH_TABLE_H__ +#define __SUBD_PATCH_TABLE_H__ + +#include "util/array.h" +#include "util/types.h" + +#ifdef WITH_OPENSUBDIV +# ifdef _MSC_VER +# include "iso646.h" +# endif + +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifdef WITH_OPENSUBDIV +using namespace OpenSubdiv; +#else +/* forward declare for when OpenSubdiv is unavailable */ +namespace Far { +struct PatchTable; +} +#endif + +#define PATCH_ARRAY_SIZE 4 +#define PATCH_PARAM_SIZE 2 +#define PATCH_HANDLE_SIZE 3 +#define PATCH_NODE_SIZE 1 + +struct PackedPatchTable { + array table; + + size_t num_arrays; + size_t num_indices; + size_t num_patches; + size_t num_nodes; + + /* calculated size from num_* members */ + size_t total_size(); + + void pack(Far::PatchTable *patch_table, int offset = 0); + void copy_adjusting_offsets(uint *dest, int doffset); +}; + +CCL_NAMESPACE_END + +#endif /* __SUBD_PATCH_TABLE_H__ */ diff --git a/intern/cycles/subd/split.cpp b/intern/cycles/subd/split.cpp new file mode 100644 index 00000000000..2b29f3a5a78 --- /dev/null +++ b/intern/cycles/subd/split.cpp @@ -0,0 +1,748 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "scene/camera.h" +#include "scene/mesh.h" + +#include "subd/dice.h" +#include "subd/patch.h" +#include "subd/split.h" + +#include "util/algorithm.h" +#include "util/foreach.h" +#include "util/hash.h" +#include "util/math.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* DiagSplit */ + +#define DSPLIT_NON_UNIFORM -1 +#define STITCH_NGON_CENTER_VERT_INDEX_OFFSET 0x60000000 +#define STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG (0x60000000 - 1) + +DiagSplit::DiagSplit(const SubdParams ¶ms_) : params(params_) +{ +} + +float3 DiagSplit::to_world(Patch *patch, float2 uv) +{ + float3 P; + + patch->eval(&P, NULL, NULL, NULL, uv.x, uv.y); + if (params.camera) + P = transform_point(¶ms.objecttoworld, P); + + return P; +} + +static void order_float2(float2 &a, float2 &b) +{ + if (b.x < a.x || b.y < a.y) { + swap(a, b); + } +} + +int DiagSplit::T(Patch *patch, float2 Pstart, float2 Pend, bool recursive_resolve) +{ + order_float2(Pstart, Pend); /* May not be necessary, but better to be safe. */ + + float Lsum = 0.0f; + float Lmax = 0.0f; + + float3 Plast = to_world(patch, Pstart); + + for (int i = 1; i < params.test_steps; i++) { + float t = i / (float)(params.test_steps - 1); + + float3 P = to_world(patch, Pstart + t * (Pend - Pstart)); + + float L; + + if (!params.camera) { + L = len(P - Plast); + } + else { + Camera *cam = params.camera; + + float pixel_width = cam->world_to_raster_size((P + Plast) * 0.5f); + L = len(P - Plast) / pixel_width; + } + + Lsum += L; + Lmax = max(L, Lmax); + + Plast = P; + } + + int tmin = (int)ceilf(Lsum / params.dicing_rate); + int tmax = (int)ceilf((params.test_steps - 1) * Lmax / + params.dicing_rate); // XXX paper says N instead of N-1, seems wrong? 
+ int res = max(tmax, 1); + + if (tmax - tmin > params.split_threshold) { + if (!recursive_resolve) { + res = DSPLIT_NON_UNIFORM; + } + else { + float2 P = (Pstart + Pend) * 0.5f; + res = T(patch, Pstart, P, true) + T(patch, P, Pend, true); + } + } + + limit_edge_factor(res, patch, Pstart, Pend); + return res; +} + +void DiagSplit::partition_edge( + Patch *patch, float2 *P, int *t0, int *t1, float2 Pstart, float2 Pend, int t) +{ + if (t == DSPLIT_NON_UNIFORM) { + *P = (Pstart + Pend) * 0.5f; + *t0 = T(patch, Pstart, *P); + *t1 = T(patch, *P, Pend); + } + else { + assert(t >= 2); /* Need at least two segments to partition into. */ + + int I = (int)floorf((float)t * 0.5f); + *P = interp(Pstart, Pend, I / (float)t); + *t0 = I; + *t1 = t - I; + } +} + +void DiagSplit::limit_edge_factor(int &T, Patch *patch, float2 Pstart, float2 Pend) +{ + int max_t = 1 << params.max_level; + int max_t_for_edge = int(max_t * len(Pstart - Pend)); + + if (patch->from_ngon) { + max_t_for_edge >>= 1; /* Initial split of ngon causes edges to extend half the distance. */ + } + + T = (max_t_for_edge <= 1) ? 1 : min(T, max_t_for_edge); + + assert(T >= 1 || T == DSPLIT_NON_UNIFORM); +} + +void DiagSplit::resolve_edge_factors(Subpatch &sub) +{ + /* Resolve DSPLIT_NON_UNIFORM to actual T value if splitting is no longer possible. */ + if (sub.edge_u0.T == 1 && sub.edge_u1.T == DSPLIT_NON_UNIFORM) { + sub.edge_u1.T = T(sub.patch, sub.c01, sub.c11, true); + } + if (sub.edge_u1.T == 1 && sub.edge_u0.T == DSPLIT_NON_UNIFORM) { + sub.edge_u0.T = T(sub.patch, sub.c00, sub.c10, true); + } + if (sub.edge_v0.T == 1 && sub.edge_v1.T == DSPLIT_NON_UNIFORM) { + sub.edge_v1.T = T(sub.patch, sub.c11, sub.c10, true); + } + if (sub.edge_v1.T == 1 && sub.edge_v0.T == DSPLIT_NON_UNIFORM) { + sub.edge_v0.T = T(sub.patch, sub.c01, sub.c00, true); + } +} + +void DiagSplit::split(Subpatch &sub, int depth) +{ + if (depth > 32) { + /* We should never get here, but just in case end recursion safely. */ + assert(!"diagsplit recursion limit reached"); + + sub.edge_u0.T = 1; + sub.edge_u1.T = 1; + sub.edge_v0.T = 1; + sub.edge_v1.T = 1; + + subpatches.push_back(sub); + return; + } + + bool split_u = (sub.edge_u0.T == DSPLIT_NON_UNIFORM || sub.edge_u1.T == DSPLIT_NON_UNIFORM); + bool split_v = (sub.edge_v0.T == DSPLIT_NON_UNIFORM || sub.edge_v1.T == DSPLIT_NON_UNIFORM); + + /* Split subpatches such that the ratio of T for opposite edges doesn't + * exceed 1.5, this reduces over tessellation for some patches + */ + /* clang-format off */ + if (min(sub.edge_u0.T, sub.edge_u1.T) > 8 && /* Must be uniform and preferably greater than 8 to split. */ + min(sub.edge_v0.T, sub.edge_v1.T) >= 2 && /* Must be uniform and at least 2 to split. */ + max(sub.edge_u0.T, sub.edge_u1.T) / min(sub.edge_u0.T, sub.edge_u1.T) > 1.5f) + { + split_v = true; + } + if (min(sub.edge_v0.T, sub.edge_v1.T) > 8 && + min(sub.edge_u0.T, sub.edge_u1.T) >= 2 && + max(sub.edge_v0.T, sub.edge_v1.T) / min(sub.edge_v0.T, sub.edge_v1.T) > 1.5f) + { + split_u = true; + } + /* clang-format on */ + + /* Alternate axis. */ + if (split_u && split_v) { + split_u = depth % 2; + } + + if (!split_u && !split_v) { + /* Add the unsplit subpatch. */ + subpatches.push_back(sub); + Subpatch &subpatch = subpatches[subpatches.size() - 1]; + + /* Update T values and offsets. */ + for (int i = 0; i < 4; i++) { + Subpatch::edge_t &edge = subpatch.edges[i]; + + edge.offset = edge.edge->T; + edge.edge->T += edge.T; + } + } + else { + /* Copy into new subpatches. 
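Each half starts as a full copy of the parent: the two outer edges running across the split axis are partitioned between the halves, and the cut itself becomes a newly allocated Edge shared by both halves so they agree on its tessellation factor.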
*/ + Subpatch sub_a = sub; + Subpatch sub_b = sub; + + /* Pointers to various subpatch elements. */ + Subpatch::edge_t *sub_across_0, *sub_across_1; + Subpatch::edge_t *sub_a_across_0, *sub_a_across_1; + Subpatch::edge_t *sub_b_across_0, *sub_b_across_1; + + Subpatch::edge_t *sub_a_split, *sub_b_split; + + float2 *Pa, *Pb, *Pc, *Pd; + + /* Set pointers based on split axis. */ + if (split_u) { + sub_across_0 = &sub.edge_u0; + sub_across_1 = &sub.edge_u1; + sub_a_across_0 = &sub_a.edge_u0; + sub_a_across_1 = &sub_a.edge_u1; + sub_b_across_0 = &sub_b.edge_u0; + sub_b_across_1 = &sub_b.edge_u1; + + sub_a_split = &sub_a.edge_v1; + sub_b_split = &sub_b.edge_v0; + + Pa = &sub_a.c11; + Pb = &sub_a.c10; + Pc = &sub_b.c01; + Pd = &sub_b.c00; + } + else { + sub_across_0 = &sub.edge_v0; + sub_across_1 = &sub.edge_v1; + sub_a_across_0 = &sub_a.edge_v0; + sub_a_across_1 = &sub_a.edge_v1; + sub_b_across_0 = &sub_b.edge_v0; + sub_b_across_1 = &sub_b.edge_v1; + + sub_a_split = &sub_a.edge_u0; + sub_b_split = &sub_b.edge_u1; + + Pa = &sub_a.c10; + Pb = &sub_a.c00; + Pc = &sub_b.c11; + Pd = &sub_b.c01; + } + + /* Partition edges */ + float2 P0, P1; + + partition_edge( + sub.patch, &P0, &sub_a_across_0->T, &sub_b_across_0->T, *Pd, *Pb, sub_across_0->T); + partition_edge( + sub.patch, &P1, &sub_a_across_1->T, &sub_b_across_1->T, *Pc, *Pa, sub_across_1->T); + + /* Split */ + *Pa = P1; + *Pb = P0; + + *Pc = P1; + *Pd = P0; + + int tsplit = T(sub.patch, P0, P1); + + if (depth == -2 && tsplit == 1) { + tsplit = 2; /* Ensure we can always split at depth -1. */ + } + + sub_a_split->T = tsplit; + sub_b_split->T = tsplit; + + resolve_edge_factors(sub_a); + resolve_edge_factors(sub_b); + + /* Create new edge */ + Edge &edge = *alloc_edge(); + + sub_a_split->edge = &edge; + sub_b_split->edge = &edge; + + sub_a_split->offset = 0; + sub_b_split->offset = 0; + + sub_a_split->indices_decrease_along_edge = false; + sub_b_split->indices_decrease_along_edge = true; + + sub_a_split->sub_edges_created_in_reverse_order = !split_u; + sub_b_split->sub_edges_created_in_reverse_order = !split_u; + + edge.top_indices_decrease = sub_across_1->sub_edges_created_in_reverse_order; + edge.bottom_indices_decrease = sub_across_0->sub_edges_created_in_reverse_order; + + /* Recurse */ + edge.T = 0; + split(sub_a, depth + 1); + + int edge_t = edge.T; + (void)edge_t; + + edge.top_offset = sub_across_1->edge->T; + edge.bottom_offset = sub_across_0->edge->T; + + edge.T = 0; /* We calculate T twice along each edge. :/ */ + split(sub_b, depth + 1); + + assert(edge.T == edge_t); /* If this fails we will crash at some later point! 
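Both recursive calls accumulate T independently along the shared edge, and post_split() sizes the edge's interior vertex allocation from edge.T, so the two passes must agree; a mismatch would make get_vert_along_edge() index past the allocated range.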
*/ + + edge.top = sub_across_1->edge; + edge.bottom = sub_across_0->edge; + } +} + +int DiagSplit::alloc_verts(int n) +{ + int a = num_alloced_verts; + num_alloced_verts += n; + return a; +} + +Edge *DiagSplit::alloc_edge() +{ + edges.emplace_back(); + return &edges.back(); +} + +void DiagSplit::split_patches(Patch *patches, size_t patches_byte_stride) +{ + int patch_index = 0; + + for (int f = 0; f < params.mesh->get_num_subd_faces(); f++) { + Mesh::SubdFace face = params.mesh->get_subd_face(f); + + Patch *patch = (Patch *)(((char *)patches) + patch_index * patches_byte_stride); + + if (face.is_quad()) { + patch_index++; + + split_quad(face, patch); + } + else { + patch_index += face.num_corners; + + split_ngon(face, patch, patches_byte_stride); + } + } + + params.mesh->vert_to_stitching_key_map.clear(); + params.mesh->vert_stitching_map.clear(); + + post_split(); +} + +static Edge *create_edge_from_corner(DiagSplit *split, + const Mesh *mesh, + const Mesh::SubdFace &face, + int corner, + bool &reversed, + int v0, + int v1) +{ + int a = mesh->get_subd_face_corners()[face.start_corner + mod(corner + 0, face.num_corners)]; + int b = mesh->get_subd_face_corners()[face.start_corner + mod(corner + 1, face.num_corners)]; + + reversed = !(b < a); + + if (b < a) { + swap(a, b); + swap(v0, v1); + } + + Edge *edge = split->alloc_edge(); + + edge->is_stitch_edge = true; + edge->stitch_start_vert_index = a; + edge->stitch_end_vert_index = b; + + edge->start_vert_index = v0; + edge->end_vert_index = v1; + + edge->stitch_edge_key = {a, b}; + + return edge; +} + +void DiagSplit::split_quad(const Mesh::SubdFace &face, Patch *patch) +{ + Subpatch subpatch(patch); + + int v = alloc_verts(4); + + bool v0_reversed, u1_reversed, v1_reversed, u0_reversed; + subpatch.edge_v0.edge = create_edge_from_corner( + this, params.mesh, face, 3, v0_reversed, v + 3, v + 0); + subpatch.edge_u1.edge = create_edge_from_corner( + this, params.mesh, face, 2, u1_reversed, v + 2, v + 3); + subpatch.edge_v1.edge = create_edge_from_corner( + this, params.mesh, face, 1, v1_reversed, v + 1, v + 2); + subpatch.edge_u0.edge = create_edge_from_corner( + this, params.mesh, face, 0, u0_reversed, v + 0, v + 1); + + subpatch.edge_v0.sub_edges_created_in_reverse_order = !v0_reversed; + subpatch.edge_u1.sub_edges_created_in_reverse_order = u1_reversed; + subpatch.edge_v1.sub_edges_created_in_reverse_order = v1_reversed; + subpatch.edge_u0.sub_edges_created_in_reverse_order = !u0_reversed; + + subpatch.edge_v0.indices_decrease_along_edge = v0_reversed; + subpatch.edge_u1.indices_decrease_along_edge = u1_reversed; + subpatch.edge_v1.indices_decrease_along_edge = v1_reversed; + subpatch.edge_u0.indices_decrease_along_edge = u0_reversed; + + /* Forces a split in both axis for quads, needed to match split of ngons into quads. 
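An ngon face is first decomposed into one quad per corner, so a plain quad must likewise be cut at least once in each direction; starting the recursion at depth -2, with tsplit forced to at least 2 there, keeps the second, perpendicular cut available at depth -1 before adaptive splitting takes over, so edges shared between quad and ngon faces stay crack-free.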
*/ + subpatch.edge_u0.T = DSPLIT_NON_UNIFORM; + subpatch.edge_u1.T = DSPLIT_NON_UNIFORM; + subpatch.edge_v0.T = DSPLIT_NON_UNIFORM; + subpatch.edge_v1.T = DSPLIT_NON_UNIFORM; + + split(subpatch, -2); +} + +static Edge *create_split_edge_from_corner(DiagSplit *split, + const Mesh *mesh, + const Mesh::SubdFace &face, + int corner, + int side, + bool &reversed, + int v0, + int v1, + int vc) +{ + Edge *edge = split->alloc_edge(); + + int a = mesh->get_subd_face_corners()[face.start_corner + mod(corner + 0, face.num_corners)]; + int b = mesh->get_subd_face_corners()[face.start_corner + mod(corner + 1, face.num_corners)]; + + if (b < a) { + edge->stitch_edge_key = {b, a}; + } + else { + edge->stitch_edge_key = {a, b}; + } + + reversed = !(b < a); + + if (side == 0) { + a = vc; + } + else { + b = vc; + } + + if (!reversed) { + swap(a, b); + swap(v0, v1); + } + + edge->is_stitch_edge = true; + edge->stitch_start_vert_index = a; + edge->stitch_end_vert_index = b; + + edge->start_vert_index = v0; + edge->end_vert_index = v1; + + return edge; +} + +void DiagSplit::split_ngon(const Mesh::SubdFace &face, Patch *patches, size_t patches_byte_stride) +{ + Edge *prev_edge_u0 = nullptr; + Edge *first_edge_v0 = nullptr; + + for (int corner = 0; corner < face.num_corners; corner++) { + Patch *patch = (Patch *)(((char *)patches) + corner * patches_byte_stride); + + Subpatch subpatch(patch); + + int v = alloc_verts(4); + + /* Setup edges. */ + Edge *edge_u1 = alloc_edge(); + Edge *edge_v1 = alloc_edge(); + + edge_v1->is_stitch_edge = true; + edge_u1->is_stitch_edge = true; + + edge_u1->stitch_start_vert_index = -(face.start_corner + mod(corner + 0, face.num_corners)) - + 1; + edge_u1->stitch_end_vert_index = STITCH_NGON_CENTER_VERT_INDEX_OFFSET + face.ptex_offset; + + edge_u1->start_vert_index = v + 3; + edge_u1->end_vert_index = v + 2; + + edge_u1->stitch_edge_key = {edge_u1->stitch_start_vert_index, edge_u1->stitch_end_vert_index}; + + edge_v1->stitch_start_vert_index = -(face.start_corner + mod(corner + 1, face.num_corners)) - + 1; + edge_v1->stitch_end_vert_index = STITCH_NGON_CENTER_VERT_INDEX_OFFSET + face.ptex_offset; + + edge_v1->start_vert_index = v + 1; + edge_v1->end_vert_index = v + 2; + + edge_v1->stitch_edge_key = {edge_v1->stitch_start_vert_index, edge_v1->stitch_end_vert_index}; + + bool v0_reversed, u0_reversed; + + subpatch.edge_v0.edge = create_split_edge_from_corner(this, + params.mesh, + face, + corner - 1, + 0, + v0_reversed, + v + 3, + v + 0, + STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG); + + subpatch.edge_u1.edge = edge_u1; + subpatch.edge_v1.edge = edge_v1; + + subpatch.edge_u0.edge = create_split_edge_from_corner(this, + params.mesh, + face, + corner + 0, + 1, + u0_reversed, + v + 0, + v + 1, + STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG); + + subpatch.edge_v0.sub_edges_created_in_reverse_order = !v0_reversed; + subpatch.edge_u1.sub_edges_created_in_reverse_order = false; + subpatch.edge_v1.sub_edges_created_in_reverse_order = true; + subpatch.edge_u0.sub_edges_created_in_reverse_order = !u0_reversed; + + subpatch.edge_v0.indices_decrease_along_edge = v0_reversed; + subpatch.edge_u1.indices_decrease_along_edge = false; + subpatch.edge_v1.indices_decrease_along_edge = true; + subpatch.edge_u0.indices_decrease_along_edge = u0_reversed; + + /* Perform split. 
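Unlike split_quad(), the edge factors are computed up front and the recursion starts at depth 0: the per-corner subpatch already matches the ngon decomposition, so no forced split is needed.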
*/ + { + subpatch.edge_u0.T = T(subpatch.patch, subpatch.c00, subpatch.c10); + subpatch.edge_u1.T = T(subpatch.patch, subpatch.c01, subpatch.c11); + subpatch.edge_v0.T = T(subpatch.patch, subpatch.c00, subpatch.c01); + subpatch.edge_v1.T = T(subpatch.patch, subpatch.c10, subpatch.c11); + + resolve_edge_factors(subpatch); + + split(subpatch, 0); + } + + /* Update offsets after T is known from split. */ + edge_u1->top = subpatch.edge_v0.edge; + edge_u1->stitch_top_offset = edge_u1->top->T * (v0_reversed ? -1 : 1); + edge_v1->top = subpatch.edge_u0.edge; + edge_v1->stitch_top_offset = edge_v1->top->T * (!u0_reversed ? -1 : 1); + + if (corner == 0) { + first_edge_v0 = subpatch.edge_v0.edge; + } + + if (prev_edge_u0) { + if (v0_reversed) { + subpatch.edge_v0.edge->stitch_offset = prev_edge_u0->T; + } + else { + prev_edge_u0->stitch_offset = subpatch.edge_v0.edge->T; + } + + int T = subpatch.edge_v0.edge->T + prev_edge_u0->T; + subpatch.edge_v0.edge->stitch_edge_T = T; + prev_edge_u0->stitch_edge_T = T; + } + + if (corner == face.num_corners - 1) { + if (v0_reversed) { + subpatch.edge_u0.edge->stitch_offset = first_edge_v0->T; + } + else { + first_edge_v0->stitch_offset = subpatch.edge_u0.edge->T; + } + + int T = first_edge_v0->T + subpatch.edge_u0.edge->T; + first_edge_v0->stitch_edge_T = T; + subpatch.edge_u0.edge->stitch_edge_T = T; + } + + prev_edge_u0 = subpatch.edge_u0.edge; + } +} + +void DiagSplit::post_split() +{ + int num_stitch_verts = 0; + + /* All patches are now split, and all T values known. */ + + foreach (Edge &edge, edges) { + if (edge.second_vert_index < 0) { + edge.second_vert_index = alloc_verts(edge.T - 1); + } + + if (edge.is_stitch_edge) { + num_stitch_verts = max(num_stitch_verts, + max(edge.stitch_start_vert_index, edge.stitch_end_vert_index)); + } + } + + num_stitch_verts += 1; + + /* Map of edge key to edge stitching vert offset. */ + struct pair_hasher { + size_t operator()(const pair &k) const + { + return hash_uint2(k.first, k.second); + } + }; + typedef unordered_map, int, pair_hasher> edge_stitch_verts_map_t; + edge_stitch_verts_map_t edge_stitch_verts_map; + + foreach (Edge &edge, edges) { + if (edge.is_stitch_edge) { + if (edge.stitch_edge_T == 0) { + edge.stitch_edge_T = edge.T; + } + + if (edge_stitch_verts_map.find(edge.stitch_edge_key) == edge_stitch_verts_map.end()) { + edge_stitch_verts_map[edge.stitch_edge_key] = num_stitch_verts; + num_stitch_verts += edge.stitch_edge_T - 1; + } + } + } + + /* Set start and end indices for edges generated from a split. */ + foreach (Edge &edge, edges) { + if (edge.start_vert_index < 0) { + /* Fix up offsets. */ + if (edge.top_indices_decrease) { + edge.top_offset = edge.top->T - edge.top_offset; + } + + edge.start_vert_index = edge.top->get_vert_along_edge(edge.top_offset); + } + + if (edge.end_vert_index < 0) { + if (edge.bottom_indices_decrease) { + edge.bottom_offset = edge.bottom->T - edge.bottom_offset; + } + + edge.end_vert_index = edge.bottom->get_vert_along_edge(edge.bottom_offset); + } + } + + int vert_offset = params.mesh->verts.size(); + + /* Add verts to stitching map. */ + foreach (const Edge &edge, edges) { + if (edge.is_stitch_edge) { + int second_stitch_vert_index = edge_stitch_verts_map[edge.stitch_edge_key]; + + for (int i = 0; i <= edge.T; i++) { + /* Get proper stitching key. 
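The key identifies a position on the base mesh rather than a generated vertex: endpoints reuse the stitch_start/end_vert_index recorded when the edge was created, interior vertices index into the range reserved for this edge in edge_stitch_verts_map, and the special ngon tags are remapped below; vertices generated on either side of a shared edge therefore resolve to the same key and can later be merged via vert_stitching_map.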
*/ + int key; + + if (i == 0) { + key = edge.stitch_start_vert_index; + } + else if (i == edge.T) { + key = edge.stitch_end_vert_index; + } + else { + key = second_stitch_vert_index + i - 1 + edge.stitch_offset; + } + + if (key == STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG) { + if (i == 0) { + key = second_stitch_vert_index - 1 + edge.stitch_offset; + } + else if (i == edge.T) { + key = second_stitch_vert_index - 1 + edge.T; + } + } + else if (key < 0 && edge.top) { /* ngon spoke edge */ + int s = edge_stitch_verts_map[edge.top->stitch_edge_key]; + if (edge.stitch_top_offset >= 0) { + key = s - 1 + edge.stitch_top_offset; + } + else { + key = s - 1 + edge.top->stitch_edge_T + edge.stitch_top_offset; + } + } + + /* Get real vert index. */ + int vert = edge.get_vert_along_edge(i) + vert_offset; + + /* Add to map */ + if (params.mesh->vert_to_stitching_key_map.find(vert) == + params.mesh->vert_to_stitching_key_map.end()) { + params.mesh->vert_to_stitching_key_map[vert] = key; + params.mesh->vert_stitching_map.insert({key, vert}); + } + } + } + } + + /* Dice; TODO(mai): Move this out of split. */ + QuadDice dice(params); + + int num_verts = num_alloced_verts; + int num_triangles = 0; + + for (size_t i = 0; i < subpatches.size(); i++) { + subpatches[i].inner_grid_vert_offset = num_verts; + num_verts += subpatches[i].calc_num_inner_verts(); + num_triangles += subpatches[i].calc_num_triangles(); + } + + dice.reserve(num_verts, num_triangles); + + for (size_t i = 0; i < subpatches.size(); i++) { + Subpatch &sub = subpatches[i]; + + sub.edge_u0.T = max(sub.edge_u0.T, 1); + sub.edge_u1.T = max(sub.edge_u1.T, 1); + sub.edge_v0.T = max(sub.edge_v0.T, 1); + sub.edge_v1.T = max(sub.edge_v1.T, 1); + + dice.dice(sub); + } + + /* Cleanup */ + subpatches.clear(); + edges.clear(); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/subd/split.h b/intern/cycles/subd/split.h new file mode 100644 index 00000000000..e876f34c419 --- /dev/null +++ b/intern/cycles/subd/split.h @@ -0,0 +1,75 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SUBD_SPLIT_H__ +#define __SUBD_SPLIT_H__ + +/* DiagSplit: Parallel, Crack-free, Adaptive Tessellation for Micropolygon Rendering + * Splits up patches and determines edge tessellation factors for dicing. Patch + * evaluation at arbitrary points is required for this to work. See the paper + * for more details. */ + +#include "subd/dice.h" +#include "subd/subpatch.h" + +#include "util/deque.h" +#include "util/types.h" +#include "util/vector.h" + +#include + +CCL_NAMESPACE_BEGIN + +class Mesh; +class Patch; + +class DiagSplit { + SubdParams params; + + vector subpatches; + /* `deque` is used so that element pointers remain valid when size is changed. 
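alloc_edge() returns raw pointers into this container that are stored long-term (Subpatch::edge_t::edge, Edge::top, Edge::bottom), which growth of a vector could invalidate by reallocating.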
*/ + deque edges; + + float3 to_world(Patch *patch, float2 uv); + int T(Patch *patch, float2 Pstart, float2 Pend, bool recursive_resolve = false); + + void limit_edge_factor(int &T, Patch *patch, float2 Pstart, float2 Pend); + void resolve_edge_factors(Subpatch &sub); + + void partition_edge( + Patch *patch, float2 *P, int *t0, int *t1, float2 Pstart, float2 Pend, int t); + + void split(Subpatch &sub, int depth = 0); + + int num_alloced_verts = 0; + int alloc_verts(int n); /* Returns start index of new verts. */ + + public: + Edge *alloc_edge(); + + explicit DiagSplit(const SubdParams ¶ms); + + void split_patches(Patch *patches, size_t patches_byte_stride); + + void split_quad(const Mesh::SubdFace &face, Patch *patch); + void split_ngon(const Mesh::SubdFace &face, Patch *patches, size_t patches_byte_stride); + + void post_split(); +}; + +CCL_NAMESPACE_END + +#endif /* __SUBD_SPLIT_H__ */ diff --git a/intern/cycles/subd/subd_dice.cpp b/intern/cycles/subd/subd_dice.cpp deleted file mode 100644 index a4019a5d639..00000000000 --- a/intern/cycles/subd/subd_dice.cpp +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "scene/camera.h" -#include "scene/mesh.h" - -#include "subd/subd_dice.h" -#include "subd/subd_patch.h" - -CCL_NAMESPACE_BEGIN - -/* EdgeDice Base */ - -EdgeDice::EdgeDice(const SubdParams ¶ms_) : params(params_) -{ - mesh_P = NULL; - mesh_N = NULL; - vert_offset = 0; - - params.mesh->attributes.add(ATTR_STD_VERTEX_NORMAL); - - if (params.ptex) { - params.mesh->attributes.add(ATTR_STD_PTEX_UV); - params.mesh->attributes.add(ATTR_STD_PTEX_FACE_ID); - } -} - -void EdgeDice::reserve(int num_verts, int num_triangles) -{ - Mesh *mesh = params.mesh; - - vert_offset = mesh->get_verts().size(); - tri_offset = mesh->num_triangles(); - - mesh->resize_mesh(mesh->get_verts().size() + num_verts, mesh->num_triangles()); - mesh->reserve_mesh(mesh->get_verts().size() + num_verts, mesh->num_triangles() + num_triangles); - - Attribute *attr_vN = mesh->attributes.add(ATTR_STD_VERTEX_NORMAL); - - mesh_P = mesh->verts.data() + vert_offset; - mesh_N = attr_vN->data_float3() + vert_offset; - - params.mesh->num_subd_verts += num_verts; -} - -void EdgeDice::set_vert(Patch *patch, int index, float2 uv) -{ - float3 P, N; - - patch->eval(&P, NULL, NULL, &N, uv.x, uv.y); - - assert(index < params.mesh->verts.size()); - - mesh_P[index] = P; - mesh_N[index] = N; - params.mesh->vert_patch_uv[index + vert_offset] = make_float2(uv.x, uv.y); -} - -void EdgeDice::add_triangle(Patch *patch, int v0, int v1, int v2) -{ - Mesh *mesh = params.mesh; - - mesh->add_triangle(v0 + vert_offset, v1 + vert_offset, v2 + vert_offset, patch->shader, true); - params.mesh->triangle_patch[params.mesh->num_triangles() - 1] = patch->patch_index; - - tri_offset++; -} - -void EdgeDice::stitch_triangles(Subpatch &sub, int edge) -{ - int Mu = max(sub.edge_u0.T, sub.edge_u1.T); - int Mv = max(sub.edge_v0.T, sub.edge_v1.T); - Mu = max(Mu, 2); - Mv 
= max(Mv, 2); - - int outer_T = sub.edges[edge].T; - int inner_T = ((edge % 2) == 0) ? Mv - 2 : Mu - 2; - - if (inner_T < 0 || outer_T < 0) - return; // XXX avoid crashes for Mu or Mv == 1, missing polygons - - /* stitch together two arrays of verts with triangles. at each step, - * we compare using the next verts on both sides, to find the split - * direction with the smallest diagonal, and use that in order to keep - * the triangle shape reasonable. */ - for (size_t i = 0, j = 0; i < inner_T || j < outer_T;) { - int v0, v1, v2; - - v0 = sub.get_vert_along_grid_edge(edge, i); - v1 = sub.get_vert_along_edge(edge, j); - - if (j == outer_T) { - v2 = sub.get_vert_along_grid_edge(edge, ++i); - } - else if (i == inner_T) { - v2 = sub.get_vert_along_edge(edge, ++j); - } - else { - /* length of diagonals */ - float len1 = len_squared(mesh_P[sub.get_vert_along_grid_edge(edge, i)] - - mesh_P[sub.get_vert_along_edge(edge, j + 1)]); - float len2 = len_squared(mesh_P[sub.get_vert_along_edge(edge, j)] - - mesh_P[sub.get_vert_along_grid_edge(edge, i + 1)]); - - /* use smallest diagonal */ - if (len1 < len2) - v2 = sub.get_vert_along_edge(edge, ++j); - else - v2 = sub.get_vert_along_grid_edge(edge, ++i); - } - - add_triangle(sub.patch, v1, v0, v2); - } -} - -/* QuadDice */ - -QuadDice::QuadDice(const SubdParams ¶ms_) : EdgeDice(params_) -{ -} - -float2 QuadDice::map_uv(Subpatch &sub, float u, float v) -{ - /* map UV from subpatch to patch parametric coordinates */ - float2 d0 = interp(sub.c00, sub.c01, v); - float2 d1 = interp(sub.c10, sub.c11, v); - return interp(d0, d1, u); -} - -float3 QuadDice::eval_projected(Subpatch &sub, float u, float v) -{ - float2 uv = map_uv(sub, u, v); - float3 P; - - sub.patch->eval(&P, NULL, NULL, NULL, uv.x, uv.y); - if (params.camera) - P = transform_perspective(¶ms.camera->worldtoraster, P); - - return P; -} - -void QuadDice::set_vert(Subpatch &sub, int index, float u, float v) -{ - EdgeDice::set_vert(sub.patch, index, map_uv(sub, u, v)); -} - -void QuadDice::set_side(Subpatch &sub, int edge) -{ - int t = sub.edges[edge].T; - - /* set verts on the edge of the patch */ - for (int i = 0; i < t; i++) { - float f = i / (float)t; - - float u, v; - switch (edge) { - case 0: - u = 0; - v = f; - break; - case 1: - u = f; - v = 1; - break; - case 2: - u = 1; - v = 1.0f - f; - break; - case 3: - default: - u = 1.0f - f; - v = 0; - break; - } - - set_vert(sub, sub.get_vert_along_edge(edge, i), u, v); - } -} - -float QuadDice::quad_area(const float3 &a, const float3 &b, const float3 &c, const float3 &d) -{ - return triangle_area(a, b, d) + triangle_area(a, d, c); -} - -float QuadDice::scale_factor(Subpatch &sub, int Mu, int Mv) -{ - /* estimate area as 4x largest of 4 quads */ - float3 P[3][3]; - - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - P[i][j] = eval_projected(sub, i * 0.5f, j * 0.5f); - - float A1 = quad_area(P[0][0], P[1][0], P[0][1], P[1][1]); - float A2 = quad_area(P[1][0], P[2][0], P[1][1], P[2][1]); - float A3 = quad_area(P[0][1], P[1][1], P[0][2], P[1][2]); - float A4 = quad_area(P[1][1], P[2][1], P[1][2], P[2][2]); - float Apatch = max(A1, max(A2, max(A3, A4))) * 4.0f; - - /* solve for scaling factor */ - float Atri = params.dicing_rate * params.dicing_rate * 0.5f; - float Ntris = Apatch / Atri; - - // XXX does the -sqrt solution matter - // XXX max(D, 0.0) is highly suspicious, need to test cases - // where D goes negative - float N = 0.5f * (Ntris - (sub.edge_u0.T + sub.edge_u1.T + sub.edge_v0.T + sub.edge_v1.T)); - float D = 4.0f * N * Mu * Mv + (Mu 
+ Mv) * (Mu + Mv); - float S = (Mu + Mv + sqrtf(max(D, 0.0f))) / (2 * Mu * Mv); - - return S; -} - -void QuadDice::add_grid(Subpatch &sub, int Mu, int Mv, int offset) -{ - /* create inner grid */ - float du = 1.0f / (float)Mu; - float dv = 1.0f / (float)Mv; - - for (int j = 1; j < Mv; j++) { - for (int i = 1; i < Mu; i++) { - float u = i * du; - float v = j * dv; - - set_vert(sub, offset + (i - 1) + (j - 1) * (Mu - 1), u, v); - - if (i < Mu - 1 && j < Mv - 1) { - int i1 = offset + (i - 1) + (j - 1) * (Mu - 1); - int i2 = offset + i + (j - 1) * (Mu - 1); - int i3 = offset + i + j * (Mu - 1); - int i4 = offset + (i - 1) + j * (Mu - 1); - - add_triangle(sub.patch, i1, i2, i3); - add_triangle(sub.patch, i1, i3, i4); - } - } - } -} - -void QuadDice::dice(Subpatch &sub) -{ - /* compute inner grid size with scale factor */ - int Mu = max(sub.edge_u0.T, sub.edge_u1.T); - int Mv = max(sub.edge_v0.T, sub.edge_v1.T); - -#if 0 /* Doesn't work very well, especially at grazing angles. */ - float S = scale_factor(sub, ef, Mu, Mv); -#else - float S = 1.0f; -#endif - - Mu = max((int)ceilf(S * Mu), 2); // XXX handle 0 & 1? - Mv = max((int)ceilf(S * Mv), 2); // XXX handle 0 & 1? - - /* inner grid */ - add_grid(sub, Mu, Mv, sub.inner_grid_vert_offset); - - /* sides */ - set_side(sub, 0); - set_side(sub, 1); - set_side(sub, 2); - set_side(sub, 3); - - stitch_triangles(sub, 0); - stitch_triangles(sub, 1); - stitch_triangles(sub, 2); - stitch_triangles(sub, 3); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/subd/subd_dice.h b/intern/cycles/subd/subd_dice.h deleted file mode 100644 index ee63403d40c..00000000000 --- a/intern/cycles/subd/subd_dice.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SUBD_DICE_H__ -#define __SUBD_DICE_H__ - -/* DX11 like EdgeDice implementation, with different tessellation factors for - * each edge for watertight tessellation, with subpatch remapping to work with - * DiagSplit. For more algorithm details, see the DiagSplit paper or the - * ARB_tessellation_shader OpenGL extension, Section 2.X.2. 
*/ - -#include "util/util_types.h" -#include "util/util_vector.h" - -#include "subd/subd_subpatch.h" - -CCL_NAMESPACE_BEGIN - -class Camera; -class Mesh; -class Patch; - -struct SubdParams { - Mesh *mesh; - bool ptex; - - int test_steps; - int split_threshold; - float dicing_rate; - int max_level; - Camera *camera; - Transform objecttoworld; - - SubdParams(Mesh *mesh_, bool ptex_ = false) - { - mesh = mesh_; - ptex = ptex_; - - test_steps = 3; - split_threshold = 1; - dicing_rate = 1.0f; - max_level = 12; - camera = NULL; - } -}; - -/* EdgeDice Base */ - -class EdgeDice { - public: - SubdParams params; - float3 *mesh_P; - float3 *mesh_N; - size_t vert_offset; - size_t tri_offset; - - explicit EdgeDice(const SubdParams ¶ms); - - void reserve(int num_verts, int num_triangles); - - void set_vert(Patch *patch, int index, float2 uv); - void add_triangle(Patch *patch, int v0, int v1, int v2); - - void stitch_triangles(Subpatch &sub, int edge); -}; - -/* Quad EdgeDice */ - -class QuadDice : public EdgeDice { - public: - explicit QuadDice(const SubdParams ¶ms); - - float3 eval_projected(Subpatch &sub, float u, float v); - - float2 map_uv(Subpatch &sub, float u, float v); - void set_vert(Subpatch &sub, int index, float u, float v); - - void add_grid(Subpatch &sub, int Mu, int Mv, int offset); - - void set_side(Subpatch &sub, int edge); - - float quad_area(const float3 &a, const float3 &b, const float3 &c, const float3 &d); - float scale_factor(Subpatch &sub, int Mu, int Mv); - - void dice(Subpatch &sub); -}; - -CCL_NAMESPACE_END - -#endif /* __SUBD_DICE_H__ */ diff --git a/intern/cycles/subd/subd_patch.cpp b/intern/cycles/subd/subd_patch.cpp deleted file mode 100644 index 23b3e6d5136..00000000000 --- a/intern/cycles/subd/subd_patch.cpp +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Parts adapted from code in the public domain in NVidia Mesh Tools. 
*/ - -#include "scene/mesh.h" - -#include "subd/subd_patch.h" - -#include "util/util_math.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* De Casteljau Evaluation */ - -static void decasteljau_cubic(float3 *P, float3 *dt, float t, const float3 cp[4]) -{ - float3 d0 = cp[0] + t * (cp[1] - cp[0]); - float3 d1 = cp[1] + t * (cp[2] - cp[1]); - float3 d2 = cp[2] + t * (cp[3] - cp[2]); - - d0 += t * (d1 - d0); - d1 += t * (d2 - d1); - - *P = d0 + t * (d1 - d0); - if (dt) - *dt = d1 - d0; -} - -static void decasteljau_bicubic( - float3 *P, float3 *du, float3 *dv, const float3 cp[16], float u, float v) -{ - float3 ucp[4], utn[4]; - - /* interpolate over u */ - decasteljau_cubic(ucp + 0, utn + 0, u, cp); - decasteljau_cubic(ucp + 1, utn + 1, u, cp + 4); - decasteljau_cubic(ucp + 2, utn + 2, u, cp + 8); - decasteljau_cubic(ucp + 3, utn + 3, u, cp + 12); - - /* interpolate over v */ - decasteljau_cubic(P, dv, v, ucp); - if (du) - decasteljau_cubic(du, NULL, v, utn); -} - -/* Linear Quad Patch */ - -void LinearQuadPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v) -{ - float3 d0 = interp(hull[0], hull[1], u); - float3 d1 = interp(hull[2], hull[3], u); - - *P = interp(d0, d1, v); - - if (dPdu && dPdv) { - *dPdu = interp(hull[1] - hull[0], hull[3] - hull[2], v); - *dPdv = interp(hull[2] - hull[0], hull[3] - hull[1], u); - } - - if (N) { - *N = normalize( - interp(interp(normals[0], normals[1], u), interp(normals[2], normals[3], u), v)); - } -} - -BoundBox LinearQuadPatch::bound() -{ - BoundBox bbox = BoundBox::empty; - - for (int i = 0; i < 4; i++) - bbox.grow(hull[i]); - - return bbox; -} - -/* Bicubic Patch */ - -void BicubicPatch::eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v) -{ - if (N) { - float3 dPdu_, dPdv_; - decasteljau_bicubic(P, &dPdu_, &dPdv_, hull, u, v); - - if (dPdu && dPdv) { - *dPdu = dPdu_; - *dPdv = dPdv_; - } - - *N = normalize(cross(dPdu_, dPdv_)); - } - else { - decasteljau_bicubic(P, dPdu, dPdv, hull, u, v); - } -} - -BoundBox BicubicPatch::bound() -{ - BoundBox bbox = BoundBox::empty; - - for (int i = 0; i < 16; i++) - bbox.grow(hull[i]); - - return bbox; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/subd/subd_patch.h b/intern/cycles/subd/subd_patch.h deleted file mode 100644 index 8fe423bc94d..00000000000 --- a/intern/cycles/subd/subd_patch.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __SUBD_PATCH_H__ -#define __SUBD_PATCH_H__ - -#include "util/util_boundbox.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -class Patch { - public: - Patch() : patch_index(0), shader(0), from_ngon(false) - { - } - - virtual ~Patch() = default; - - virtual void eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v) = 0; - - int patch_index; - int shader; - bool from_ngon; -}; - -/* Linear Quad Patch */ - -class LinearQuadPatch : public Patch { - public: - float3 hull[4]; - float3 normals[4]; - - void eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v); - BoundBox bound(); -}; - -/* Bicubic Patch */ - -class BicubicPatch : public Patch { - public: - float3 hull[16]; - - void eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v); - BoundBox bound(); -}; - -CCL_NAMESPACE_END - -#endif /* __SUBD_PATCH_H__ */ diff --git a/intern/cycles/subd/subd_patch_table.cpp b/intern/cycles/subd/subd_patch_table.cpp deleted file mode 100644 index 4e873375725..00000000000 --- a/intern/cycles/subd/subd_patch_table.cpp +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Based on code from OpenSubdiv released under this license: - * - * Copyright 2014 DreamWorks Animation LLC. - * - * Licensed under the Apache License, Version 2.0 (the "Apache License") - * with the following modification; you may not use this file except in - * compliance with the Apache License and the following modification to it: - * Section 6. Trademarks. is deleted and replaced with: - * - * 6. Trademarks. This License does not grant permission to use the trade - * names, trademarks, service marks, or product names of the Licensor - * and its affiliates, except as required to comply with Section 4(c) of - * the License and to reproduce the content of the NOTICE file. - * - * You may obtain a copy of the Apache License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the Apache License with the above modification is - * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the Apache License for the specific - * language governing permissions and limitations under the Apache License. - */ - -#include "subd/subd_patch_table.h" -#include "kernel/kernel_types.h" - -#include "util/util_math.h" - -#ifdef WITH_OPENSUBDIV -# include -#endif - -CCL_NAMESPACE_BEGIN - -#ifdef WITH_OPENSUBDIV - -using namespace OpenSubdiv; - -/* functions for building patch maps */ - -struct PatchMapQuadNode { - /* sets all the children to point to the patch of index */ - void set_child(int index) - { - for (int i = 0; i < 4; i++) { - children[i] = index | PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF; - } - } - - /* sets the child in quadrant to point to the node or patch of the given index */ - void set_child(unsigned char quadrant, int index, bool is_leaf = true) - { - assert(quadrant < 4); - children[quadrant] = index | PATCH_MAP_NODE_IS_SET | (is_leaf ? 
PATCH_MAP_NODE_IS_LEAF : 0); - } - - uint children[4]; -}; - -template static int resolve_quadrant(T &median, T &u, T &v) -{ - int quadrant = -1; - - if (u < median) { - if (v < median) { - quadrant = 0; - } - else { - quadrant = 1; - v -= median; - } - } - else { - if (v < median) { - quadrant = 3; - } - else { - quadrant = 2; - v -= median; - } - u -= median; - } - - return quadrant; -} - -static void build_patch_map(PackedPatchTable &table, - OpenSubdiv::Far::PatchTable *patch_table, - int offset) -{ - int num_faces = 0; - - for (int array = 0; array < table.num_arrays; array++) { - Far::ConstPatchParamArray params = patch_table->GetPatchParams(array); - - for (int j = 0; j < patch_table->GetNumPatches(array); j++) { - num_faces = max(num_faces, (int)params[j].GetFaceId()); - } - } - num_faces++; - - vector quadtree; - quadtree.reserve(num_faces + table.num_patches); - quadtree.resize(num_faces); - - /* adjust offsets to make indices relative to the table */ - int handle_index = -(table.num_patches * PATCH_HANDLE_SIZE); - offset += table.total_size(); - - /* populate the quadtree from the FarPatchArrays sub-patches */ - for (int array = 0; array < table.num_arrays; array++) { - Far::ConstPatchParamArray params = patch_table->GetPatchParams(array); - - for (int i = 0; i < patch_table->GetNumPatches(array); - i++, handle_index += PATCH_HANDLE_SIZE) { - const Far::PatchParam ¶m = params[i]; - unsigned short depth = param.GetDepth(); - - PatchMapQuadNode *node = &quadtree[params[i].GetFaceId()]; - - if (depth == (param.NonQuadRoot() ? 1 : 0)) { - /* special case : regular BSpline face w/ no sub-patches */ - node->set_child(handle_index + offset); - continue; - } - - int u = param.GetU(); - int v = param.GetV(); - int pdepth = param.NonQuadRoot() ? depth - 2 : depth - 1; - int half = 1 << pdepth; - - for (int j = 0; j < depth; j++) { - int delta = half >> 1; - - int quadrant = resolve_quadrant(half, u, v); - assert(quadrant >= 0); - - half = delta; - - if (j == pdepth) { - /* we have reached the depth of the sub-patch : add a leaf */ - assert(!(node->children[quadrant] & PATCH_MAP_NODE_IS_SET)); - node->set_child(quadrant, handle_index + offset, true); - break; - } - else { - /* travel down the child node of the corresponding quadrant */ - if (!(node->children[quadrant] & PATCH_MAP_NODE_IS_SET)) { - /* create a new branch in the quadrant */ - quadtree.push_back(PatchMapQuadNode()); - - int idx = (int)quadtree.size() - 1; - node->set_child(quadrant, idx * 4 + offset, false); - - node = &quadtree[idx]; - } - else { - /* travel down an existing branch */ - uint idx = node->children[quadrant] & PATCH_MAP_NODE_INDEX_MASK; - node = &(quadtree[(idx - offset) / 4]); - } - } - } - } - } - - /* copy into table */ - assert(table.table.size() == table.total_size()); - uint map_offset = table.total_size(); - - table.num_nodes = quadtree.size() * 4; - table.table.resize(table.total_size()); - - uint *data = &table.table[map_offset]; - - for (int i = 0; i < quadtree.size(); i++) { - for (int j = 0; j < 4; j++) { - assert(quadtree[i].children[j] & PATCH_MAP_NODE_IS_SET); - *(data++) = quadtree[i].children[j]; - } - } -} - -#endif - -/* packed patch table functions */ - -size_t PackedPatchTable::total_size() -{ - return num_arrays * PATCH_ARRAY_SIZE + num_indices + - num_patches * (PATCH_PARAM_SIZE + PATCH_HANDLE_SIZE) + num_nodes * PATCH_NODE_SIZE; -} - -void PackedPatchTable::pack(Far::PatchTable *patch_table, int offset) -{ - num_arrays = 0; - num_patches = 0; - num_indices = 0; - num_nodes = 0; - -#ifdef 
WITH_OPENSUBDIV - num_arrays = patch_table->GetNumPatchArrays(); - - for (int i = 0; i < num_arrays; i++) { - int patches = patch_table->GetNumPatches(i); - int num_control = patch_table->GetPatchArrayDescriptor(i).GetNumControlVertices(); - - num_patches += patches; - num_indices += patches * num_control; - } - - table.resize(total_size()); - uint *data = table.data(); - - uint *array = data; - uint *index = array + num_arrays * PATCH_ARRAY_SIZE; - uint *param = index + num_indices; - uint *handle = param + num_patches * PATCH_PARAM_SIZE; - - uint current_param = 0; - - for (int i = 0; i < num_arrays; i++) { - *(array++) = patch_table->GetPatchArrayDescriptor(i).GetType(); - *(array++) = patch_table->GetNumPatches(i); - *(array++) = (index - data) + offset; - *(array++) = (param - data) + offset; - - Far::ConstIndexArray indices = patch_table->GetPatchArrayVertices(i); - - for (int j = 0; j < indices.size(); j++) { - *(index++) = indices[j]; - } - - const Far::PatchParamTable ¶m_table = patch_table->GetPatchParamTable(); - - int num_control = patch_table->GetPatchArrayDescriptor(i).GetNumControlVertices(); - int patches = patch_table->GetNumPatches(i); - - for (int j = 0; j < patches; j++, current_param++) { - *(param++) = param_table[current_param].field0; - *(param++) = param_table[current_param].field1; - - *(handle++) = (array - data) - PATCH_ARRAY_SIZE + offset; - *(handle++) = (param - data) - PATCH_PARAM_SIZE + offset; - *(handle++) = j * num_control; - } - } - - build_patch_map(*this, patch_table, offset); -#else - (void)patch_table; - (void)offset; -#endif -} - -void PackedPatchTable::copy_adjusting_offsets(uint *dest, int doffset) -{ - uint *src = table.data(); - - /* arrays */ - for (int i = 0; i < num_arrays; i++) { - *(dest++) = *(src++); - *(dest++) = *(src++); - *(dest++) = *(src++) + doffset; - *(dest++) = *(src++) + doffset; - } - - /* indices */ - for (int i = 0; i < num_indices; i++) { - *(dest++) = *(src++); - } - - /* params */ - for (int i = 0; i < num_patches; i++) { - *(dest++) = *(src++); - *(dest++) = *(src++); - } - - /* handles */ - for (int i = 0; i < num_patches; i++) { - *(dest++) = *(src++) + doffset; - *(dest++) = *(src++) + doffset; - *(dest++) = *(src++); - } - - /* nodes */ - for (int i = 0; i < num_nodes; i++) { - *(dest++) = *(src++) + doffset; - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/subd/subd_patch_table.h b/intern/cycles/subd/subd_patch_table.h deleted file mode 100644 index 118d410f8f0..00000000000 --- a/intern/cycles/subd/subd_patch_table.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __SUBD_PATCH_TABLE_H__ -#define __SUBD_PATCH_TABLE_H__ - -#include "util/util_array.h" -#include "util/util_types.h" - -#ifdef WITH_OPENSUBDIV -# ifdef _MSC_VER -# include "iso646.h" -# endif - -# include -#endif - -CCL_NAMESPACE_BEGIN - -#ifdef WITH_OPENSUBDIV -using namespace OpenSubdiv; -#else -/* forward declare for when OpenSubdiv is unavailable */ -namespace Far { -struct PatchTable; -} -#endif - -#define PATCH_ARRAY_SIZE 4 -#define PATCH_PARAM_SIZE 2 -#define PATCH_HANDLE_SIZE 3 -#define PATCH_NODE_SIZE 1 - -struct PackedPatchTable { - array table; - - size_t num_arrays; - size_t num_indices; - size_t num_patches; - size_t num_nodes; - - /* calculated size from num_* members */ - size_t total_size(); - - void pack(Far::PatchTable *patch_table, int offset = 0); - void copy_adjusting_offsets(uint *dest, int doffset); -}; - -CCL_NAMESPACE_END - -#endif /* __SUBD_PATCH_TABLE_H__ */ diff --git a/intern/cycles/subd/subd_split.cpp b/intern/cycles/subd/subd_split.cpp deleted file mode 100644 index 6b352ab02c3..00000000000 --- a/intern/cycles/subd/subd_split.cpp +++ /dev/null @@ -1,748 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "scene/camera.h" -#include "scene/mesh.h" - -#include "subd/subd_dice.h" -#include "subd/subd_patch.h" -#include "subd/subd_split.h" - -#include "util/util_algorithm.h" -#include "util/util_foreach.h" -#include "util/util_hash.h" -#include "util/util_math.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* DiagSplit */ - -#define DSPLIT_NON_UNIFORM -1 -#define STITCH_NGON_CENTER_VERT_INDEX_OFFSET 0x60000000 -#define STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG (0x60000000 - 1) - -DiagSplit::DiagSplit(const SubdParams ¶ms_) : params(params_) -{ -} - -float3 DiagSplit::to_world(Patch *patch, float2 uv) -{ - float3 P; - - patch->eval(&P, NULL, NULL, NULL, uv.x, uv.y); - if (params.camera) - P = transform_point(¶ms.objecttoworld, P); - - return P; -} - -static void order_float2(float2 &a, float2 &b) -{ - if (b.x < a.x || b.y < a.y) { - swap(a, b); - } -} - -int DiagSplit::T(Patch *patch, float2 Pstart, float2 Pend, bool recursive_resolve) -{ - order_float2(Pstart, Pend); /* May not be necessary, but better to be safe. */ - - float Lsum = 0.0f; - float Lmax = 0.0f; - - float3 Plast = to_world(patch, Pstart); - - for (int i = 1; i < params.test_steps; i++) { - float t = i / (float)(params.test_steps - 1); - - float3 P = to_world(patch, Pstart + t * (Pend - Pstart)); - - float L; - - if (!params.camera) { - L = len(P - Plast); - } - else { - Camera *cam = params.camera; - - float pixel_width = cam->world_to_raster_size((P + Plast) * 0.5f); - L = len(P - Plast) / pixel_width; - } - - Lsum += L; - Lmax = max(L, Lmax); - - Plast = P; - } - - int tmin = (int)ceilf(Lsum / params.dicing_rate); - int tmax = (int)ceilf((params.test_steps - 1) * Lmax / - params.dicing_rate); // XXX paper says N instead of N-1, seems wrong? 
- int res = max(tmax, 1); - - if (tmax - tmin > params.split_threshold) { - if (!recursive_resolve) { - res = DSPLIT_NON_UNIFORM; - } - else { - float2 P = (Pstart + Pend) * 0.5f; - res = T(patch, Pstart, P, true) + T(patch, P, Pend, true); - } - } - - limit_edge_factor(res, patch, Pstart, Pend); - return res; -} - -void DiagSplit::partition_edge( - Patch *patch, float2 *P, int *t0, int *t1, float2 Pstart, float2 Pend, int t) -{ - if (t == DSPLIT_NON_UNIFORM) { - *P = (Pstart + Pend) * 0.5f; - *t0 = T(patch, Pstart, *P); - *t1 = T(patch, *P, Pend); - } - else { - assert(t >= 2); /* Need at least two segments to partition into. */ - - int I = (int)floorf((float)t * 0.5f); - *P = interp(Pstart, Pend, I / (float)t); - *t0 = I; - *t1 = t - I; - } -} - -void DiagSplit::limit_edge_factor(int &T, Patch *patch, float2 Pstart, float2 Pend) -{ - int max_t = 1 << params.max_level; - int max_t_for_edge = int(max_t * len(Pstart - Pend)); - - if (patch->from_ngon) { - max_t_for_edge >>= 1; /* Initial split of ngon causes edges to extend half the distance. */ - } - - T = (max_t_for_edge <= 1) ? 1 : min(T, max_t_for_edge); - - assert(T >= 1 || T == DSPLIT_NON_UNIFORM); -} - -void DiagSplit::resolve_edge_factors(Subpatch &sub) -{ - /* Resolve DSPLIT_NON_UNIFORM to actual T value if splitting is no longer possible. */ - if (sub.edge_u0.T == 1 && sub.edge_u1.T == DSPLIT_NON_UNIFORM) { - sub.edge_u1.T = T(sub.patch, sub.c01, sub.c11, true); - } - if (sub.edge_u1.T == 1 && sub.edge_u0.T == DSPLIT_NON_UNIFORM) { - sub.edge_u0.T = T(sub.patch, sub.c00, sub.c10, true); - } - if (sub.edge_v0.T == 1 && sub.edge_v1.T == DSPLIT_NON_UNIFORM) { - sub.edge_v1.T = T(sub.patch, sub.c11, sub.c10, true); - } - if (sub.edge_v1.T == 1 && sub.edge_v0.T == DSPLIT_NON_UNIFORM) { - sub.edge_v0.T = T(sub.patch, sub.c01, sub.c00, true); - } -} - -void DiagSplit::split(Subpatch &sub, int depth) -{ - if (depth > 32) { - /* We should never get here, but just in case end recursion safely. */ - assert(!"diagsplit recursion limit reached"); - - sub.edge_u0.T = 1; - sub.edge_u1.T = 1; - sub.edge_v0.T = 1; - sub.edge_v1.T = 1; - - subpatches.push_back(sub); - return; - } - - bool split_u = (sub.edge_u0.T == DSPLIT_NON_UNIFORM || sub.edge_u1.T == DSPLIT_NON_UNIFORM); - bool split_v = (sub.edge_v0.T == DSPLIT_NON_UNIFORM || sub.edge_v1.T == DSPLIT_NON_UNIFORM); - - /* Split subpatches such that the ratio of T for opposite edges doesn't - * exceed 1.5, this reduces over tessellation for some patches - */ - /* clang-format off */ - if (min(sub.edge_u0.T, sub.edge_u1.T) > 8 && /* Must be uniform and preferably greater than 8 to split. */ - min(sub.edge_v0.T, sub.edge_v1.T) >= 2 && /* Must be uniform and at least 2 to split. */ - max(sub.edge_u0.T, sub.edge_u1.T) / min(sub.edge_u0.T, sub.edge_u1.T) > 1.5f) - { - split_v = true; - } - if (min(sub.edge_v0.T, sub.edge_v1.T) > 8 && - min(sub.edge_u0.T, sub.edge_u1.T) >= 2 && - max(sub.edge_v0.T, sub.edge_v1.T) / min(sub.edge_v0.T, sub.edge_v1.T) > 1.5f) - { - split_u = true; - } - /* clang-format on */ - - /* Alternate axis. */ - if (split_u && split_v) { - split_u = depth % 2; - } - - if (!split_u && !split_v) { - /* Add the unsplit subpatch. */ - subpatches.push_back(sub); - Subpatch &subpatch = subpatches[subpatches.size() - 1]; - - /* Update T values and offsets. */ - for (int i = 0; i < 4; i++) { - Subpatch::edge_t &edge = subpatch.edges[i]; - - edge.offset = edge.edge->T; - edge.edge->T += edge.T; - } - } - else { - /* Copy into new subpatches. 
*/ - Subpatch sub_a = sub; - Subpatch sub_b = sub; - - /* Pointers to various subpatch elements. */ - Subpatch::edge_t *sub_across_0, *sub_across_1; - Subpatch::edge_t *sub_a_across_0, *sub_a_across_1; - Subpatch::edge_t *sub_b_across_0, *sub_b_across_1; - - Subpatch::edge_t *sub_a_split, *sub_b_split; - - float2 *Pa, *Pb, *Pc, *Pd; - - /* Set pointers based on split axis. */ - if (split_u) { - sub_across_0 = &sub.edge_u0; - sub_across_1 = &sub.edge_u1; - sub_a_across_0 = &sub_a.edge_u0; - sub_a_across_1 = &sub_a.edge_u1; - sub_b_across_0 = &sub_b.edge_u0; - sub_b_across_1 = &sub_b.edge_u1; - - sub_a_split = &sub_a.edge_v1; - sub_b_split = &sub_b.edge_v0; - - Pa = &sub_a.c11; - Pb = &sub_a.c10; - Pc = &sub_b.c01; - Pd = &sub_b.c00; - } - else { - sub_across_0 = &sub.edge_v0; - sub_across_1 = &sub.edge_v1; - sub_a_across_0 = &sub_a.edge_v0; - sub_a_across_1 = &sub_a.edge_v1; - sub_b_across_0 = &sub_b.edge_v0; - sub_b_across_1 = &sub_b.edge_v1; - - sub_a_split = &sub_a.edge_u0; - sub_b_split = &sub_b.edge_u1; - - Pa = &sub_a.c10; - Pb = &sub_a.c00; - Pc = &sub_b.c11; - Pd = &sub_b.c01; - } - - /* Partition edges */ - float2 P0, P1; - - partition_edge( - sub.patch, &P0, &sub_a_across_0->T, &sub_b_across_0->T, *Pd, *Pb, sub_across_0->T); - partition_edge( - sub.patch, &P1, &sub_a_across_1->T, &sub_b_across_1->T, *Pc, *Pa, sub_across_1->T); - - /* Split */ - *Pa = P1; - *Pb = P0; - - *Pc = P1; - *Pd = P0; - - int tsplit = T(sub.patch, P0, P1); - - if (depth == -2 && tsplit == 1) { - tsplit = 2; /* Ensure we can always split at depth -1. */ - } - - sub_a_split->T = tsplit; - sub_b_split->T = tsplit; - - resolve_edge_factors(sub_a); - resolve_edge_factors(sub_b); - - /* Create new edge */ - Edge &edge = *alloc_edge(); - - sub_a_split->edge = &edge; - sub_b_split->edge = &edge; - - sub_a_split->offset = 0; - sub_b_split->offset = 0; - - sub_a_split->indices_decrease_along_edge = false; - sub_b_split->indices_decrease_along_edge = true; - - sub_a_split->sub_edges_created_in_reverse_order = !split_u; - sub_b_split->sub_edges_created_in_reverse_order = !split_u; - - edge.top_indices_decrease = sub_across_1->sub_edges_created_in_reverse_order; - edge.bottom_indices_decrease = sub_across_0->sub_edges_created_in_reverse_order; - - /* Recurse */ - edge.T = 0; - split(sub_a, depth + 1); - - int edge_t = edge.T; - (void)edge_t; - - edge.top_offset = sub_across_1->edge->T; - edge.bottom_offset = sub_across_0->edge->T; - - edge.T = 0; /* We calculate T twice along each edge. :/ */ - split(sub_b, depth + 1); - - assert(edge.T == edge_t); /* If this fails we will crash at some later point! 
*/ - - edge.top = sub_across_1->edge; - edge.bottom = sub_across_0->edge; - } -} - -int DiagSplit::alloc_verts(int n) -{ - int a = num_alloced_verts; - num_alloced_verts += n; - return a; -} - -Edge *DiagSplit::alloc_edge() -{ - edges.emplace_back(); - return &edges.back(); -} - -void DiagSplit::split_patches(Patch *patches, size_t patches_byte_stride) -{ - int patch_index = 0; - - for (int f = 0; f < params.mesh->get_num_subd_faces(); f++) { - Mesh::SubdFace face = params.mesh->get_subd_face(f); - - Patch *patch = (Patch *)(((char *)patches) + patch_index * patches_byte_stride); - - if (face.is_quad()) { - patch_index++; - - split_quad(face, patch); - } - else { - patch_index += face.num_corners; - - split_ngon(face, patch, patches_byte_stride); - } - } - - params.mesh->vert_to_stitching_key_map.clear(); - params.mesh->vert_stitching_map.clear(); - - post_split(); -} - -static Edge *create_edge_from_corner(DiagSplit *split, - const Mesh *mesh, - const Mesh::SubdFace &face, - int corner, - bool &reversed, - int v0, - int v1) -{ - int a = mesh->get_subd_face_corners()[face.start_corner + mod(corner + 0, face.num_corners)]; - int b = mesh->get_subd_face_corners()[face.start_corner + mod(corner + 1, face.num_corners)]; - - reversed = !(b < a); - - if (b < a) { - swap(a, b); - swap(v0, v1); - } - - Edge *edge = split->alloc_edge(); - - edge->is_stitch_edge = true; - edge->stitch_start_vert_index = a; - edge->stitch_end_vert_index = b; - - edge->start_vert_index = v0; - edge->end_vert_index = v1; - - edge->stitch_edge_key = {a, b}; - - return edge; -} - -void DiagSplit::split_quad(const Mesh::SubdFace &face, Patch *patch) -{ - Subpatch subpatch(patch); - - int v = alloc_verts(4); - - bool v0_reversed, u1_reversed, v1_reversed, u0_reversed; - subpatch.edge_v0.edge = create_edge_from_corner( - this, params.mesh, face, 3, v0_reversed, v + 3, v + 0); - subpatch.edge_u1.edge = create_edge_from_corner( - this, params.mesh, face, 2, u1_reversed, v + 2, v + 3); - subpatch.edge_v1.edge = create_edge_from_corner( - this, params.mesh, face, 1, v1_reversed, v + 1, v + 2); - subpatch.edge_u0.edge = create_edge_from_corner( - this, params.mesh, face, 0, u0_reversed, v + 0, v + 1); - - subpatch.edge_v0.sub_edges_created_in_reverse_order = !v0_reversed; - subpatch.edge_u1.sub_edges_created_in_reverse_order = u1_reversed; - subpatch.edge_v1.sub_edges_created_in_reverse_order = v1_reversed; - subpatch.edge_u0.sub_edges_created_in_reverse_order = !u0_reversed; - - subpatch.edge_v0.indices_decrease_along_edge = v0_reversed; - subpatch.edge_u1.indices_decrease_along_edge = u1_reversed; - subpatch.edge_v1.indices_decrease_along_edge = v1_reversed; - subpatch.edge_u0.indices_decrease_along_edge = u0_reversed; - - /* Forces a split in both axis for quads, needed to match split of ngons into quads. 
*/ - subpatch.edge_u0.T = DSPLIT_NON_UNIFORM; - subpatch.edge_u1.T = DSPLIT_NON_UNIFORM; - subpatch.edge_v0.T = DSPLIT_NON_UNIFORM; - subpatch.edge_v1.T = DSPLIT_NON_UNIFORM; - - split(subpatch, -2); -} - -static Edge *create_split_edge_from_corner(DiagSplit *split, - const Mesh *mesh, - const Mesh::SubdFace &face, - int corner, - int side, - bool &reversed, - int v0, - int v1, - int vc) -{ - Edge *edge = split->alloc_edge(); - - int a = mesh->get_subd_face_corners()[face.start_corner + mod(corner + 0, face.num_corners)]; - int b = mesh->get_subd_face_corners()[face.start_corner + mod(corner + 1, face.num_corners)]; - - if (b < a) { - edge->stitch_edge_key = {b, a}; - } - else { - edge->stitch_edge_key = {a, b}; - } - - reversed = !(b < a); - - if (side == 0) { - a = vc; - } - else { - b = vc; - } - - if (!reversed) { - swap(a, b); - swap(v0, v1); - } - - edge->is_stitch_edge = true; - edge->stitch_start_vert_index = a; - edge->stitch_end_vert_index = b; - - edge->start_vert_index = v0; - edge->end_vert_index = v1; - - return edge; -} - -void DiagSplit::split_ngon(const Mesh::SubdFace &face, Patch *patches, size_t patches_byte_stride) -{ - Edge *prev_edge_u0 = nullptr; - Edge *first_edge_v0 = nullptr; - - for (int corner = 0; corner < face.num_corners; corner++) { - Patch *patch = (Patch *)(((char *)patches) + corner * patches_byte_stride); - - Subpatch subpatch(patch); - - int v = alloc_verts(4); - - /* Setup edges. */ - Edge *edge_u1 = alloc_edge(); - Edge *edge_v1 = alloc_edge(); - - edge_v1->is_stitch_edge = true; - edge_u1->is_stitch_edge = true; - - edge_u1->stitch_start_vert_index = -(face.start_corner + mod(corner + 0, face.num_corners)) - - 1; - edge_u1->stitch_end_vert_index = STITCH_NGON_CENTER_VERT_INDEX_OFFSET + face.ptex_offset; - - edge_u1->start_vert_index = v + 3; - edge_u1->end_vert_index = v + 2; - - edge_u1->stitch_edge_key = {edge_u1->stitch_start_vert_index, edge_u1->stitch_end_vert_index}; - - edge_v1->stitch_start_vert_index = -(face.start_corner + mod(corner + 1, face.num_corners)) - - 1; - edge_v1->stitch_end_vert_index = STITCH_NGON_CENTER_VERT_INDEX_OFFSET + face.ptex_offset; - - edge_v1->start_vert_index = v + 1; - edge_v1->end_vert_index = v + 2; - - edge_v1->stitch_edge_key = {edge_v1->stitch_start_vert_index, edge_v1->stitch_end_vert_index}; - - bool v0_reversed, u0_reversed; - - subpatch.edge_v0.edge = create_split_edge_from_corner(this, - params.mesh, - face, - corner - 1, - 0, - v0_reversed, - v + 3, - v + 0, - STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG); - - subpatch.edge_u1.edge = edge_u1; - subpatch.edge_v1.edge = edge_v1; - - subpatch.edge_u0.edge = create_split_edge_from_corner(this, - params.mesh, - face, - corner + 0, - 1, - u0_reversed, - v + 0, - v + 1, - STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG); - - subpatch.edge_v0.sub_edges_created_in_reverse_order = !v0_reversed; - subpatch.edge_u1.sub_edges_created_in_reverse_order = false; - subpatch.edge_v1.sub_edges_created_in_reverse_order = true; - subpatch.edge_u0.sub_edges_created_in_reverse_order = !u0_reversed; - - subpatch.edge_v0.indices_decrease_along_edge = v0_reversed; - subpatch.edge_u1.indices_decrease_along_edge = false; - subpatch.edge_v1.indices_decrease_along_edge = true; - subpatch.edge_u0.indices_decrease_along_edge = u0_reversed; - - /* Perform split. 
*/ - { - subpatch.edge_u0.T = T(subpatch.patch, subpatch.c00, subpatch.c10); - subpatch.edge_u1.T = T(subpatch.patch, subpatch.c01, subpatch.c11); - subpatch.edge_v0.T = T(subpatch.patch, subpatch.c00, subpatch.c01); - subpatch.edge_v1.T = T(subpatch.patch, subpatch.c10, subpatch.c11); - - resolve_edge_factors(subpatch); - - split(subpatch, 0); - } - - /* Update offsets after T is known from split. */ - edge_u1->top = subpatch.edge_v0.edge; - edge_u1->stitch_top_offset = edge_u1->top->T * (v0_reversed ? -1 : 1); - edge_v1->top = subpatch.edge_u0.edge; - edge_v1->stitch_top_offset = edge_v1->top->T * (!u0_reversed ? -1 : 1); - - if (corner == 0) { - first_edge_v0 = subpatch.edge_v0.edge; - } - - if (prev_edge_u0) { - if (v0_reversed) { - subpatch.edge_v0.edge->stitch_offset = prev_edge_u0->T; - } - else { - prev_edge_u0->stitch_offset = subpatch.edge_v0.edge->T; - } - - int T = subpatch.edge_v0.edge->T + prev_edge_u0->T; - subpatch.edge_v0.edge->stitch_edge_T = T; - prev_edge_u0->stitch_edge_T = T; - } - - if (corner == face.num_corners - 1) { - if (v0_reversed) { - subpatch.edge_u0.edge->stitch_offset = first_edge_v0->T; - } - else { - first_edge_v0->stitch_offset = subpatch.edge_u0.edge->T; - } - - int T = first_edge_v0->T + subpatch.edge_u0.edge->T; - first_edge_v0->stitch_edge_T = T; - subpatch.edge_u0.edge->stitch_edge_T = T; - } - - prev_edge_u0 = subpatch.edge_u0.edge; - } -} - -void DiagSplit::post_split() -{ - int num_stitch_verts = 0; - - /* All patches are now split, and all T values known. */ - - foreach (Edge &edge, edges) { - if (edge.second_vert_index < 0) { - edge.second_vert_index = alloc_verts(edge.T - 1); - } - - if (edge.is_stitch_edge) { - num_stitch_verts = max(num_stitch_verts, - max(edge.stitch_start_vert_index, edge.stitch_end_vert_index)); - } - } - - num_stitch_verts += 1; - - /* Map of edge key to edge stitching vert offset. */ - struct pair_hasher { - size_t operator()(const pair &k) const - { - return hash_uint2(k.first, k.second); - } - }; - typedef unordered_map, int, pair_hasher> edge_stitch_verts_map_t; - edge_stitch_verts_map_t edge_stitch_verts_map; - - foreach (Edge &edge, edges) { - if (edge.is_stitch_edge) { - if (edge.stitch_edge_T == 0) { - edge.stitch_edge_T = edge.T; - } - - if (edge_stitch_verts_map.find(edge.stitch_edge_key) == edge_stitch_verts_map.end()) { - edge_stitch_verts_map[edge.stitch_edge_key] = num_stitch_verts; - num_stitch_verts += edge.stitch_edge_T - 1; - } - } - } - - /* Set start and end indices for edges generated from a split. */ - foreach (Edge &edge, edges) { - if (edge.start_vert_index < 0) { - /* Fix up offsets. */ - if (edge.top_indices_decrease) { - edge.top_offset = edge.top->T - edge.top_offset; - } - - edge.start_vert_index = edge.top->get_vert_along_edge(edge.top_offset); - } - - if (edge.end_vert_index < 0) { - if (edge.bottom_indices_decrease) { - edge.bottom_offset = edge.bottom->T - edge.bottom_offset; - } - - edge.end_vert_index = edge.bottom->get_vert_along_edge(edge.bottom_offset); - } - } - - int vert_offset = params.mesh->verts.size(); - - /* Add verts to stitching map. */ - foreach (const Edge &edge, edges) { - if (edge.is_stitch_edge) { - int second_stitch_vert_index = edge_stitch_verts_map[edge.stitch_edge_key]; - - for (int i = 0; i <= edge.T; i++) { - /* Get proper stitching key. 
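A brief aside outside the patch: the edge_stitch_verts_map typedef above keys an unordered_map on a pair of stitching indices with a custom hasher; its template argument list appears to have been lost in this rendering (presumably pair<int, int>). A self-contained reconstruction of the pattern, with a plain std::hash combiner standing in for Cycles' hash_uint2():

#include <cstddef>
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <utility>

// Reconstruction of the apparent intent: map an ordered pair of stitching
// indices to the offset of that edge's first stitching vertex.
struct pair_hasher {
  size_t operator()(const std::pair<int, int> &k) const
  {
    const uint64_t a = static_cast<uint32_t>(k.first);
    const uint64_t b = static_cast<uint32_t>(k.second);
    return std::hash<uint64_t>{}((a << 32) | b);
  }
};

using edge_stitch_verts_map_t = std::unordered_map<std::pair<int, int>, int, pair_hasher>;

int main()
{
  edge_stitch_verts_map_t map;
  map[{3, 9}] = 42;  /* Offset of the first stitching vert for edge (3, 9). */
  return map.count({3, 9}) == 1 ? 0 : 1;
}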
*/ - int key; - - if (i == 0) { - key = edge.stitch_start_vert_index; - } - else if (i == edge.T) { - key = edge.stitch_end_vert_index; - } - else { - key = second_stitch_vert_index + i - 1 + edge.stitch_offset; - } - - if (key == STITCH_NGON_SPLIT_EDGE_CENTER_VERT_TAG) { - if (i == 0) { - key = second_stitch_vert_index - 1 + edge.stitch_offset; - } - else if (i == edge.T) { - key = second_stitch_vert_index - 1 + edge.T; - } - } - else if (key < 0 && edge.top) { /* ngon spoke edge */ - int s = edge_stitch_verts_map[edge.top->stitch_edge_key]; - if (edge.stitch_top_offset >= 0) { - key = s - 1 + edge.stitch_top_offset; - } - else { - key = s - 1 + edge.top->stitch_edge_T + edge.stitch_top_offset; - } - } - - /* Get real vert index. */ - int vert = edge.get_vert_along_edge(i) + vert_offset; - - /* Add to map */ - if (params.mesh->vert_to_stitching_key_map.find(vert) == - params.mesh->vert_to_stitching_key_map.end()) { - params.mesh->vert_to_stitching_key_map[vert] = key; - params.mesh->vert_stitching_map.insert({key, vert}); - } - } - } - } - - /* Dice; TODO(mai): Move this out of split. */ - QuadDice dice(params); - - int num_verts = num_alloced_verts; - int num_triangles = 0; - - for (size_t i = 0; i < subpatches.size(); i++) { - subpatches[i].inner_grid_vert_offset = num_verts; - num_verts += subpatches[i].calc_num_inner_verts(); - num_triangles += subpatches[i].calc_num_triangles(); - } - - dice.reserve(num_verts, num_triangles); - - for (size_t i = 0; i < subpatches.size(); i++) { - Subpatch &sub = subpatches[i]; - - sub.edge_u0.T = max(sub.edge_u0.T, 1); - sub.edge_u1.T = max(sub.edge_u1.T, 1); - sub.edge_v0.T = max(sub.edge_v0.T, 1); - sub.edge_v1.T = max(sub.edge_v1.T, 1); - - dice.dice(sub); - } - - /* Cleanup */ - subpatches.clear(); - edges.clear(); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/subd/subd_split.h b/intern/cycles/subd/subd_split.h deleted file mode 100644 index 7416b2fbbf8..00000000000 --- a/intern/cycles/subd/subd_split.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SUBD_SPLIT_H__ -#define __SUBD_SPLIT_H__ - -/* DiagSplit: Parallel, Crack-free, Adaptive Tessellation for Micropolygon Rendering - * Splits up patches and determines edge tessellation factors for dicing. Patch - * evaluation at arbitrary points is required for this to work. See the paper - * for more details. */ - -#include "subd/subd_dice.h" -#include "subd/subd_subpatch.h" - -#include "util/util_deque.h" -#include "util/util_types.h" -#include "util/util_vector.h" - -#include - -CCL_NAMESPACE_BEGIN - -class Mesh; -class Patch; - -class DiagSplit { - SubdParams params; - - vector subpatches; - /* `deque` is used so that element pointers remain valid when size is changed. 
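Aside, not part of the patch: the comment above is the whole reason the edges container is a deque rather than a vector (its element type, presumably Edge, is not visible in this rendering), since alloc_edge() hands out raw Edge pointers that must survive later growth. A small sketch of that guarantee, assuming nothing beyond the standard library:

#include <deque>

struct Edge {
  int T = 0;
};

int main()
{
  // std::deque never relocates existing elements when appending at the ends,
  // so pointers handed out by an alloc_edge()-style helper stay valid.
  std::deque<Edge> edges;
  edges.emplace_back();
  Edge *first = &edges.front();

  for (int i = 0; i < 10000; i++) {
    edges.emplace_back();
  }

  first->T = 7; /* Still a valid pointer; a std::vector could have reallocated. */
  return first->T == 7 ? 0 : 1;
}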
*/ - deque edges; - - float3 to_world(Patch *patch, float2 uv); - int T(Patch *patch, float2 Pstart, float2 Pend, bool recursive_resolve = false); - - void limit_edge_factor(int &T, Patch *patch, float2 Pstart, float2 Pend); - void resolve_edge_factors(Subpatch &sub); - - void partition_edge( - Patch *patch, float2 *P, int *t0, int *t1, float2 Pstart, float2 Pend, int t); - - void split(Subpatch &sub, int depth = 0); - - int num_alloced_verts = 0; - int alloc_verts(int n); /* Returns start index of new verts. */ - - public: - Edge *alloc_edge(); - - explicit DiagSplit(const SubdParams ¶ms); - - void split_patches(Patch *patches, size_t patches_byte_stride); - - void split_quad(const Mesh::SubdFace &face, Patch *patch); - void split_ngon(const Mesh::SubdFace &face, Patch *patches, size_t patches_byte_stride); - - void post_split(); -}; - -CCL_NAMESPACE_END - -#endif /* __SUBD_SPLIT_H__ */ diff --git a/intern/cycles/subd/subd_subpatch.h b/intern/cycles/subd/subd_subpatch.h deleted file mode 100644 index cdaa310916a..00000000000 --- a/intern/cycles/subd/subd_subpatch.h +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __SUBD_SUBPATCH_H__ -#define __SUBD_SUBPATCH_H__ - -#include "util/util_map.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* Subpatch */ - -class Subpatch { - public: - class Patch *patch; /* Patch this is a subpatch of. */ - int inner_grid_vert_offset; - - struct edge_t { - int T; - int offset; /* Offset along main edge, interpretation depends on the two flags below. */ - - bool indices_decrease_along_edge; - bool sub_edges_created_in_reverse_order; - - struct Edge *edge; - - int get_vert_along_edge(int n) const; - }; - - /* - * eu1 - * c01 --------- c11 - * | | - * ev0 | | ev1 - * | | - * c00 --------- c10 - * eu0 - */ - - union { - float2 corners[4]; /* UV within patch, clockwise starting from uv (0, 0) towards (0, 1) etc. */ - struct { - float2 c00, c01, c11, c10; - }; - }; - - union { - edge_t - edges[4]; /* Edges of this subpatch, each edge starts at the corner of the same index. 
*/ - struct { - edge_t edge_v0, edge_u1, edge_v1, edge_u0; - }; - }; - - explicit Subpatch(Patch *patch = nullptr) - : patch(patch), - c00(zero_float2()), - c01(make_float2(0.0f, 1.0f)), - c11(one_float2()), - c10(make_float2(1.0f, 0.0f)) - { - } - - Subpatch(Patch *patch, float2 c00, float2 c01, float2 c11, float2 c10) - : patch(patch), c00(c00), c01(c01), c11(c11), c10(c10) - { - } - - int calc_num_inner_verts() const - { - int Mu = max(edge_u0.T, edge_u1.T); - int Mv = max(edge_v0.T, edge_v1.T); - Mu = max(Mu, 2); - Mv = max(Mv, 2); - return (Mu - 1) * (Mv - 1); - } - - int calc_num_triangles() const - { - int Mu = max(edge_u0.T, edge_u1.T); - int Mv = max(edge_v0.T, edge_v1.T); - Mu = max(Mu, 2); - Mv = max(Mv, 2); - - int inner_triangles = (Mu - 2) * (Mv - 2) * 2; - int edge_triangles = edge_u0.T + edge_u1.T + edge_v0.T + edge_v1.T + (Mu - 2) * 2 + - (Mv - 2) * 2; - - return inner_triangles + edge_triangles; - } - - int get_vert_along_edge(int e, int n) const; - - int get_vert_along_grid_edge(int edge, int n) const - { - int Mu = max(edge_u0.T, edge_u1.T); - int Mv = max(edge_v0.T, edge_v1.T); - Mu = max(Mu, 2); - Mv = max(Mv, 2); - - switch (edge) { - case 0: - return inner_grid_vert_offset + n * (Mu - 1); - case 1: - return inner_grid_vert_offset + (Mu - 1) * (Mv - 2) + n; - case 2: - return inner_grid_vert_offset + ((Mu - 1) * (Mv - 1) - 1) - n * (Mu - 1); - case 3: - return inner_grid_vert_offset + (Mu - 2) - n; - } - - return -1; - } -}; - -struct Edge { - /* Number of segments the edge will be diced into, see DiagSplit paper. */ - int T; - - /* top is edge adjacent to start, bottom is adjacent to end. */ - Edge *top, *bottom; - - int top_offset, bottom_offset; - bool top_indices_decrease, bottom_indices_decrease; - - int start_vert_index; - int end_vert_index; - - /* Index of the second vert from this edges corner along the edge towards the next corner. */ - int second_vert_index; - - /* Vertices on edge are to be stitched. */ - bool is_stitch_edge; - - /* Key to match this edge with others to be stitched with. 
- * The ints in the pair are ordered stitching indices */ - pair stitch_edge_key; - - /* Full T along edge (may be larger than T for edges split from ngon edges) */ - int stitch_edge_T; - int stitch_offset; - int stitch_top_offset; - int stitch_start_vert_index; - int stitch_end_vert_index; - - Edge() - : T(0), - top(nullptr), - bottom(nullptr), - top_offset(-1), - bottom_offset(-1), - top_indices_decrease(false), - bottom_indices_decrease(false), - start_vert_index(-1), - end_vert_index(-1), - second_vert_index(-1), - is_stitch_edge(false), - stitch_edge_T(0), - stitch_offset(0) - { - } - - int get_vert_along_edge(int n) const - { - assert(n >= 0 && n <= T); - - if (n == 0) { - return start_vert_index; - } - else if (n == T) { - return end_vert_index; - } - - return second_vert_index + n - 1; - } -}; - -inline int Subpatch::edge_t::get_vert_along_edge(int n) const -{ - assert(n >= 0 && n <= T); - - if (!indices_decrease_along_edge && !sub_edges_created_in_reverse_order) { - n = offset + n; - } - else if (!indices_decrease_along_edge && sub_edges_created_in_reverse_order) { - n = edge->T - offset - T + n; - } - else if (indices_decrease_along_edge && !sub_edges_created_in_reverse_order) { - n = offset + T - n; - } - else if (indices_decrease_along_edge && sub_edges_created_in_reverse_order) { - n = edge->T - offset - n; - } - - return edge->get_vert_along_edge(n); -} - -inline int Subpatch::get_vert_along_edge(int edge, int n) const -{ - return edges[edge].get_vert_along_edge(n); -} - -CCL_NAMESPACE_END - -#endif /* __SUBD_SUBPATCH_H__ */ diff --git a/intern/cycles/subd/subpatch.h b/intern/cycles/subd/subpatch.h new file mode 100644 index 00000000000..0ba8ed88aa8 --- /dev/null +++ b/intern/cycles/subd/subpatch.h @@ -0,0 +1,219 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SUBD_SUBPATCH_H__ +#define __SUBD_SUBPATCH_H__ + +#include "util/map.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* Subpatch */ + +class Subpatch { + public: + class Patch *patch; /* Patch this is a subpatch of. */ + int inner_grid_vert_offset; + + struct edge_t { + int T; + int offset; /* Offset along main edge, interpretation depends on the two flags below. */ + + bool indices_decrease_along_edge; + bool sub_edges_created_in_reverse_order; + + struct Edge *edge; + + int get_vert_along_edge(int n) const; + }; + + /* + * eu1 + * c01 --------- c11 + * | | + * ev0 | | ev1 + * | | + * c00 --------- c10 + * eu0 + */ + + union { + float2 corners[4]; /* UV within patch, clockwise starting from uv (0, 0) towards (0, 1) etc. */ + struct { + float2 c00, c01, c11, c10; + }; + }; + + union { + edge_t + edges[4]; /* Edges of this subpatch, each edge starts at the corner of the same index. 
*/ + struct { + edge_t edge_v0, edge_u1, edge_v1, edge_u0; + }; + }; + + explicit Subpatch(Patch *patch = nullptr) + : patch(patch), + c00(zero_float2()), + c01(make_float2(0.0f, 1.0f)), + c11(one_float2()), + c10(make_float2(1.0f, 0.0f)) + { + } + + Subpatch(Patch *patch, float2 c00, float2 c01, float2 c11, float2 c10) + : patch(patch), c00(c00), c01(c01), c11(c11), c10(c10) + { + } + + int calc_num_inner_verts() const + { + int Mu = max(edge_u0.T, edge_u1.T); + int Mv = max(edge_v0.T, edge_v1.T); + Mu = max(Mu, 2); + Mv = max(Mv, 2); + return (Mu - 1) * (Mv - 1); + } + + int calc_num_triangles() const + { + int Mu = max(edge_u0.T, edge_u1.T); + int Mv = max(edge_v0.T, edge_v1.T); + Mu = max(Mu, 2); + Mv = max(Mv, 2); + + int inner_triangles = (Mu - 2) * (Mv - 2) * 2; + int edge_triangles = edge_u0.T + edge_u1.T + edge_v0.T + edge_v1.T + (Mu - 2) * 2 + + (Mv - 2) * 2; + + return inner_triangles + edge_triangles; + } + + int get_vert_along_edge(int e, int n) const; + + int get_vert_along_grid_edge(int edge, int n) const + { + int Mu = max(edge_u0.T, edge_u1.T); + int Mv = max(edge_v0.T, edge_v1.T); + Mu = max(Mu, 2); + Mv = max(Mv, 2); + + switch (edge) { + case 0: + return inner_grid_vert_offset + n * (Mu - 1); + case 1: + return inner_grid_vert_offset + (Mu - 1) * (Mv - 2) + n; + case 2: + return inner_grid_vert_offset + ((Mu - 1) * (Mv - 1) - 1) - n * (Mu - 1); + case 3: + return inner_grid_vert_offset + (Mu - 2) - n; + } + + return -1; + } +}; + +struct Edge { + /* Number of segments the edge will be diced into, see DiagSplit paper. */ + int T; + + /* top is edge adjacent to start, bottom is adjacent to end. */ + Edge *top, *bottom; + + int top_offset, bottom_offset; + bool top_indices_decrease, bottom_indices_decrease; + + int start_vert_index; + int end_vert_index; + + /* Index of the second vert from this edges corner along the edge towards the next corner. */ + int second_vert_index; + + /* Vertices on edge are to be stitched. */ + bool is_stitch_edge; + + /* Key to match this edge with others to be stitched with. 
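Aside, not part of the patch: the counting in calc_num_inner_verts() and calc_num_triangles() above is easier to see with concrete edge factors. The helper below is a hypothetical free-function mirror of those members, evaluated for one example subpatch:

#include <algorithm>
#include <cstdio>

// Hypothetical stand-in that repeats the member-function arithmetic so the
// numbers can be checked by hand.
static void count(int u0, int u1, int v0, int v1)
{
  const int Mu = std::max(std::max(u0, u1), 2);
  const int Mv = std::max(std::max(v0, v1), 2);
  const int inner_verts = (Mu - 1) * (Mv - 1);
  const int inner_triangles = (Mu - 2) * (Mv - 2) * 2;
  const int edge_triangles = u0 + u1 + v0 + v1 + (Mu - 2) * 2 + (Mv - 2) * 2;
  printf("Mu=%d Mv=%d inner_verts=%d triangles=%d\n",
         Mu, Mv, inner_verts, inner_triangles + edge_triangles);
}

int main()
{
  // Edge factors u0=4, u1=2, v0=3, v1=5 give Mu=4, Mv=5: 12 inner grid verts,
  // 12 inner triangles plus 24 triangles stitching the rim to the inner grid.
  count(4, 2, 3, 5);
  return 0;
}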
+ * The ints in the pair are ordered stitching indices */ + pair stitch_edge_key; + + /* Full T along edge (may be larger than T for edges split from ngon edges) */ + int stitch_edge_T; + int stitch_offset; + int stitch_top_offset; + int stitch_start_vert_index; + int stitch_end_vert_index; + + Edge() + : T(0), + top(nullptr), + bottom(nullptr), + top_offset(-1), + bottom_offset(-1), + top_indices_decrease(false), + bottom_indices_decrease(false), + start_vert_index(-1), + end_vert_index(-1), + second_vert_index(-1), + is_stitch_edge(false), + stitch_edge_T(0), + stitch_offset(0) + { + } + + int get_vert_along_edge(int n) const + { + assert(n >= 0 && n <= T); + + if (n == 0) { + return start_vert_index; + } + else if (n == T) { + return end_vert_index; + } + + return second_vert_index + n - 1; + } +}; + +inline int Subpatch::edge_t::get_vert_along_edge(int n) const +{ + assert(n >= 0 && n <= T); + + if (!indices_decrease_along_edge && !sub_edges_created_in_reverse_order) { + n = offset + n; + } + else if (!indices_decrease_along_edge && sub_edges_created_in_reverse_order) { + n = edge->T - offset - T + n; + } + else if (indices_decrease_along_edge && !sub_edges_created_in_reverse_order) { + n = offset + T - n; + } + else if (indices_decrease_along_edge && sub_edges_created_in_reverse_order) { + n = edge->T - offset - n; + } + + return edge->get_vert_along_edge(n); +} + +inline int Subpatch::get_vert_along_edge(int edge, int n) const +{ + return edges[edge].get_vert_along_edge(n); +} + +CCL_NAMESPACE_END + +#endif /* __SUBD_SUBPATCH_H__ */ diff --git a/intern/cycles/test/CMakeLists.txt b/intern/cycles/test/CMakeLists.txt index efd9cecb877..86a830b2b65 100644 --- a/intern/cycles/test/CMakeLists.txt +++ b/intern/cycles/test/CMakeLists.txt @@ -20,13 +20,7 @@ if(WITH_GTESTS) endif() set(INC - . .. 
- ../device - ../graph - ../kernel - ../scene - ../util ) set(ALL_CYCLES_LIBRARIES diff --git a/intern/cycles/test/integrator_adaptive_sampling_test.cpp b/intern/cycles/test/integrator_adaptive_sampling_test.cpp index 3ed6a23125d..30688605e44 100644 --- a/intern/cycles/test/integrator_adaptive_sampling_test.cpp +++ b/intern/cycles/test/integrator_adaptive_sampling_test.cpp @@ -17,7 +17,7 @@ #include "testing/testing.h" #include "integrator/adaptive_sampling.h" -#include "util/util_vector.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/test/integrator_tile_test.cpp b/intern/cycles/test/integrator_tile_test.cpp index 5bb57b48c3c..e5ffa7c153d 100644 --- a/intern/cycles/test/integrator_tile_test.cpp +++ b/intern/cycles/test/integrator_tile_test.cpp @@ -17,7 +17,7 @@ #include "testing/testing.h" #include "integrator/tile.h" -#include "util/util_math.h" +#include "util/math.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp index 4a87a382130..4207b437a41 100644 --- a/intern/cycles/test/render_graph_finalize_test.cpp +++ b/intern/cycles/test/render_graph_finalize_test.cpp @@ -23,11 +23,11 @@ #include "scene/shader_graph.h" #include "scene/shader_nodes.h" -#include "util/util_array.h" -#include "util/util_logging.h" -#include "util/util_stats.h" -#include "util/util_string.h" -#include "util/util_vector.h" +#include "util/array.h" +#include "util/log.h" +#include "util/stats.h" +#include "util/string.h" +#include "util/vector.h" using testing::_; using testing::AnyNumber; diff --git a/intern/cycles/test/util_aligned_malloc_test.cpp b/intern/cycles/test/util_aligned_malloc_test.cpp index 8829c422a0f..2748db520eb 100644 --- a/intern/cycles/test/util_aligned_malloc_test.cpp +++ b/intern/cycles/test/util_aligned_malloc_test.cpp @@ -16,7 +16,7 @@ #include "testing/testing.h" -#include "util/util_aligned_malloc.h" +#include "util/aligned_malloc.h" #define CHECK_ALIGNMENT(ptr, align) EXPECT_EQ((size_t)ptr % align, 0) diff --git a/intern/cycles/test/util_avxf_test.h b/intern/cycles/test/util_avxf_test.h index 64825200c9e..b178a0450d0 100644 --- a/intern/cycles/test/util_avxf_test.h +++ b/intern/cycles/test/util_avxf_test.h @@ -15,8 +15,8 @@ */ #include "testing/testing.h" -#include "util/util_system.h" -#include "util/util_types.h" +#include "util/system.h" +#include "util/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/test/util_math_test.cpp b/intern/cycles/test/util_math_test.cpp index b6ce3ef0cf3..adbedf7adbe 100644 --- a/intern/cycles/test/util_math_test.cpp +++ b/intern/cycles/test/util_math_test.cpp @@ -16,7 +16,7 @@ #include "testing/testing.h" -#include "util/util_math.h" +#include "util/math.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/test/util_path_test.cpp b/intern/cycles/test/util_path_test.cpp index 76d48dc241d..7afdd1150a4 100644 --- a/intern/cycles/test/util_path_test.cpp +++ b/intern/cycles/test/util_path_test.cpp @@ -16,7 +16,7 @@ #include "testing/testing.h" -#include "util/util_path.h" +#include "util/path.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/test/util_string_test.cpp b/intern/cycles/test/util_string_test.cpp index c9022d1b132..f558dda9e47 100644 --- a/intern/cycles/test/util_string_test.cpp +++ b/intern/cycles/test/util_string_test.cpp @@ -16,7 +16,7 @@ #include "testing/testing.h" -#include "util/util_string.h" +#include "util/string.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/test/util_task_test.cpp 
b/intern/cycles/test/util_task_test.cpp index a8b4dfc3a37..17cfe4ff9b2 100644 --- a/intern/cycles/test/util_task_test.cpp +++ b/intern/cycles/test/util_task_test.cpp @@ -16,7 +16,7 @@ #include "testing/testing.h" -#include "util/util_task.h" +#include "util/task.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/test/util_time_test.cpp b/intern/cycles/test/util_time_test.cpp index ab5ead2c7b1..97a0134df67 100644 --- a/intern/cycles/test/util_time_test.cpp +++ b/intern/cycles/test/util_time_test.cpp @@ -16,7 +16,7 @@ #include "testing/testing.h" -#include "util/util_time.h" +#include "util/time.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/test/util_transform_test.cpp b/intern/cycles/test/util_transform_test.cpp index a5267df9fb7..11dd71ea0c2 100644 --- a/intern/cycles/test/util_transform_test.cpp +++ b/intern/cycles/test/util_transform_test.cpp @@ -16,8 +16,8 @@ #include "testing/testing.h" -#include "util/util_transform.h" -#include "util/util_vector.h" +#include "util/transform.h" +#include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 18e14913884..b68646a44d5 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -22,23 +22,23 @@ set(INC_SYS ) set(SRC - util_aligned_malloc.cpp - util_debug.cpp - util_ies.cpp - util_logging.cpp - util_math_cdf.cpp - util_md5.cpp - util_murmurhash.cpp - util_path.cpp - util_profiling.cpp - util_string.cpp - util_simd.cpp - util_system.cpp - util_task.cpp - util_thread.cpp - util_time.cpp - util_transform.cpp - util_windows.cpp + aligned_malloc.cpp + debug.cpp + ies.cpp + log.cpp + math_cdf.cpp + md5.cpp + murmurhash.cpp + path.cpp + profiling.cpp + string.cpp + simd.cpp + system.cpp + task.cpp + thread.cpp + time.cpp + transform.cpp + windows.cpp ) set(LIB @@ -48,7 +48,7 @@ set(LIB if(WITH_CYCLES_STANDALONE) if(WITH_CYCLES_STANDALONE_GUI) list(APPEND SRC - util_view.cpp + view.cpp ) endif() endif() @@ -64,108 +64,108 @@ else() endif() set(SRC_HEADERS - util_algorithm.h - util_aligned_malloc.h - util_args.h - util_array.h - util_atomic.h - util_boundbox.h - util_debug.h - util_defines.h - util_deque.h - util_disjoint_set.h - util_guarded_allocator.cpp - util_foreach.h - util_function.h - util_guarded_allocator.h - util_half.h - util_hash.h - util_ies.h - util_image.h - util_image_impl.h - util_list.h - util_logging.h - util_map.h - util_math.h - util_math_cdf.h - util_math_fast.h - util_math_intersect.h - util_math_float2.h - util_math_float3.h - util_math_float4.h - util_math_int2.h - util_math_int3.h - util_math_int4.h - util_math_matrix.h - util_md5.h - util_murmurhash.h - util_openimagedenoise.h - util_opengl.h - util_openvdb.h - util_optimization.h - util_param.h - util_path.h - util_profiling.h - util_progress.h - util_projection.h - util_queue.h - util_rect.h - util_set.h - util_simd.h - util_avxf.h - util_avxb.h - util_avxi.h - util_semaphore.h - util_sseb.h - util_ssef.h - util_ssei.h - util_stack_allocator.h - util_static_assert.h - util_stats.h - util_string.h - util_system.h - util_task.h - util_tbb.h - util_texture.h - util_thread.h - util_time.h - util_transform.h - util_types.h - util_types_float2.h - util_types_float2_impl.h - util_types_float3.h - util_types_float3_impl.h - util_types_float4.h - util_types_float4_impl.h - util_types_float8.h - util_types_float8_impl.h - util_types_int2.h - util_types_int2_impl.h - util_types_int3.h - util_types_int3_impl.h - util_types_int4.h - util_types_int4_impl.h - util_types_uchar2.h - 
util_types_uchar2_impl.h - util_types_uchar3.h - util_types_uchar3_impl.h - util_types_uchar4.h - util_types_uchar4_impl.h - util_types_uint2.h - util_types_uint2_impl.h - util_types_uint3.h - util_types_uint3_impl.h - util_types_uint4.h - util_types_uint4_impl.h - util_types_ushort4.h - util_types_vector3.h - util_types_vector3_impl.h - util_unique_ptr.h - util_vector.h - util_version.h - util_view.h - util_windows.h - util_xml.h + algorithm.h + aligned_malloc.h + args.h + array.h + atomic.h + boundbox.h + debug.h + defines.h + deque.h + disjoint_set.h + guarded_allocator.cpp + foreach.h + function.h + guarded_allocator.h + half.h + hash.h + ies.h + image.h + image_impl.h + list.h + log.h + map.h + math.h + math_cdf.h + math_fast.h + math_intersect.h + math_float2.h + math_float3.h + math_float4.h + math_int2.h + math_int3.h + math_int4.h + math_matrix.h + md5.h + murmurhash.h + openimagedenoise.h + opengl.h + openvdb.h + optimization.h + param.h + path.h + profiling.h + progress.h + projection.h + queue.h + rect.h + set.h + simd.h + avxf.h + avxb.h + avxi.h + semaphore.h + sseb.h + ssef.h + ssei.h + stack_allocator.h + static_assert.h + stats.h + string.h + system.h + task.h + tbb.h + texture.h + thread.h + time.h + transform.h + types.h + types_float2.h + types_float2_impl.h + types_float3.h + types_float3_impl.h + types_float4.h + types_float4_impl.h + types_float8.h + types_float8_impl.h + types_int2.h + types_int2_impl.h + types_int3.h + types_int3_impl.h + types_int4.h + types_int4_impl.h + types_uchar2.h + types_uchar2_impl.h + types_uchar3.h + types_uchar3_impl.h + types_uchar4.h + types_uchar4_impl.h + types_uint2.h + types_uint2_impl.h + types_uint3.h + types_uint3_impl.h + types_uint4.h + types_uint4_impl.h + types_ushort4.h + types_vector3.h + types_vector3_impl.h + unique_ptr.h + vector.h + version.h + view.h + windows.h + xml.h ) include_directories(${INC}) diff --git a/intern/cycles/util/algorithm.h b/intern/cycles/util/algorithm.h new file mode 100644 index 00000000000..63abd4e92a3 --- /dev/null +++ b/intern/cycles/util/algorithm.h @@ -0,0 +1,33 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_ALGORITHM_H__ +#define __UTIL_ALGORITHM_H__ + +#include + +CCL_NAMESPACE_BEGIN + +using std::max; +using std::min; +using std::remove; +using std::sort; +using std::stable_sort; +using std::swap; + +CCL_NAMESPACE_END + +#endif /* __UTIL_ALGORITHM_H__ */ diff --git a/intern/cycles/util/aligned_malloc.cpp b/intern/cycles/util/aligned_malloc.cpp new file mode 100644 index 00000000000..2b05559b55f --- /dev/null +++ b/intern/cycles/util/aligned_malloc.cpp @@ -0,0 +1,76 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/aligned_malloc.h" +#include "util/guarded_allocator.h" + +#include + +/* Adopted from Libmv. */ + +#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__NetBSD__) +/* Needed for memalign on Linux and _aligned_alloc on Windows. */ +# ifdef FREE_WINDOWS +/* Make sure _aligned_malloc is included. */ +# ifdef __MSVCRT_VERSION__ +# undef __MSVCRT_VERSION__ +# endif +# define __MSVCRT_VERSION__ 0x0700 +# endif /* FREE_WINDOWS */ +# include +#else +/* Apple's malloc is 16-byte aligned, and does not have malloc.h, so include + * stdilb instead. + */ +# include +#endif + +CCL_NAMESPACE_BEGIN + +void *util_aligned_malloc(size_t size, int alignment) +{ +#ifdef WITH_BLENDER_GUARDEDALLOC + return MEM_mallocN_aligned(size, alignment, "Cycles Aligned Alloc"); +#elif defined(_WIN32) + return _aligned_malloc(size, alignment); +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__APPLE__) + void *result; + if (posix_memalign(&result, alignment, size)) { + /* Non-zero means allocation error + * either no allocation or bad alignment value. + */ + return NULL; + } + return result; +#else /* This is for Linux. */ + return memalign(alignment, size); +#endif +} + +void util_aligned_free(void *ptr) +{ +#if defined(WITH_BLENDER_GUARDEDALLOC) + if (ptr != NULL) { + MEM_freeN(ptr); + } +#elif defined(_WIN32) + _aligned_free(ptr); +#else + free(ptr); +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/aligned_malloc.h b/intern/cycles/util/aligned_malloc.h new file mode 100644 index 00000000000..66c2ac1c593 --- /dev/null +++ b/intern/cycles/util/aligned_malloc.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_ALIGNED_MALLOC_H__ +#define __UTIL_ALIGNED_MALLOC_H__ + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* Minimum alignment needed by all CPU native data types (SSE, AVX). */ +#define MIN_ALIGNMENT_CPU_DATA_TYPES 16 + +/* Allocate block of size bytes at least aligned to a given value. */ +void *util_aligned_malloc(size_t size, int alignment); + +/* Free memory allocated by util_aligned_malloc. */ +void util_aligned_free(void *ptr); + +/* Aligned new operator. */ +template T *util_aligned_new(Args... 
args) +{ + void *mem = util_aligned_malloc(sizeof(T), alignof(T)); + return new (mem) T(args...); +} + +template void util_aligned_delete(T *t) +{ + if (t) { + t->~T(); + util_aligned_free(t); + } +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_ALIGNED_MALLOC_H__ */ diff --git a/intern/cycles/util/args.h b/intern/cycles/util/args.h new file mode 100644 index 00000000000..be6f2c2b9f1 --- /dev/null +++ b/intern/cycles/util/args.h @@ -0,0 +1,31 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_ARGS_H__ +#define __UTIL_ARGS_H__ + +/* Argument Parsing for command line, we use the OpenImageIO + * library because it has nice functions to do this. */ + +#include + +CCL_NAMESPACE_BEGIN + +OIIO_NAMESPACE_USING + +CCL_NAMESPACE_END + +#endif /* __UTIL_ARGS_H__ */ diff --git a/intern/cycles/util/array.h b/intern/cycles/util/array.h new file mode 100644 index 00000000000..4c905b09138 --- /dev/null +++ b/intern/cycles/util/array.h @@ -0,0 +1,318 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_ARRAY_H__ +#define __UTIL_ARRAY_H__ + +#include +#include + +#include "util/aligned_malloc.h" +#include "util/guarded_allocator.h" +#include "util/types.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +/* Simplified version of vector, serving multiple purposes: + * - somewhat faster in that it does not clear memory on resize/alloc, + * this was actually showing up in profiles quite significantly. 
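An aside outside the patch: the template headers of util_aligned_new() and util_aligned_delete() above appear to have lost their parameter lists in this rendering (presumably template<typename T, typename... Args> and template<typename T> in the actual source). A self-contained sketch of the same allocate, placement-new, explicit-destroy pattern, with C++17 aligned operator new standing in for util_aligned_malloc():

#include <new>
#include <utility>

// Sketch only: allocate aligned raw memory, construct in place, and later
// destroy explicitly before releasing the memory.
template<typename T, typename... Args> T *aligned_new(Args &&...args)
{
  void *mem = ::operator new(sizeof(T), std::align_val_t(alignof(T)));
  return new (mem) T(std::forward<Args>(args)...);
}

template<typename T> void aligned_delete(T *t)
{
  if (t) {
    t->~T();
    ::operator delete(t, std::align_val_t(alignof(T)));
  }
}

struct alignas(32) WideVec {
  float f[8];
  explicit WideVec(float v)
  {
    for (float &x : f) {
      x = v;
    }
  }
};

int main()
{
  WideVec *v = aligned_new<WideVec>(1.0f);
  aligned_delete(v);
  return 0;
}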
it + * also does not run any constructors/destructors + * - if this is used, we are not tempted to use inefficient operations + * - aligned allocation for CPU native data types */ + +template class array { + public: + array() : data_(NULL), datasize_(0), capacity_(0) + { + } + + explicit array(size_t newsize) + { + if (newsize == 0) { + data_ = NULL; + datasize_ = 0; + capacity_ = 0; + } + else { + data_ = mem_allocate(newsize); + datasize_ = newsize; + capacity_ = datasize_; + } + } + + array(const array &from) + { + if (from.datasize_ == 0) { + data_ = NULL; + datasize_ = 0; + capacity_ = 0; + } + else { + data_ = mem_allocate(from.datasize_); + if (from.datasize_ > 0) { + memcpy(data_, from.data_, from.datasize_ * sizeof(T)); + } + datasize_ = from.datasize_; + capacity_ = datasize_; + } + } + + array &operator=(const array &from) + { + if (this != &from) { + resize(from.size()); + if (datasize_ > 0) { + memcpy((void *)data_, from.data_, datasize_ * sizeof(T)); + } + } + + return *this; + } + + array &operator=(const vector &from) + { + resize(from.size()); + + if (from.size() > 0 && datasize_ > 0) { + memcpy(data_, &from[0], datasize_ * sizeof(T)); + } + + return *this; + } + + ~array() + { + mem_free(data_, capacity_); + } + + bool operator==(const array &other) const + { + if (datasize_ != other.datasize_) { + return false; + } + if (datasize_ == 0) { + return true; + } + + return memcmp(data_, other.data_, datasize_ * sizeof(T)) == 0; + } + + bool operator!=(const array &other) const + { + return !(*this == other); + } + + void steal_data(array &from) + { + if (this != &from) { + clear(); + + data_ = from.data_; + datasize_ = from.datasize_; + capacity_ = from.capacity_; + + from.data_ = NULL; + from.datasize_ = 0; + from.capacity_ = 0; + } + } + + void set_data(T *ptr_, size_t datasize) + { + clear(); + data_ = ptr_; + datasize_ = datasize; + capacity_ = datasize; + } + + T *steal_pointer() + { + T *ptr = data_; + data_ = NULL; + clear(); + return ptr; + } + + T *resize(size_t newsize) + { + if (newsize == 0) { + clear(); + } + else if (newsize != datasize_) { + if (newsize > capacity_) { + T *newdata = mem_allocate(newsize); + if (newdata == NULL) { + /* Allocation failed, likely out of memory. */ + clear(); + return NULL; + } + else if (data_ != NULL) { + memcpy( + (void *)newdata, data_, ((datasize_ < newsize) ? datasize_ : newsize) * sizeof(T)); + mem_free(data_, capacity_); + } + data_ = newdata; + capacity_ = newsize; + } + datasize_ = newsize; + } + return data_; + } + + T *resize(size_t newsize, const T &value) + { + size_t oldsize = size(); + resize(newsize); + + for (size_t i = oldsize; i < size(); i++) { + data_[i] = value; + } + + return data_; + } + + void clear() + { + if (data_ != NULL) { + mem_free(data_, capacity_); + data_ = NULL; + } + datasize_ = 0; + capacity_ = 0; + } + + size_t empty() const + { + return datasize_ == 0; + } + + size_t size() const + { + return datasize_; + } + + T *data() + { + return data_; + } + + const T *data() const + { + return data_; + } + + T &operator[](size_t i) const + { + assert(i < datasize_); + return data_[i]; + } + + T *begin() + { + return data_; + } + + const T *begin() const + { + return data_; + } + + T *end() + { + return data_ + datasize_; + } + + const T *end() const + { + return data_ + datasize_; + } + + void reserve(size_t newcapacity) + { + if (newcapacity > capacity_) { + T *newdata = mem_allocate(newcapacity); + if (data_ != NULL) { + memcpy(newdata, data_, ((datasize_ < newcapacity) ? 
datasize_ : newcapacity) * sizeof(T)); + mem_free(data_, capacity_); + } + data_ = newdata; + capacity_ = newcapacity; + } + } + + size_t capacity() const + { + return capacity_; + } + + // do not use this method unless you are sure the code is not performance critical + void push_back_slow(const T &t) + { + if (capacity_ == datasize_) { + reserve(datasize_ == 0 ? 1 : (size_t)((datasize_ + 1) * 1.2)); + } + + data_[datasize_++] = t; + } + + void push_back_reserved(const T &t) + { + assert(datasize_ < capacity_); + push_back_slow(t); + } + + void append(const array &from) + { + if (from.size()) { + size_t old_size = size(); + resize(old_size + from.size()); + memcpy(data_ + old_size, from.data(), sizeof(T) * from.size()); + } + } + + protected: + inline T *mem_allocate(size_t N) + { + if (N == 0) { + return NULL; + } + T *mem = (T *)util_aligned_malloc(sizeof(T) * N, alignment); + if (mem != NULL) { + util_guarded_mem_alloc(sizeof(T) * N); + } + else { + throw std::bad_alloc(); + } + return mem; + } + + inline void mem_free(T *mem, size_t N) + { + if (mem != NULL) { + util_guarded_mem_free(sizeof(T) * N); + util_aligned_free(mem); + } + } + + T *data_; + size_t datasize_; + size_t capacity_; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_ARRAY_H__ */ diff --git a/intern/cycles/util/atomic.h b/intern/cycles/util/atomic.h new file mode 100644 index 00000000000..faba411c769 --- /dev/null +++ b/intern/cycles/util/atomic.h @@ -0,0 +1,68 @@ +/* + * Copyright 2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_ATOMIC_H__ +#define __UTIL_ATOMIC_H__ + +#ifndef __KERNEL_GPU__ + +/* Using atomic ops header from Blender. 
*/ +# include "atomic_ops.h" + +# define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x)) +# define atomic_compare_and_swap_float(p, old_val, new_val) \ + atomic_cas_float((p), (old_val), (new_val)) + +# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) +# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1) + +# define CCL_LOCAL_MEM_FENCE 0 +# define ccl_barrier(flags) ((void)0) + +#else /* __KERNEL_GPU__ */ + +# if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) + +# define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x)) + +# define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int *)(p), (unsigned int)(x)) +# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x)) +# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) +# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1) +# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x)) + +ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest, + const float old_val, + const float new_val) +{ + union { + unsigned int int_value; + float float_value; + } new_value, prev_value, result; + prev_value.float_value = old_val; + new_value.float_value = new_val; + result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value, new_value.int_value); + return result.float_value; +} + +# define CCL_LOCAL_MEM_FENCE +# define ccl_barrier(flags) __syncthreads() + +# endif /* __KERNEL_CUDA__ */ + +#endif /* __KERNEL_GPU__ */ + +#endif /* __UTIL_ATOMIC_H__ */ diff --git a/intern/cycles/util/avxb.h b/intern/cycles/util/avxb.h new file mode 100644 index 00000000000..15215d04ca3 --- /dev/null +++ b/intern/cycles/util/avxb.h @@ -0,0 +1,243 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_AVXB_H__ +#define __UTIL_AVXB_H__ + +CCL_NAMESPACE_BEGIN + +struct avxf; + +/*! 4-wide SSE bool type. 
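Aside, not part of the patch: the float compare-and-swap above exists so callers can build read-modify-write loops on floats stored as 32-bit words. The sketch below shows one such loop, a hypothetical atomic_max_float() written against std::atomic so it compiles outside the kernel; floats travel through their bit patterns, much like the union in the CUDA path:

#include <atomic>
#include <cstdint>
#include <cstring>

static float atomic_max_float(std::atomic<uint32_t> *dest, float value)
{
  uint32_t old_bits = dest->load();
  for (;;) {
    float old_val;
    std::memcpy(&old_val, &old_bits, sizeof(float));
    if (old_val >= value) {
      return old_val; /* Current value already larger, nothing to do. */
    }
    uint32_t new_bits;
    std::memcpy(&new_bits, &value, sizeof(float));
    /* On failure, compare_exchange_weak refreshes old_bits, so the loop retries. */
    if (dest->compare_exchange_weak(old_bits, new_bits)) {
      return old_val;
    }
  }
}

int main()
{
  std::atomic<uint32_t> slot(0); /* 0u is also the bit pattern of 0.0f. */
  atomic_max_float(&slot, 3.5f);
  float result;
  const uint32_t bits = slot.load();
  std::memcpy(&result, &bits, sizeof(float));
  return result == 3.5f ? 0 : 1;
}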
*/ +struct avxb { + typedef avxb Mask; // mask type + typedef avxf Float; // float type + + enum { size = 8 }; // number of SIMD elements + union { + __m256 m256; + int32_t v[8]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline avxb() + { + } + __forceinline avxb(const avxb &other) + { + m256 = other.m256; + } + __forceinline avxb &operator=(const avxb &other) + { + m256 = other.m256; + return *this; + } + + __forceinline avxb(const __m256 input) : m256(input) + { + } + __forceinline avxb(const __m128 &a, const __m128 &b) + : m256(_mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1)) + { + } + __forceinline operator const __m256 &(void) const + { + return m256; + } + __forceinline operator const __m256i(void) const + { + return _mm256_castps_si256(m256); + } + __forceinline operator const __m256d(void) const + { + return _mm256_castps_pd(m256); + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline avxb(FalseTy) : m256(_mm256_setzero_ps()) + { + } + __forceinline avxb(TrueTy) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator[](const size_t i) const + { + assert(i < 8); + return (_mm256_movemask_ps(m256) >> i) & 1; + } + __forceinline int32_t &operator[](const size_t i) + { + assert(i < 8); + return v[i]; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +/// Unary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxb operator!(const avxb &a) +{ + return _mm256_xor_ps(a, avxb(True)); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxb operator&(const avxb &a, const avxb &b) +{ + return _mm256_and_ps(a, b); +} +__forceinline const avxb operator|(const avxb &a, const avxb &b) +{ + return _mm256_or_ps(a, b); +} +__forceinline const avxb operator^(const avxb &a, const avxb &b) +{ + return _mm256_xor_ps(a, b); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Assignment Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxb operator&=(avxb &a, const avxb &b) +{ + return a = a & b; +} +__forceinline const avxb operator|=(avxb &a, const avxb &b) +{ + return a = a | b; +} +__forceinline const avxb operator^=(avxb &a, const avxb &b) +{ + return a = a ^ b; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxb operator!=(const avxb &a, const avxb &b) +{ + return _mm256_xor_ps(a, b); +} +__forceinline const avxb operator==(const avxb &a, const avxb &b) +{ +#ifdef __KERNEL_AVX2__ + return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); +#else + __m128i a_lo = 
_mm_castps_si128(_mm256_extractf128_ps(a, 0)); + __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1)); + __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0)); + __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1)); + __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo); + __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi); + __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1); + return _mm256_castsi256_ps(result); +#endif +} + +__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f) +{ +#if defined(__KERNEL_SSE41__) + return _mm256_blendv_ps(f, t, m); +#else + return _mm256_or_ps(_mm256_and_ps(m, t), _mm256_andnot_ps(m, f)); +#endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Movement/Shifting/Shuffling Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxb unpacklo(const avxb &a, const avxb &b) +{ + return _mm256_unpacklo_ps(a, b); +} +__forceinline const avxb unpackhi(const avxb &a, const avxb &b) +{ + return _mm256_unpackhi_ps(a, b); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Reduction Operations +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__KERNEL_SSE41__) +__forceinline uint32_t popcnt(const avxb &a) +{ + return _mm_popcnt_u32(_mm256_movemask_ps(a)); +} +#else +__forceinline uint32_t popcnt(const avxb &a) +{ + return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]) + bool(a[4]) + bool(a[5]) + bool(a[6]) + + bool(a[7]); +} +#endif + +__forceinline bool reduce_and(const avxb &a) +{ + return _mm256_movemask_ps(a) == 0xf; +} +__forceinline bool reduce_or(const avxb &a) +{ + return _mm256_movemask_ps(a) != 0x0; +} +__forceinline bool all(const avxb &b) +{ + return _mm256_movemask_ps(b) == 0xf; +} +__forceinline bool any(const avxb &b) +{ + return _mm256_movemask_ps(b) != 0x0; +} +__forceinline bool none(const avxb &b) +{ + return _mm256_movemask_ps(b) == 0x0; +} + +__forceinline uint32_t movemask(const avxb &a) +{ + return _mm256_movemask_ps(a); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Debug Functions +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_avxb(const char *label, const avxb &a) +{ + printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); +} + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/avxf.h b/intern/cycles/util/avxf.h new file mode 100644 index 00000000000..1fb3ded422f --- /dev/null +++ b/intern/cycles/util/avxf.h @@ -0,0 +1,392 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_AVXF_H__ +#define __UTIL_AVXF_H__ + +CCL_NAMESPACE_BEGIN + +struct avxb; + +struct avxf { + typedef avxf Float; + + enum { size = 8 }; /* Number of SIMD elements. 
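Aside, not part of the patch: avxb's select(), movemask() and the any()/all() reductions above are thin wrappers over a handful of AVX intrinsics. A self-contained sketch of those raw intrinsics (requires AVX, e.g. compile with -mavx):

#include <immintrin.h>
#include <cstdio>

int main()
{
  const __m256 a = _mm256_set_ps(7, 6, 5, 4, 3, 2, 1, 0);
  const __m256 b = _mm256_set1_ps(3.5f);

  const __m256 mask = _mm256_cmp_ps(a, b, _CMP_LE_OS); /* Lanes where a <= 3.5. */
  const __m256 sel = _mm256_blendv_ps(b, a, mask);     /* select(mask, a, b). */
  const int bits = _mm256_movemask_ps(mask);           /* One sign bit per lane. */

  float out[8];
  _mm256_storeu_ps(out, sel);
  printf("mask bits = 0x%02x, first lane = %.1f\n", bits, out[0]);
  /* Lanes 0..3 keep a (0, 1, 2, 3), lanes 4..7 take b (3.5); mask bits = 0x0f. */
  return bits == 0x0f ? 0 : 1;
}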
*/ + + union { + __m256 m256; + float f[8]; + int i[8]; + }; + + __forceinline avxf() + { + } + __forceinline avxf(const avxf &other) + { + m256 = other.m256; + } + __forceinline avxf &operator=(const avxf &other) + { + m256 = other.m256; + return *this; + } + + __forceinline avxf(const __m256 a) : m256(a) + { + } + __forceinline avxf(const __m256i a) : m256(_mm256_castsi256_ps(a)) + { + } + + __forceinline operator const __m256 &() const + { + return m256; + } + __forceinline operator __m256 &() + { + return m256; + } + + __forceinline avxf(float a) : m256(_mm256_set1_ps(a)) + { + } + + __forceinline avxf(float high32x4, float low32x4) + : m256(_mm256_set_ps( + high32x4, high32x4, high32x4, high32x4, low32x4, low32x4, low32x4, low32x4)) + { + } + + __forceinline avxf(float a3, float a2, float a1, float a0) + : m256(_mm256_set_ps(a3, a2, a1, a0, a3, a2, a1, a0)) + { + } + + __forceinline avxf( + float a7, float a6, float a5, float a4, float a3, float a2, float a1, float a0) + : m256(_mm256_set_ps(a7, a6, a5, a4, a3, a2, a1, a0)) + { + } + + __forceinline avxf(float3 a) : m256(_mm256_set_ps(a.w, a.z, a.y, a.x, a.w, a.z, a.y, a.x)) + { + } + + __forceinline avxf(int a3, int a2, int a1, int a0) + { + const __m256i foo = _mm256_set_epi32(a3, a2, a1, a0, a3, a2, a1, a0); + m256 = _mm256_castsi256_ps(foo); + } + + __forceinline avxf(int a7, int a6, int a5, int a4, int a3, int a2, int a1, int a0) + { + const __m256i foo = _mm256_set_epi32(a7, a6, a5, a4, a3, a2, a1, a0); + m256 = _mm256_castsi256_ps(foo); + } + + __forceinline avxf(__m128 a, __m128 b) + { + const __m256 foo = _mm256_castps128_ps256(a); + m256 = _mm256_insertf128_ps(foo, b, 1); + } + + __forceinline const float &operator[](const size_t i) const + { + assert(i < 8); + return f[i]; + } + __forceinline float &operator[](const size_t i) + { + assert(i < 8); + return f[i]; + } +}; + +__forceinline avxf cross(const avxf &a, const avxf &b) +{ + avxf r(0.0, + a[4] * b[5] - a[5] * b[4], + a[6] * b[4] - a[4] * b[6], + a[5] * b[6] - a[6] * b[5], + 0.0, + a[0] * b[1] - a[1] * b[0], + a[2] * b[0] - a[0] * b[2], + a[1] * b[2] - a[2] * b[1]); + return r; +} + +__forceinline void dot3(const avxf &a, const avxf &b, float &den, float &den2) +{ + const avxf t = _mm256_mul_ps(a.m256, b.m256); + den = ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]; + den2 = ((float *)&t)[4] + ((float *)&t)[5] + ((float *)&t)[6]; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Unary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxf cast(const __m256i &a) +{ + return _mm256_castsi256_ps(a); +} + +__forceinline const avxf mm256_sqrt(const avxf &a) +{ + return _mm256_sqrt_ps(a.m256); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxf operator+(const avxf &a, const avxf &b) +{ + return _mm256_add_ps(a.m256, b.m256); +} +__forceinline const avxf operator+(const avxf &a, const float &b) +{ + return a + avxf(b); +} +__forceinline const avxf operator+(const float &a, const avxf &b) +{ + return avxf(a) + b; +} + +__forceinline const avxf operator-(const avxf &a, const avxf &b) +{ + return _mm256_sub_ps(a.m256, b.m256); +} +__forceinline const avxf operator-(const avxf &a, const float &b) +{ + return a - avxf(b); +} +__forceinline const avxf operator-(const float &a, const avxf &b) +{ + return 
avxf(a) - b; +} + +__forceinline const avxf operator*(const avxf &a, const avxf &b) +{ + return _mm256_mul_ps(a.m256, b.m256); +} +__forceinline const avxf operator*(const avxf &a, const float &b) +{ + return a * avxf(b); +} +__forceinline const avxf operator*(const float &a, const avxf &b) +{ + return avxf(a) * b; +} + +__forceinline const avxf operator/(const avxf &a, const avxf &b) +{ + return _mm256_div_ps(a.m256, b.m256); +} +__forceinline const avxf operator/(const avxf &a, const float &b) +{ + return a / avxf(b); +} +__forceinline const avxf operator/(const float &a, const avxf &b) +{ + return avxf(a) / b; +} + +__forceinline const avxf operator|(const avxf &a, const avxf &b) +{ + return _mm256_or_ps(a.m256, b.m256); +} + +__forceinline const avxf operator^(const avxf &a, const avxf &b) +{ + return _mm256_xor_ps(a.m256, b.m256); +} + +__forceinline const avxf operator&(const avxf &a, const avxf &b) +{ + return _mm256_and_ps(a.m256, b.m256); +} + +__forceinline const avxf max(const avxf &a, const avxf &b) +{ + return _mm256_max_ps(a.m256, b.m256); +} +__forceinline const avxf min(const avxf &a, const avxf &b) +{ + return _mm256_min_ps(a.m256, b.m256); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Movement/Shifting/Shuffling Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxf shuffle(const avxf &a, const __m256i &shuf) +{ + return _mm256_permutevar_ps(a, shuf); +} + +template +__forceinline const avxf shuffle(const avxf &a) +{ + return _mm256_permutevar_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0)); +} + +template +__forceinline const avxf shuffle(const avxf &a, const avxf &b) +{ + return _mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); +} +template +__forceinline const avxf shuffle(const avxf &a) +{ + return shuffle(a, a); +} +template __forceinline const avxf shuffle(const avxf &a, const avxf &b) +{ + return shuffle(a, b); +} +template __forceinline const avxf shuffle(const avxf &a) +{ + return shuffle(a, a); +} + +template __forceinline float extract(const avxf &a) +{ + __m256 b = shuffle(a).m256; + return _mm256_cvtss_f32(b); +} +template<> __forceinline float extract<0>(const avxf &a) +{ + return _mm256_cvtss_f32(a.m256); +} + +__forceinline ssef low(const avxf &a) +{ + return _mm256_extractf128_ps(a.m256, 0); +} +__forceinline ssef high(const avxf &a) +{ + return _mm256_extractf128_ps(a.m256, 1); +} + +template +__forceinline const avxf permute(const avxf &a) +{ +#ifdef __KERNEL_AVX2__ + return _mm256_permutevar8x32_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0)); +#else + float temp[8]; + _mm256_storeu_ps((float *)&temp, a); + return avxf(temp[i7], temp[i6], temp[i5], temp[i4], temp[i3], temp[i2], temp[i1], temp[i0]); +#endif +} + +template +ccl_device_inline const avxf set_sign_bit(const avxf &a) +{ + return a ^ avxf(S7 << 31, S6 << 31, S5 << 31, S4 << 31, S3 << 31, S2 << 31, S1 << 31, S0 << 31); +} + +template +ccl_device_inline const avxf blend(const avxf &a, const avxf &b) +{ + return _mm256_blend_ps( + a, b, S7 << 0 | S6 << 1 | S5 << 2 | S4 << 3 | S3 << 4 | S2 << 5 | S1 << 6 | S0 << 7); +} + +template +ccl_device_inline const avxf blend(const avxf &a, const avxf &b) +{ + return blend(a, b); +} + +//#if defined(__KERNEL_SSE41__) +__forceinline avxf maxi(const avxf &a, const avxf &b) +{ + const avxf ci = _mm256_max_ps(a, b); + return ci; +} + +__forceinline avxf mini(const avxf &a, const avxf &b) +{ + const avxf ci = _mm256_min_ps(a, b); + 
return ci; +} +//#endif + +//////////////////////////////////////////////////////////////////////////////// +/// Ternary Operators +//////////////////////////////////////////////////////////////////////////////// +__forceinline const avxf madd(const avxf &a, const avxf &b, const avxf &c) +{ +#ifdef __KERNEL_AVX2__ + return _mm256_fmadd_ps(a, b, c); +#else + return c + (a * b); +#endif +} + +__forceinline const avxf nmadd(const avxf &a, const avxf &b, const avxf &c) +{ +#ifdef __KERNEL_AVX2__ + return _mm256_fnmadd_ps(a, b, c); +#else + return c - (a * b); +#endif +} +__forceinline const avxf msub(const avxf &a, const avxf &b, const avxf &c) +{ +#ifdef __KERNEL_AVX2__ + return _mm256_fmsub_ps(a, b, c); +#else + return (a * b) - c; +#endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select +//////////////////////////////////////////////////////////////////////////////// +__forceinline const avxb operator<=(const avxf &a, const avxf &b) +{ + return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS); +} + +__forceinline const avxf select(const avxb &m, const avxf &t, const avxf &f) +{ + return _mm256_blendv_ps(f, t, m); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Common Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline avxf mix(const avxf &a, const avxf &b, const avxf &t) +{ + return madd(t, b, (avxf(1.0f) - t) * a); +} + +#ifndef _mm256_set_m128 +# define _mm256_set_m128(/* __m128 */ hi, /* __m128 */ lo) \ + _mm256_insertf128_ps(_mm256_castps128_ps256(lo), (hi), 0x1) +#endif + +#define _mm256_loadu2_m128(/* float const* */ hiaddr, /* float const* */ loaddr) \ + _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr)) + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/avxi.h b/intern/cycles/util/avxi.h new file mode 100644 index 00000000000..0ae4bf271c8 --- /dev/null +++ b/intern/cycles/util/avxi.h @@ -0,0 +1,745 @@ +/* + * Copyright 2009-2013 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
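For orientation, here is a minimal sketch of how the avxf helpers above compose in kernel code. It assumes the scalar-splat and __m256 constructors defined earlier in avxf.h; the function name, constants and buffers are illustrative only.

ccl_device_inline void avxf_example(const float *x, float *out)
{
  const avxf vx(_mm256_loadu_ps(x));                    /* load 8 floats */
  const avxf scaled = madd(avxf(2.0f), vx, avxf(1.0f)); /* 2*x + 1, fused on AVX2 */
  const avxf clamped = min(max(scaled, avxf(0.0f)), avxf(1.0f));
  _mm256_storeu_ps(out, clamped.m256);                  /* store 8 results */
}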
+ */ + +#ifndef __UTIL_AVXI_H__ +#define __UTIL_AVXI_H__ + +CCL_NAMESPACE_BEGIN + +struct avxb; + +struct avxi { + typedef avxb Mask; // mask type for us + enum { size = 8 }; // number of SIMD elements + union { // data + __m256i m256; +#if !defined(__KERNEL_AVX2__) + struct { + __m128i l, h; + }; +#endif + int32_t v[8]; + }; + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline avxi() + { + } + __forceinline avxi(const avxi &a) + { + m256 = a.m256; + } + __forceinline avxi &operator=(const avxi &a) + { + m256 = a.m256; + return *this; + } + + __forceinline avxi(const __m256i a) : m256(a) + { + } + __forceinline operator const __m256i &(void) const + { + return m256; + } + __forceinline operator __m256i &(void) + { + return m256; + } + + __forceinline explicit avxi(const ssei &a) + : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), a, 1)) + { + } + __forceinline avxi(const ssei &a, const ssei &b) + : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1)) + { + } +#if defined(__KERNEL_AVX2__) + __forceinline avxi(const __m128i &a, const __m128i &b) + : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1)) + { + } +#else + __forceinline avxi(const __m128i &a, const __m128i &b) : l(a), h(b) + { + } +#endif + __forceinline explicit avxi(const int32_t *const a) + : m256(_mm256_castps_si256(_mm256_loadu_ps((const float *)a))) + { + } + __forceinline avxi(int32_t a) : m256(_mm256_set1_epi32(a)) + { + } + __forceinline avxi(int32_t a, int32_t b) : m256(_mm256_set_epi32(b, a, b, a, b, a, b, a)) + { + } + __forceinline avxi(int32_t a, int32_t b, int32_t c, int32_t d) + : m256(_mm256_set_epi32(d, c, b, a, d, c, b, a)) + { + } + __forceinline avxi( + int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, int32_t f, int32_t g, int32_t h) + : m256(_mm256_set_epi32(h, g, f, e, d, c, b, a)) + { + } + + __forceinline explicit avxi(const __m256 a) : m256(_mm256_cvtps_epi32(a)) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline avxi(ZeroTy) : m256(_mm256_setzero_si256()) + { + } +#if defined(__KERNEL_AVX2__) + __forceinline avxi(OneTy) : m256(_mm256_set1_epi32(1)) + { + } + __forceinline avxi(PosInfTy) : m256(_mm256_set1_epi32(pos_inf)) + { + } + __forceinline avxi(NegInfTy) : m256(_mm256_set1_epi32(neg_inf)) + { + } +#else + __forceinline avxi(OneTy) : m256(_mm256_set_epi32(1, 1, 1, 1, 1, 1, 1, 1)) + { + } + __forceinline avxi(PosInfTy) + : m256(_mm256_set_epi32( + pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf)) + { + } + __forceinline avxi(NegInfTy) + : m256(_mm256_set_epi32( + neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf)) + { + } +#endif + __forceinline avxi(StepTy) : m256(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0)) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const int32_t &operator[](const size_t i) const + { + assert(i < 8); + return v[i]; + } + __forceinline int32_t &operator[](const size_t i) + { + assert(i < 8); + return v[i]; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +/// Unary 
Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxi cast(const __m256 &a) +{ + return _mm256_castps_si256(a); +} +__forceinline const avxi operator+(const avxi &a) +{ + return a; +} +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator-(const avxi &a) +{ + return _mm256_sub_epi32(_mm256_setzero_si256(), a.m256); +} +__forceinline const avxi abs(const avxi &a) +{ + return _mm256_abs_epi32(a.m256); +} +#else +__forceinline const avxi operator-(const avxi &a) +{ + return avxi(_mm_sub_epi32(_mm_setzero_si128(), a.l), _mm_sub_epi32(_mm_setzero_si128(), a.h)); +} +__forceinline const avxi abs(const avxi &a) +{ + return avxi(_mm_abs_epi32(a.l), _mm_abs_epi32(a.h)); +} +#endif + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator+(const avxi &a, const avxi &b) +{ + return _mm256_add_epi32(a.m256, b.m256); +} +#else +__forceinline const avxi operator+(const avxi &a, const avxi &b) +{ + return avxi(_mm_add_epi32(a.l, b.l), _mm_add_epi32(a.h, b.h)); +} +#endif +__forceinline const avxi operator+(const avxi &a, const int32_t b) +{ + return a + avxi(b); +} +__forceinline const avxi operator+(const int32_t a, const avxi &b) +{ + return avxi(a) + b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator-(const avxi &a, const avxi &b) +{ + return _mm256_sub_epi32(a.m256, b.m256); +} +#else +__forceinline const avxi operator-(const avxi &a, const avxi &b) +{ + return avxi(_mm_sub_epi32(a.l, b.l), _mm_sub_epi32(a.h, b.h)); +} +#endif +__forceinline const avxi operator-(const avxi &a, const int32_t b) +{ + return a - avxi(b); +} +__forceinline const avxi operator-(const int32_t a, const avxi &b) +{ + return avxi(a) - b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator*(const avxi &a, const avxi &b) +{ + return _mm256_mullo_epi32(a.m256, b.m256); +} +#else +__forceinline const avxi operator*(const avxi &a, const avxi &b) +{ + return avxi(_mm_mullo_epi32(a.l, b.l), _mm_mullo_epi32(a.h, b.h)); +} +#endif +__forceinline const avxi operator*(const avxi &a, const int32_t b) +{ + return a * avxi(b); +} +__forceinline const avxi operator*(const int32_t a, const avxi &b) +{ + return avxi(a) * b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator&(const avxi &a, const avxi &b) +{ + return _mm256_and_si256(a.m256, b.m256); +} +#else +__forceinline const avxi operator&(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); +} +#endif +__forceinline const avxi operator&(const avxi &a, const int32_t b) +{ + return a & avxi(b); +} +__forceinline const avxi operator&(const int32_t a, const avxi &b) +{ + return avxi(a) & b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator|(const avxi &a, const avxi &b) +{ + return _mm256_or_si256(a.m256, b.m256); +} +#else +__forceinline const avxi operator|(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); +} +#endif +__forceinline const avxi operator|(const avxi &a, const int32_t b) +{ + return a | avxi(b); +} +__forceinline const avxi operator|(const int32_t a, const avxi &b) +{ + return avxi(a) | b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator^(const avxi &a, const avxi 
&b) +{ + return _mm256_xor_si256(a.m256, b.m256); +} +#else +__forceinline const avxi operator^(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); +} +#endif +__forceinline const avxi operator^(const avxi &a, const int32_t b) +{ + return a ^ avxi(b); +} +__forceinline const avxi operator^(const int32_t a, const avxi &b) +{ + return avxi(a) ^ b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi operator<<(const avxi &a, const int32_t n) +{ + return _mm256_slli_epi32(a.m256, n); +} +__forceinline const avxi operator>>(const avxi &a, const int32_t n) +{ + return _mm256_srai_epi32(a.m256, n); +} + +__forceinline const avxi sra(const avxi &a, const int32_t b) +{ + return _mm256_srai_epi32(a.m256, b); +} +__forceinline const avxi srl(const avxi &a, const int32_t b) +{ + return _mm256_srli_epi32(a.m256, b); +} +#else +__forceinline const avxi operator<<(const avxi &a, const int32_t n) +{ + return avxi(_mm_slli_epi32(a.l, n), _mm_slli_epi32(a.h, n)); +} +__forceinline const avxi operator>>(const avxi &a, const int32_t n) +{ + return avxi(_mm_srai_epi32(a.l, n), _mm_srai_epi32(a.h, n)); +} + +__forceinline const avxi sra(const avxi &a, const int32_t b) +{ + return avxi(_mm_srai_epi32(a.l, b), _mm_srai_epi32(a.h, b)); +} +__forceinline const avxi srl(const avxi &a, const int32_t b) +{ + return avxi(_mm_srli_epi32(a.l, b), _mm_srli_epi32(a.h, b)); +} +#endif + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi min(const avxi &a, const avxi &b) +{ + return _mm256_min_epi32(a.m256, b.m256); +} +#else +__forceinline const avxi min(const avxi &a, const avxi &b) +{ + return avxi(_mm_min_epi32(a.l, b.l), _mm_min_epi32(a.h, b.h)); +} +#endif +__forceinline const avxi min(const avxi &a, const int32_t b) +{ + return min(a, avxi(b)); +} +__forceinline const avxi min(const int32_t a, const avxi &b) +{ + return min(avxi(a), b); +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxi max(const avxi &a, const avxi &b) +{ + return _mm256_max_epi32(a.m256, b.m256); +} +#else +__forceinline const avxi max(const avxi &a, const avxi &b) +{ + return avxi(_mm_max_epi32(a.l, b.l), _mm_max_epi32(a.h, b.h)); +} +#endif +__forceinline const avxi max(const avxi &a, const int32_t b) +{ + return max(a, avxi(b)); +} +__forceinline const avxi max(const int32_t a, const avxi &b) +{ + return max(avxi(a), b); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Assignment Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline avxi &operator+=(avxi &a, const avxi &b) +{ + return a = a + b; +} +__forceinline avxi &operator+=(avxi &a, const int32_t b) +{ + return a = a + b; +} + +__forceinline avxi &operator-=(avxi &a, const avxi &b) +{ + return a = a - b; +} +__forceinline avxi &operator-=(avxi &a, const int32_t b) +{ + return a = a - b; +} + +__forceinline avxi &operator*=(avxi &a, const avxi &b) +{ + return a = a * b; +} +__forceinline avxi &operator*=(avxi &a, const int32_t b) +{ + return a = a * b; +} + +__forceinline avxi &operator&=(avxi &a, const avxi &b) +{ + return a = a & b; +} +__forceinline avxi &operator&=(avxi &a, const int32_t b) +{ + return a = a & b; +} + +__forceinline avxi &operator|=(avxi &a, const avxi &b) +{ + return a = a | b; +} +__forceinline avxi &operator|=(avxi &a, const int32_t b) +{ + return a = a | b; +} + +__forceinline avxi &operator^=(avxi &a, const avxi &b) +{ + return a = a ^ b; +} +__forceinline avxi 
&operator^=(avxi &a, const int32_t b) +{ + return a = a ^ b; +} + +__forceinline avxi &operator<<=(avxi &a, const int32_t b) +{ + return a = a << b; +} +__forceinline avxi &operator>>=(avxi &a, const int32_t b) +{ + return a = a >> b; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__KERNEL_AVX2__) +__forceinline const avxb operator==(const avxi &a, const avxi &b) +{ + return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a.m256, b.m256)); +} +#else +__forceinline const avxb operator==(const avxi &a, const avxi &b) +{ + return avxb(_mm_castsi128_ps(_mm_cmpeq_epi32(a.l, b.l)), + _mm_castsi128_ps(_mm_cmpeq_epi32(a.h, b.h))); +} +#endif +__forceinline const avxb operator==(const avxi &a, const int32_t b) +{ + return a == avxi(b); +} +__forceinline const avxb operator==(const int32_t a, const avxi &b) +{ + return avxi(a) == b; +} + +__forceinline const avxb operator!=(const avxi &a, const avxi &b) +{ + return !(a == b); +} +__forceinline const avxb operator!=(const avxi &a, const int32_t b) +{ + return a != avxi(b); +} +__forceinline const avxb operator!=(const int32_t a, const avxi &b) +{ + return avxi(a) != b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxb operator<(const avxi &a, const avxi &b) +{ + return _mm256_castsi256_ps(_mm256_cmpgt_epi32(b.m256, a.m256)); +} +#else +__forceinline const avxb operator<(const avxi &a, const avxi &b) +{ + return avxb(_mm_castsi128_ps(_mm_cmplt_epi32(a.l, b.l)), + _mm_castsi128_ps(_mm_cmplt_epi32(a.h, b.h))); +} +#endif +__forceinline const avxb operator<(const avxi &a, const int32_t b) +{ + return a < avxi(b); +} +__forceinline const avxb operator<(const int32_t a, const avxi &b) +{ + return avxi(a) < b; +} + +__forceinline const avxb operator>=(const avxi &a, const avxi &b) +{ + return !(a < b); +} +__forceinline const avxb operator>=(const avxi &a, const int32_t b) +{ + return a >= avxi(b); +} +__forceinline const avxb operator>=(const int32_t a, const avxi &b) +{ + return avxi(a) >= b; +} + +#if defined(__KERNEL_AVX2__) +__forceinline const avxb operator>(const avxi &a, const avxi &b) +{ + return _mm256_castsi256_ps(_mm256_cmpgt_epi32(a.m256, b.m256)); +} +#else +__forceinline const avxb operator>(const avxi &a, const avxi &b) +{ + return avxb(_mm_castsi128_ps(_mm_cmpgt_epi32(a.l, b.l)), + _mm_castsi128_ps(_mm_cmpgt_epi32(a.h, b.h))); +} +#endif +__forceinline const avxb operator>(const avxi &a, const int32_t b) +{ + return a > avxi(b); +} +__forceinline const avxb operator>(const int32_t a, const avxi &b) +{ + return avxi(a) > b; +} + +__forceinline const avxb operator<=(const avxi &a, const avxi &b) +{ + return !(a > b); +} +__forceinline const avxb operator<=(const avxi &a, const int32_t b) +{ + return a <= avxi(b); +} +__forceinline const avxb operator<=(const int32_t a, const avxi &b) +{ + return avxi(a) <= b; +} + +__forceinline const avxi select(const avxb &m, const avxi &t, const avxi &f) +{ + return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(f), _mm256_castsi256_ps(t), m)); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Movement/Shifting/Shuffling Functions +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__KERNEL_AVX2__) +__forceinline avxi unpacklo(const avxi &a, const avxi &b) +{ + return _mm256_unpacklo_epi32(a.m256, b.m256); +} +__forceinline avxi 
unpackhi(const avxi &a, const avxi &b) +{ + return _mm256_unpackhi_epi32(a.m256, b.m256); +} +#else +__forceinline avxi unpacklo(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); +} +__forceinline avxi unpackhi(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); +} +#endif + +template __forceinline const avxi shuffle(const avxi &a) +{ + return _mm256_castps_si256(_mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i, i, i, i))); +} + +template __forceinline const avxi shuffle(const avxi &a) +{ + return _mm256_permute2f128_si256(a, a, (i1 << 4) | (i0 << 0)); +} + +template __forceinline const avxi shuffle(const avxi &a, const avxi &b) +{ + return _mm256_permute2f128_si256(a, b, (i1 << 4) | (i0 << 0)); +} + +template +__forceinline const avxi shuffle(const avxi &a) +{ + return _mm256_castps_si256( + _mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i3, i2, i1, i0))); +} + +template +__forceinline const avxi shuffle(const avxi &a, const avxi &b) +{ + return _mm256_castps_si256(_mm256_shuffle_ps( + _mm256_castsi256_ps(a), _mm256_castsi256_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); +} + +template<> __forceinline const avxi shuffle<0, 0, 2, 2>(const avxi &b) +{ + return _mm256_castps_si256(_mm256_moveldup_ps(_mm256_castsi256_ps(b))); +} +template<> __forceinline const avxi shuffle<1, 1, 3, 3>(const avxi &b) +{ + return _mm256_castps_si256(_mm256_movehdup_ps(_mm256_castsi256_ps(b))); +} +template<> __forceinline const avxi shuffle<0, 1, 0, 1>(const avxi &b) +{ + return _mm256_castps_si256( + _mm256_castpd_ps(_mm256_movedup_pd(_mm256_castps_pd(_mm256_castsi256_ps(b))))); +} + +__forceinline const avxi broadcast(const int *ptr) +{ + return _mm256_castps_si256(_mm256_broadcast_ss((const float *)ptr)); +} +template __forceinline const avxi insert(const avxi &a, const ssei &b) +{ + return _mm256_insertf128_si256(a, b, i); +} +template __forceinline const ssei extract(const avxi &a) +{ + return _mm256_extractf128_si256(a, i); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Reductions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const avxi vreduce_min2(const avxi &v) +{ + return min(v, shuffle<1, 0, 3, 2>(v)); +} +__forceinline const avxi vreduce_min4(const avxi &v) +{ + avxi v1 = vreduce_min2(v); + return min(v1, shuffle<2, 3, 0, 1>(v1)); +} +__forceinline const avxi vreduce_min(const avxi &v) +{ + avxi v1 = vreduce_min4(v); + return min(v1, shuffle<1, 0>(v1)); +} + +__forceinline const avxi vreduce_max2(const avxi &v) +{ + return max(v, shuffle<1, 0, 3, 2>(v)); +} +__forceinline const avxi vreduce_max4(const avxi &v) +{ + avxi v1 = vreduce_max2(v); + return max(v1, shuffle<2, 3, 0, 1>(v1)); +} +__forceinline const avxi vreduce_max(const avxi &v) +{ + avxi v1 = vreduce_max4(v); + return max(v1, shuffle<1, 0>(v1)); +} + +__forceinline const avxi vreduce_add2(const avxi &v) +{ + return v + shuffle<1, 0, 3, 2>(v); +} +__forceinline const avxi vreduce_add4(const avxi &v) +{ + avxi v1 = vreduce_add2(v); + return v1 + shuffle<2, 3, 0, 1>(v1); +} +__forceinline const avxi vreduce_add(const avxi &v) +{ + avxi v1 = vreduce_add4(v); + return v1 + shuffle<1, 0>(v1); +} + +__forceinline int reduce_min(const avxi &v) +{ + return extract<0>(extract<0>(vreduce_min(v))); +} +__forceinline int reduce_max(const avxi &v) +{ + return 
extract<0>(extract<0>(vreduce_max(v))); +} +__forceinline int reduce_add(const avxi &v) +{ + return extract<0>(extract<0>(vreduce_add(v))); +} + +__forceinline uint32_t select_min(const avxi &v) +{ + return __bsf(movemask(v == vreduce_min(v))); +} +__forceinline uint32_t select_max(const avxi &v) +{ + return __bsf(movemask(v == vreduce_max(v))); +} + +__forceinline uint32_t select_min(const avxb &valid, const avxi &v) +{ + const avxi a = select(valid, v, avxi(pos_inf)); + return __bsf(movemask(valid & (a == vreduce_min(a)))); +} +__forceinline uint32_t select_max(const avxb &valid, const avxi &v) +{ + const avxi a = select(valid, v, avxi(neg_inf)); + return __bsf(movemask(valid & (a == vreduce_max(a)))); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Output Operators +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_avxi(const char *label, const avxi &a) +{ + printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); +} + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/boundbox.h b/intern/cycles/util/boundbox.h new file mode 100644 index 00000000000..ed81e4cf8c3 --- /dev/null +++ b/intern/cycles/util/boundbox.h @@ -0,0 +1,282 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
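As a usage sketch for the avxi reductions above: the eight-lane constructor is the one defined earlier in avxi.h, the values are arbitrary, and select_min() additionally relies on movemask()/__bsf() from the neighbouring SIMD headers.

const avxi v(7, 3, 9, 1, 4, 8, 2, 6);
const int lowest = reduce_min(v);    /* 1 */
const int total = reduce_add(v);     /* 40 */
const uint32_t lane = select_min(v); /* 3: first lane holding the minimum */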
+ */ + +#ifndef __UTIL_BOUNDBOX_H__ +#define __UTIL_BOUNDBOX_H__ + +#include +#include + +#include "util/math.h" +#include "util/string.h" +#include "util/transform.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* 3D BoundBox */ + +class BoundBox { + public: + float3 min, max; + + __forceinline BoundBox() + { + } + + __forceinline BoundBox(const float3 &pt) : min(pt), max(pt) + { + } + + __forceinline BoundBox(const float3 &min_, const float3 &max_) : min(min_), max(max_) + { + } + + enum empty_t { empty = 0 }; + + __forceinline BoundBox(empty_t) + : min(make_float3(FLT_MAX, FLT_MAX, FLT_MAX)), max(make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX)) + { + } + + __forceinline void grow(const float3 &pt) + { + /* the order of arguments to min is such that if pt is nan, it will not + * influence the resulting bounding box */ + min = ccl::min(pt, min); + max = ccl::max(pt, max); + } + + __forceinline void grow(const float3 &pt, float border) + { + float3 shift = make_float3(border, border, border); + min = ccl::min(pt - shift, min); + max = ccl::max(pt + shift, max); + } + + __forceinline void grow(const BoundBox &bbox) + { + grow(bbox.min); + grow(bbox.max); + } + + __forceinline void grow_safe(const float3 &pt) + { + /* the order of arguments to min is such that if pt is nan, it will not + * influence the resulting bounding box */ + if (isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z)) { + min = ccl::min(pt, min); + max = ccl::max(pt, max); + } + } + + __forceinline void grow_safe(const float3 &pt, float border) + { + if (isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z) && isfinite(border)) { + float3 shift = make_float3(border, border, border); + min = ccl::min(pt - shift, min); + max = ccl::max(pt + shift, max); + } + } + + __forceinline void grow_safe(const BoundBox &bbox) + { + grow_safe(bbox.min); + grow_safe(bbox.max); + } + + __forceinline void intersect(const BoundBox &bbox) + { + min = ccl::max(min, bbox.min); + max = ccl::min(max, bbox.max); + } + + /* todo: avoid using this */ + __forceinline float safe_area() const + { + if (!((min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z))) + return 0.0f; + + return area(); + } + + __forceinline float area() const + { + return half_area() * 2.0f; + } + + __forceinline float half_area() const + { + float3 d = max - min; + return (d.x * d.z + d.y * d.z + d.x * d.y); + } + + __forceinline float3 center() const + { + return 0.5f * (min + max); + } + + __forceinline float3 center2() const + { + return min + max; + } + + __forceinline float3 size() const + { + return max - min; + } + + __forceinline bool valid() const + { + return (min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z) && + (isfinite(min.x) && isfinite(min.y) && isfinite(min.z)) && + (isfinite(max.x) && isfinite(max.y) && isfinite(max.z)); + } + + BoundBox transformed(const Transform *tfm) const + { + BoundBox result = BoundBox::empty; + + for (int i = 0; i < 8; i++) { + float3 p; + + p.x = (i & 1) ? min.x : max.x; + p.y = (i & 2) ? min.y : max.y; + p.z = (i & 4) ? 
min.z : max.z; + + result.grow(transform_point(tfm, p)); + } + + return result; + } + + __forceinline bool intersects(const BoundBox &other) + { + float3 center_diff = center() - other.center(), total_size = (size() + other.size()) * 0.5f; + return fabsf(center_diff.x) <= total_size.x && fabsf(center_diff.y) <= total_size.y && + fabsf(center_diff.z) <= total_size.z; + } +}; + +__forceinline BoundBox merge(const BoundBox &bbox, const float3 &pt) +{ + return BoundBox(min(bbox.min, pt), max(bbox.max, pt)); +} + +__forceinline BoundBox merge(const BoundBox &a, const BoundBox &b) +{ + return BoundBox(min(a.min, b.min), max(a.max, b.max)); +} + +__forceinline BoundBox merge(const BoundBox &a, + const BoundBox &b, + const BoundBox &c, + const BoundBox &d) +{ + return merge(merge(a, b), merge(c, d)); +} + +__forceinline BoundBox intersect(const BoundBox &a, const BoundBox &b) +{ + return BoundBox(max(a.min, b.min), min(a.max, b.max)); +} + +__forceinline BoundBox intersect(const BoundBox &a, const BoundBox &b, const BoundBox &c) +{ + return intersect(a, intersect(b, c)); +} + +/* 2D BoundBox */ + +class BoundBox2D { + public: + float left; + float right; + float bottom; + float top; + + BoundBox2D() : left(0.0f), right(1.0f), bottom(0.0f), top(1.0f) + { + } + + bool operator==(const BoundBox2D &other) const + { + return (left == other.left && right == other.right && bottom == other.bottom && + top == other.top); + } + + float width() + { + return right - left; + } + + float height() + { + return top - bottom; + } + + BoundBox2D operator*(float f) const + { + BoundBox2D result; + + result.left = left * f; + result.right = right * f; + result.bottom = bottom * f; + result.top = top * f; + + return result; + } + + BoundBox2D subset(const BoundBox2D &other) const + { + BoundBox2D subset; + + subset.left = left + other.left * (right - left); + subset.right = left + other.right * (right - left); + subset.bottom = bottom + other.bottom * (top - bottom); + subset.top = bottom + other.top * (top - bottom); + + return subset; + } + + BoundBox2D make_relative_to(const BoundBox2D &other) const + { + BoundBox2D result; + + result.left = ((left - other.left) / (other.right - other.left)); + result.right = ((right - other.left) / (other.right - other.left)); + result.bottom = ((bottom - other.bottom) / (other.top - other.bottom)); + result.top = ((top - other.bottom) / (other.top - other.bottom)); + + return result; + } + + BoundBox2D clamp(float mn = 0.0f, float mx = 1.0f) + { + BoundBox2D result; + + result.left = ccl::clamp(left, mn, mx); + result.right = ccl::clamp(right, mn, mx); + result.bottom = ccl::clamp(bottom, mn, mx); + result.top = ccl::clamp(top, mn, mx); + + return result; + } +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_BOUNDBOX_H__ */ diff --git a/intern/cycles/util/color.h b/intern/cycles/util/color.h new file mode 100644 index 00000000000..e2a5c5b9c4a --- /dev/null +++ b/intern/cycles/util/color.h @@ -0,0 +1,296 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
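A short sketch of the BoundBox interface defined above; the points and border are made up for illustration.

BoundBox bounds = BoundBox::empty;
bounds.grow(make_float3(0.0f, 0.0f, 0.0f));
bounds.grow(make_float3(1.0f, 2.0f, 3.0f), 0.1f); /* grow with a border */
const BoundBox merged = merge(bounds, make_float3(-1.0f, 0.0f, 0.0f));
if (merged.valid()) {
  const float3 extent = merged.size();            /* (2.1f, 2.1f, 3.1f) */
  (void)extent;
}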
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_COLOR_H__ +#define __UTIL_COLOR_H__ + +#include "util/math.h" +#include "util/types.h" + +#if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__) +# include "util/simd.h" +#endif + +CCL_NAMESPACE_BEGIN + +ccl_device uchar float_to_byte(float val) +{ + return ((val <= 0.0f) ? 0 : + ((val > (1.0f - 0.5f / 255.0f)) ? 255 : (uchar)((255.0f * val) + 0.5f))); +} + +ccl_device uchar4 color_float_to_byte(float3 c) +{ + uchar r, g, b; + + r = float_to_byte(c.x); + g = float_to_byte(c.y); + b = float_to_byte(c.z); + + return make_uchar4(r, g, b, 0); +} + +ccl_device uchar4 color_float4_to_uchar4(float4 c) +{ + uchar r, g, b, a; + + r = float_to_byte(c.x); + g = float_to_byte(c.y); + b = float_to_byte(c.z); + a = float_to_byte(c.w); + + return make_uchar4(r, g, b, a); +} + +ccl_device_inline float3 color_byte_to_float(uchar4 c) +{ + return make_float3(c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f)); +} + +ccl_device_inline float4 color_uchar4_to_float4(uchar4 c) +{ + return make_float4( + c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f), c.w * (1.0f / 255.0f)); +} + +ccl_device float color_srgb_to_linear(float c) +{ + if (c < 0.04045f) + return (c < 0.0f) ? 0.0f : c * (1.0f / 12.92f); + else + return powf((c + 0.055f) * (1.0f / 1.055f), 2.4f); +} + +ccl_device float color_linear_to_srgb(float c) +{ + if (c < 0.0031308f) + return (c < 0.0f) ? 0.0f : c * 12.92f; + else + return 1.055f * powf(c, 1.0f / 2.4f) - 0.055f; +} + +ccl_device float3 rgb_to_hsv(float3 rgb) +{ + float cmax, cmin, h, s, v, cdelta; + float3 c; + + cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z)); + cmin = min(rgb.x, min(rgb.y, rgb.z)); + cdelta = cmax - cmin; + + v = cmax; + + if (cmax != 0.0f) { + s = cdelta / cmax; + } + else { + s = 0.0f; + h = 0.0f; + } + + if (s != 0.0f) { + float3 cmax3 = make_float3(cmax, cmax, cmax); + c = (cmax3 - rgb) / cdelta; + + if (rgb.x == cmax) + h = c.z - c.y; + else if (rgb.y == cmax) + h = 2.0f + c.x - c.z; + else + h = 4.0f + c.y - c.x; + + h /= 6.0f; + + if (h < 0.0f) + h += 1.0f; + } + else { + h = 0.0f; + } + + return make_float3(h, s, v); +} + +ccl_device float3 hsv_to_rgb(float3 hsv) +{ + float i, f, p, q, t, h, s, v; + float3 rgb; + + h = hsv.x; + s = hsv.y; + v = hsv.z; + + if (s != 0.0f) { + if (h == 1.0f) + h = 0.0f; + + h *= 6.0f; + i = floorf(h); + f = h - i; + rgb = make_float3(f, f, f); + p = v * (1.0f - s); + q = v * (1.0f - (s * f)); + t = v * (1.0f - (s * (1.0f - f))); + + if (i == 0.0f) + rgb = make_float3(v, t, p); + else if (i == 1.0f) + rgb = make_float3(q, v, p); + else if (i == 2.0f) + rgb = make_float3(p, v, t); + else if (i == 3.0f) + rgb = make_float3(p, q, v); + else if (i == 4.0f) + rgb = make_float3(t, p, v); + else + rgb = make_float3(v, p, q); + } + else { + rgb = make_float3(v, v, v); + } + + return rgb; +} + +ccl_device float3 xyY_to_xyz(float x, float y, float Y) +{ + float X, Z; + + if (y != 0.0f) + X = (x / y) * Y; + else + X = 0.0f; + + if (y != 0.0f && Y != 0.0f) + Z = (1.0f - x - y) / y * Y; + else + Z = 0.0f; + + return make_float3(X, Y, Z); +} + +#ifdef __KERNEL_SSE2__ +/* + * Calculate initial guess for arg^exp based on float representation + * This method gives a constant bias, + * which can be easily compensated by multiplication with bias_coeff. + * Gives better results for exponents near 1 (e. g. 4/5). 
+ * exp = exponent, encoded as uint32_t + * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t + */ +template ccl_device_inline ssef fastpow(const ssef &arg) +{ + ssef ret; + ret = arg * cast(ssei(e2coeff)); + ret = ssef(cast(ret)); + ret = ret * cast(ssei(exp)); + ret = cast(ssei(ret)); + return ret; +} + +/* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */ +ccl_device_inline ssef improve_5throot_solution(const ssef &old_result, const ssef &x) +{ + ssef approx2 = old_result * old_result; + ssef approx4 = approx2 * approx2; + ssef t = x / approx4; + ssef summ = madd(ssef(4.0f), old_result, t); + return summ * ssef(1.0f / 5.0f); +} + +/* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */ +ccl_device_inline ssef fastpow24(const ssef &arg) +{ + /* max, avg and |avg| errors were calculated in gcc without FMA instructions + * The final precision should be better than powf in glibc */ + + /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */ + /* 0x3F4CCCCD = 4/5 */ + /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */ + ssef x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05 + ssef arg2 = arg * arg; + ssef arg4 = arg2 * arg2; + + /* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */ + x = improve_5throot_solution(x, arg4); + /* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */ + x = improve_5throot_solution(x, arg4); + /* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */ + x = improve_5throot_solution(x, arg4); + + return x * (x * x); +} + +ccl_device ssef color_srgb_to_linear(const ssef &c) +{ + sseb cmp = c < ssef(0.04045f); + ssef lt = max(c * ssef(1.0f / 12.92f), ssef(0.0f)); + ssef gtebase = (c + ssef(0.055f)) * ssef(1.0f / 1.055f); /* fma */ + ssef gte = fastpow24(gtebase); + return select(cmp, lt, gte); +} +#endif /* __KERNEL_SSE2__ */ + +ccl_device float3 color_srgb_to_linear_v3(float3 c) +{ + return make_float3( + color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z)); +} + +ccl_device float3 color_linear_to_srgb_v3(float3 c) +{ + return make_float3( + color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z)); +} + +ccl_device float4 color_linear_to_srgb_v4(float4 c) +{ + return make_float4( + color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z), c.w); +} + +ccl_device float4 color_srgb_to_linear_v4(float4 c) +{ +#ifdef __KERNEL_SSE2__ + ssef r_ssef; + float4 &r = (float4 &)r_ssef; + r = c; + r_ssef = color_srgb_to_linear(r_ssef); + r.w = c.w; + return r; +#else + return make_float4( + color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z), c.w); +#endif +} + +ccl_device float3 color_highlight_compress(float3 color, ccl_private float3 *variance) +{ + color += one_float3(); + if (variance) { + *variance *= sqr3(one_float3() / color); + } + return log3(color); +} + +ccl_device float3 color_highlight_uncompress(float3 color) +{ + return exp3(color) - one_float3(); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_COLOR_H__ */ diff --git a/intern/cycles/util/debug.cpp b/intern/cycles/util/debug.cpp new file mode 100644 index 00000000000..b49df3d42bc --- /dev/null +++ b/intern/cycles/util/debug.cpp @@ -0,0 +1,124 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
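To show how the byte/float and sRGB/linear helpers above chain together, a small sketch; the input color is arbitrary.

const uchar4 in = make_uchar4(200, 128, 64, 255);
const float3 linear = color_srgb_to_linear_v3(color_byte_to_float(in));
/* ... shading math in linear space ... */
const uchar4 out = color_float_to_byte(color_linear_to_srgb_v3(linear));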
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/debug.h" + +#include + +#include "bvh/params.h" + +#include "util/log.h" +#include "util/string.h" + +CCL_NAMESPACE_BEGIN + +DebugFlags::CPU::CPU() + : avx2(true), avx(true), sse41(true), sse3(true), sse2(true), bvh_layout(BVH_LAYOUT_AUTO) +{ + reset(); +} + +void DebugFlags::CPU::reset() +{ +#define STRINGIFY(x) #x +#define CHECK_CPU_FLAGS(flag, env) \ + do { \ + flag = (getenv(env) == NULL); \ + if (!flag) { \ + VLOG(1) << "Disabling " << STRINGIFY(flag) << " instruction set."; \ + } \ + } while (0) + + CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2"); + CHECK_CPU_FLAGS(avx, "CYCLES_CPU_NO_AVX"); + CHECK_CPU_FLAGS(sse41, "CYCLES_CPU_NO_SSE41"); + CHECK_CPU_FLAGS(sse3, "CYCLES_CPU_NO_SSE3"); + CHECK_CPU_FLAGS(sse2, "CYCLES_CPU_NO_SSE2"); + +#undef STRINGIFY +#undef CHECK_CPU_FLAGS + + bvh_layout = BVH_LAYOUT_AUTO; +} + +DebugFlags::CUDA::CUDA() : adaptive_compile(false) +{ + reset(); +} + +DebugFlags::HIP::HIP() : adaptive_compile(false) +{ + reset(); +} + +void DebugFlags::CUDA::reset() +{ + if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL) + adaptive_compile = true; +} + +void DebugFlags::HIP::reset() +{ + if (getenv("CYCLES_HIP_ADAPTIVE_COMPILE") != NULL) + adaptive_compile = true; +} + +DebugFlags::OptiX::OptiX() +{ + reset(); +} + +void DebugFlags::OptiX::reset() +{ + use_debug = false; +} + +DebugFlags::DebugFlags() : viewport_static_bvh(false), running_inside_blender(false) +{ + /* Nothing for now. */ +} + +void DebugFlags::reset() +{ + viewport_static_bvh = false; + cpu.reset(); + cuda.reset(); + optix.reset(); +} + +std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags) +{ + os << "CPU flags:\n" + << " AVX2 : " << string_from_bool(debug_flags.cpu.avx2) << "\n" + << " AVX : " << string_from_bool(debug_flags.cpu.avx) << "\n" + << " SSE4.1 : " << string_from_bool(debug_flags.cpu.sse41) << "\n" + << " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n" + << " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n" + << " BVH layout : " << bvh_layout_name(debug_flags.cpu.bvh_layout) << "\n"; + + os << "CUDA flags:\n" + << " Adaptive Compile : " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n"; + + os << "OptiX flags:\n" + << " Debug : " << string_from_bool(debug_flags.optix.use_debug) << "\n"; + + os << "HIP flags:\n" + << " HIP streams : " << string_from_bool(debug_flags.hip.adaptive_compile) << "\n"; + + return os; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/debug.h b/intern/cycles/util/debug.h new file mode 100644 index 00000000000..58b2b047261 --- /dev/null +++ b/intern/cycles/util/debug.h @@ -0,0 +1,167 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
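The flags reset above are normally consulted through the DebugFlags singleton declared in debug.h below; a sketch of the kind of CPU kernel dispatch that uses them (the kernel bodies are placeholders).

DebugFlags &flags = DebugFlags::get();
if (flags.cpu.has_avx2()) {
  /* run the AVX2 kernel */
}
else if (flags.cpu.has_sse41()) {
  /* fall back to the SSE4.1 kernel */
}
else {
  /* scalar/SSE2 fallback */
}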
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_DEBUG_H__ +#define __UTIL_DEBUG_H__ + +#include +#include + +#include "bvh/params.h" + +CCL_NAMESPACE_BEGIN + +/* Global storage for all sort of flags used to fine-tune behavior of particular + * areas for the development purposes, without officially exposing settings to + * the interface. + */ +class DebugFlags { + public: + /* Use static BVH in viewport, to match final render exactly. */ + bool viewport_static_bvh; + + bool running_inside_blender; + + /* Descriptor of CPU feature-set to be used. */ + struct CPU { + CPU(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Flags describing which instructions sets are allowed for use. */ + bool avx2; + bool avx; + bool sse41; + bool sse3; + bool sse2; + + /* Check functions to see whether instructions up to the given one + * are allowed for use. + */ + bool has_avx2() + { + return has_avx() && avx2; + } + bool has_avx() + { + return has_sse41() && avx; + } + bool has_sse41() + { + return has_sse3() && sse41; + } + bool has_sse3() + { + return has_sse2() && sse3; + } + bool has_sse2() + { + return sse2; + } + + /* Requested BVH layout. + * + * By default the fastest will be used. For debugging the BVH used by other + * CPUs and GPUs can be selected here instead. + */ + BVHLayout bvh_layout; + }; + + /* Descriptor of CUDA feature-set to be used. */ + struct CUDA { + CUDA(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Whether adaptive feature based runtime compile is enabled or not. + * Requires the CUDA Toolkit and only works on Linux at the moment. */ + bool adaptive_compile; + }; + + /* Descriptor of HIP feature-set to be used. */ + struct HIP { + HIP(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Whether adaptive feature based runtime compile is enabled or not.*/ + bool adaptive_compile; + }; + + /* Descriptor of OptiX feature-set to be used. */ + struct OptiX { + OptiX(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Load OptiX module with debug capabilities. Will lower logging verbosity level, enable + * validations, and lower optimization level. */ + bool use_debug; + }; + + /* Get instance of debug flags registry. */ + static DebugFlags &get() + { + static DebugFlags instance; + return instance; + } + + /* Reset flags to their defaults. */ + void reset(); + + /* Requested CPU flags. */ + CPU cpu; + + /* Requested CUDA flags. */ + CUDA cuda; + + /* Requested OptiX flags. */ + OptiX optix; + + /* Requested HIP flags. 
*/ + HIP hip; + + private: + DebugFlags(); + +#if (__cplusplus > 199711L) + public: + explicit DebugFlags(DebugFlags const & /*other*/) = delete; + void operator=(DebugFlags const & /*other*/) = delete; +#else + private: + explicit DebugFlags(DebugFlags const & /*other*/); + void operator=(DebugFlags const & /*other*/); +#endif +}; + +typedef DebugFlags &DebugFlagsRef; +typedef const DebugFlags &DebugFlagsConstRef; + +inline DebugFlags &DebugFlags() +{ + return DebugFlags::get(); +} + +std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags); + +CCL_NAMESPACE_END + +#endif /* __UTIL_DEBUG_H__ */ diff --git a/intern/cycles/util/defines.h b/intern/cycles/util/defines.h new file mode 100644 index 00000000000..9b1698d461a --- /dev/null +++ b/intern/cycles/util/defines.h @@ -0,0 +1,146 @@ + +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* clang-format off */ + +/* #define __forceinline triggers a bug in some clang-format versions, disable + * format for entire file to keep results consistent. */ + +#ifndef __UTIL_DEFINES_H__ +#define __UTIL_DEFINES_H__ + +/* Bitness */ + +#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \ + defined(_M_X64) || defined(__aarch64__) +# define __KERNEL_64_BIT__ +#endif + +/* Qualifiers for kernel code shared by CPU and GPU */ + +#ifndef __KERNEL_GPU__ +# define ccl_device static inline +# define ccl_device_noinline static +# define ccl_device_noinline_cpu ccl_device_noinline +# define ccl_global +# define ccl_static_constant static const +# define ccl_constant const +# define ccl_local +# define ccl_local_param +# define ccl_private +# define ccl_restrict __restrict +# define ccl_optional_struct_init +# define ccl_loop_no_unroll +# define ccl_attr_maybe_unused [[maybe_unused]] +# define __KERNEL_WITH_SSE_ALIGN__ + +# if defined(_WIN32) && !defined(FREE_WINDOWS) +# define ccl_device_inline static __forceinline +# define ccl_device_forceinline static __forceinline +# define ccl_align(...) __declspec(align(__VA_ARGS__)) +# ifdef __KERNEL_64_BIT__ +# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) +# else /* __KERNEL_64_BIT__ */ +# undef __KERNEL_WITH_SSE_ALIGN__ +/* No support for function arguments (error C2719). */ +# define ccl_try_align(...) +# endif /* __KERNEL_64_BIT__ */ +# define ccl_may_alias +# define ccl_always_inline __forceinline +# define ccl_never_inline __declspec(noinline) +# else /* _WIN32 && !FREE_WINDOWS */ +# define ccl_device_inline static inline __attribute__((always_inline)) +# define ccl_device_forceinline static inline __attribute__((always_inline)) +# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) +# ifndef FREE_WINDOWS64 +# define __forceinline inline __attribute__((always_inline)) +# endif +# define ccl_try_align(...) 
__attribute__((aligned(__VA_ARGS__))) +# define ccl_may_alias __attribute__((__may_alias__)) +# define ccl_always_inline __attribute__((always_inline)) +# define ccl_never_inline __attribute__((noinline)) +# endif /* _WIN32 && !FREE_WINDOWS */ + +/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */ +# ifndef ATTR_FALLTHROUGH +# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ +# define ATTR_FALLTHROUGH __attribute__((fallthrough)) +# else +# define ATTR_FALLTHROUGH ((void)0) +# endif +# endif +#endif /* __KERNEL_GPU__ */ + +/* macros */ + +/* hints for branch prediction, only use in code that runs a _lot_ */ +#if defined(__GNUC__) && defined(__KERNEL_CPU__) +# define LIKELY(x) __builtin_expect(!!(x), 1) +# define UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +# define LIKELY(x) (x) +# define UNLIKELY(x) (x) +#endif + +#if defined(__GNUC__) || defined(__clang__) +# if defined(__cplusplus) +/* Some magic to be sure we don't have reference in the type. */ +template static inline T decltype_helper(T x) +{ + return x; +} +# define TYPEOF(x) decltype(decltype_helper(x)) +# else +# define TYPEOF(x) typeof(x) +# endif +#endif + +/* Causes warning: + * incompatible types when assigning to type 'Foo' from type 'Bar' + * ... the compiler optimizes away the temp var */ +#ifdef __GNUC__ +# define CHECK_TYPE(var, type) \ + { \ + TYPEOF(var) * __tmp; \ + __tmp = (type *)NULL; \ + (void)__tmp; \ + } \ + (void)0 + +# define CHECK_TYPE_PAIR(var_a, var_b) \ + { \ + TYPEOF(var_a) * __tmp; \ + __tmp = (typeof(var_b) *)NULL; \ + (void)__tmp; \ + } \ + (void)0 +#else +# define CHECK_TYPE(var, type) +# define CHECK_TYPE_PAIR(var_a, var_b) +#endif + +/* can be used in simple macros */ +#define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val))) + +#ifndef __KERNEL_GPU__ +# include +# define util_assert(statement) assert(statement) +#else +# define util_assert(statement) +#endif + +#endif /* __UTIL_DEFINES_H__ */ diff --git a/intern/cycles/util/deque.h b/intern/cycles/util/deque.h new file mode 100644 index 00000000000..ccac961aa7d --- /dev/null +++ b/intern/cycles/util/deque.h @@ -0,0 +1,28 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_DEQUE_H__ +#define __UTIL_DEQUE_H__ + +#include + +CCL_NAMESPACE_BEGIN + +using std::deque; + +CCL_NAMESPACE_END + +#endif /* __UTIL_DEQUE_H__ */ diff --git a/intern/cycles/util/disjoint_set.h b/intern/cycles/util/disjoint_set.h new file mode 100644 index 00000000000..5226423d7cd --- /dev/null +++ b/intern/cycles/util/disjoint_set.h @@ -0,0 +1,75 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
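A small sketch of the type-checking and assert helpers from defines.h above. On GCC/Clang a mismatched CHECK_TYPE produces the "incompatible types" warning described in the header comment, and both macros compile away on other compilers; the variables are illustrative.

float radius = 1.0f;
int steps = 8;
CHECK_TYPE(radius, float);     /* silent: types match */
CHECK_TYPE_INLINE(steps, int); /* expression form, also silent here */
util_assert(steps > 0);        /* assert() on the CPU, a no-op in GPU kernels */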
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_DISJOINT_SET_H__ +#define __UTIL_DISJOINT_SET_H__ + +#include "util/array.h" +#include + +CCL_NAMESPACE_BEGIN + +class DisjointSet { + private: + array parents; + array ranks; + + public: + DisjointSet(size_t size) : parents(size), ranks(size) + { + for (size_t i = 0; i < size; i++) { + parents[i] = i; + ranks[i] = 0; + } + } + + size_t find(size_t x) + { + size_t root = x; + while (parents[root] != root) { + root = parents[root]; + } + while (parents[x] != root) { + size_t parent = parents[x]; + parents[x] = root; + x = parent; + } + return root; + } + + void join(size_t x, size_t y) + { + size_t x_root = find(x); + size_t y_root = find(y); + + if (x_root == y_root) { + return; + } + + if (ranks[x_root] < ranks[y_root]) { + std::swap(x_root, y_root); + } + parents[y_root] = x_root; + + if (ranks[x_root] == ranks[y_root]) { + ranks[x_root]++; + } + } +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_DISJOINT_SET_H__ */ diff --git a/intern/cycles/util/foreach.h b/intern/cycles/util/foreach.h new file mode 100644 index 00000000000..d907974be91 --- /dev/null +++ b/intern/cycles/util/foreach.h @@ -0,0 +1,24 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_FOREACH_H__ +#define __UTIL_FOREACH_H__ + +/* Nice foreach() loops for STL data structures. */ + +#define foreach(x, y) for (x : y) + +#endif /* __UTIL_FOREACH_H__ */ diff --git a/intern/cycles/util/function.h b/intern/cycles/util/function.h new file mode 100644 index 00000000000..f3cc00329ad --- /dev/null +++ b/intern/cycles/util/function.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
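A usage sketch for the DisjointSet structure above, with five elements and arbitrary joins.

DisjointSet sets(5);
sets.join(0, 1);
sets.join(3, 4);
const bool merged = (sets.find(0) == sets.find(1));   /* true */
const bool separate = (sets.find(1) != sets.find(3)); /* true */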
+ */ + +#ifndef __UTIL_FUNCTION_H__ +#define __UTIL_FUNCTION_H__ + +#include + +CCL_NAMESPACE_BEGIN + +#define function_bind std::bind +#define function_null nullptr +using std::function; +using std::placeholders::_1; +using std::placeholders::_2; +using std::placeholders::_3; +using std::placeholders::_4; +using std::placeholders::_5; +using std::placeholders::_6; +using std::placeholders::_7; +using std::placeholders::_8; +using std::placeholders::_9; + +CCL_NAMESPACE_END + +#endif /* __UTIL_FUNCTION_H__ */ diff --git a/intern/cycles/util/guarded_allocator.cpp b/intern/cycles/util/guarded_allocator.cpp new file mode 100644 index 00000000000..4063b301331 --- /dev/null +++ b/intern/cycles/util/guarded_allocator.cpp @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/guarded_allocator.h" +#include "util/stats.h" + +CCL_NAMESPACE_BEGIN + +static Stats global_stats(Stats::static_init); + +/* Internal API. */ + +void util_guarded_mem_alloc(size_t n) +{ + global_stats.mem_alloc(n); +} + +void util_guarded_mem_free(size_t n) +{ + global_stats.mem_free(n); +} + +/* Public API. */ + +size_t util_guarded_get_mem_used() +{ + return global_stats.mem_used; +} + +size_t util_guarded_get_mem_peak() +{ + return global_stats.mem_peak; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/guarded_allocator.h b/intern/cycles/util/guarded_allocator.h new file mode 100644 index 00000000000..f78cc5f5da9 --- /dev/null +++ b/intern/cycles/util/guarded_allocator.h @@ -0,0 +1,185 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_GUARDED_ALLOCATOR_H__ +#define __UTIL_GUARDED_ALLOCATOR_H__ + +#include +#include +#include + +#ifdef WITH_BLENDER_GUARDEDALLOC +# include "../../guardedalloc/MEM_guardedalloc.h" +#endif + +CCL_NAMESPACE_BEGIN + +/* Internal use only. */ +void util_guarded_mem_alloc(size_t n); +void util_guarded_mem_free(size_t n); + +/* Guarded allocator for the use with STL. 
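The allocator declared just below is meant to be plugged into STL containers so their allocations are counted in the global stats; a sketch of typical use, with the container and sizes chosen for illustration.

std::vector<float, GuardedAllocator<float>> samples;
samples.resize(1024);                            /* ~4 KB, tracked via util_guarded_mem_alloc() */
const size_t used = util_guarded_get_mem_used(); /* includes the allocation above */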
*/ +template class GuardedAllocator { + public: + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T *pointer; + typedef const T *const_pointer; + typedef T &reference; + typedef const T &const_reference; + typedef T value_type; + + GuardedAllocator() + { + } + GuardedAllocator(const GuardedAllocator &) + { + } + + T *allocate(size_t n, const void *hint = 0) + { + (void)hint; + size_t size = n * sizeof(T); + util_guarded_mem_alloc(size); + if (n == 0) { + return NULL; + } + T *mem; +#ifdef WITH_BLENDER_GUARDEDALLOC + /* C++ standard requires allocation functions to allocate memory suitably + * aligned for any standard type. This is 16 bytes for 64 bit platform as + * far as i concerned. We might over-align on 32bit here, but that should + * be all safe actually. + */ + mem = (T *)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); +#else + mem = (T *)malloc(size); +#endif + if (mem == NULL) { + throw std::bad_alloc(); + } + return mem; + } + + void deallocate(T *p, size_t n) + { + util_guarded_mem_free(n * sizeof(T)); + if (p != NULL) { +#ifdef WITH_BLENDER_GUARDEDALLOC + MEM_freeN(p); +#else + free(p); +#endif + } + } + + T *address(T &x) const + { + return &x; + } + + const T *address(const T &x) const + { + return &x; + } + + GuardedAllocator &operator=(const GuardedAllocator &) + { + return *this; + } + + size_t max_size() const + { + return size_t(-1); + } + + template struct rebind { + typedef GuardedAllocator other; + }; + + template GuardedAllocator(const GuardedAllocator &) + { + } + + template GuardedAllocator &operator=(const GuardedAllocator &) + { + return *this; + } + + inline bool operator==(GuardedAllocator const & /*other*/) const + { + return true; + } + inline bool operator!=(GuardedAllocator const &other) const + { + return !operator==(other); + } + +#ifdef _MSC_VER + /* Welcome to the black magic here. + * + * The issue is that MSVC C++ allocates container proxy on any + * vector initialization, including static vectors which don't + * have any data yet. This leads to several issues: + * + * - Static objects initialization fiasco (global_stats from + * util_stats.h might not be initialized yet). + * - If main() function changes allocator type (for example, + * this might happen with `blender --debug-memory`) nobody + * will know how to convert already allocated memory to a new + * guarded allocator. + * + * Here we work this around by making it so container proxy does + * not use guarded allocation. A bit fragile, unfortunately. + */ + template<> struct rebind { + typedef std::allocator other; + }; + + operator std::allocator() const + { + return std::allocator(); + } +#endif +}; + +/* Get memory usage and peak from the guarded STL allocator. */ +size_t util_guarded_get_mem_used(); +size_t util_guarded_get_mem_peak(); + +/* Call given function and keep track if it runs out of memory. + * + * If it does run out f memory, stop execution and set progress + * to do a global cancel. + * + * It's not fully robust, but good enough to catch obvious issues + * when running out of memory. + */ +#define MEM_GUARDED_CALL(progress, func, ...) 
\ + do { \ + try { \ + (func)(__VA_ARGS__); \ + } \ + catch (std::bad_alloc &) { \ + fprintf(stderr, "Error: run out of memory!\n"); \ + fflush(stderr); \ + (progress)->set_error("Out of memory"); \ + } \ + } while (false) + +CCL_NAMESPACE_END + +#endif /* __UTIL_GUARDED_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/half.h b/intern/cycles/util/half.h new file mode 100644 index 00000000000..016975e3c25 --- /dev/null +++ b/intern/cycles/util/half.h @@ -0,0 +1,169 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_HALF_H__ +#define __UTIL_HALF_H__ + +#include "util/math.h" +#include "util/types.h" + +#if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__) +# include "util/simd.h" +#endif + +CCL_NAMESPACE_BEGIN + +/* Half Floats */ + +/* CUDA has its own half data type, no need to define then */ +#if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__) +/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from + * unsigned shorts. */ +class half { + public: + half() : v(0) + { + } + half(const unsigned short &i) : v(i) + { + } + operator unsigned short() + { + return v; + } + half &operator=(const unsigned short &i) + { + v = i; + return *this; + } + + private: + unsigned short v; +}; +#endif + +struct half4 { + half x, y, z, w; +}; + +/* Conversion to/from half float for image textures + * + * Simplified float to half for fast sampling on processor without a native + * instruction, and eliminating any NaN and inf values. */ + +ccl_device_inline half float_to_half_image(float f) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) + return __float2half(f); +#else + const uint u = __float_as_uint(f); + /* Sign bit, shifted to its position. */ + uint sign_bit = u & 0x80000000; + sign_bit >>= 16; + /* Exponent. */ + uint exponent_bits = u & 0x7f800000; + /* Non-sign bits. */ + uint value_bits = u & 0x7fffffff; + value_bits >>= 13; /* Align mantissa on MSB. */ + value_bits -= 0x1c000; /* Adjust bias. */ + /* Flush-to-zero. */ + value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits; + /* Clamp-to-max. */ + value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits; + /* Denormals-as-zero. */ + value_bits = (exponent_bits == 0 ? 0 : value_bits); + /* Re-insert sign bit and return. */ + return (value_bits | sign_bit); +#endif +} + +ccl_device_inline float half_to_float_image(half h) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) + return __half2float(h); +#else + const int x = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13); + return __int_as_float(x); +#endif +} + +ccl_device_inline float4 half4_to_float4_image(const half4 h) +{ + /* Unable to use because it gives different results half_to_float_image, can we + * modify float_to_half_image so the conversion results are identical? */ +#if 0 /* defined(__KERNEL_AVX2__) */ + /* CPU: AVX. 
*/ + __m128i x = _mm_castpd_si128(_mm_load_sd((const double *)&h)); + return float4(_mm_cvtph_ps(x)); +#endif + + const float4 f = make_float4(half_to_float_image(h.x), + half_to_float_image(h.y), + half_to_float_image(h.z), + half_to_float_image(h.w)); + return f; +} + +/* Conversion to half float texture for display. + * + * Simplified float to half for fast display texture conversion on processors + * without a native instruction. Assumes no negative, no NaN, no inf, and sets + * denormal to 0. */ + +ccl_device_inline half float_to_half_display(const float f) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) + return __float2half(f); +#else + const int x = __float_as_int((f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f); + const int absolute = x & 0x7FFFFFFF; + const int Z = absolute + 0xC8000000; + const int result = (absolute < 0x38800000) ? 0 : Z; + const int rshift = (result >> 13); + return (rshift & 0x7FFF); +#endif +} + +ccl_device_inline half4 float4_to_half4_display(const float4 f) +{ +#ifdef __KERNEL_SSE2__ + /* CPU: SSE and AVX. */ + ssef x = min(max(load4f(f), 0.0f), 65504.0f); +# ifdef __KERNEL_AVX2__ + ssei rpack = _mm_cvtps_ph(x, 0); +# else + ssei absolute = cast(x) & 0x7FFFFFFF; + ssei Z = absolute + 0xC8000000; + ssei result = andnot(absolute < 0x38800000, Z); + ssei rshift = (result >> 13) & 0x7FFF; + ssei rpack = _mm_packs_epi32(rshift, rshift); +# endif + half4 h; + _mm_storel_pi((__m64 *)&h, _mm_castsi128_ps(rpack)); + return h; +#else + /* GPU and scalar fallback. */ + const half4 h = {float_to_half_display(f.x), + float_to_half_display(f.y), + float_to_half_display(f.z), + float_to_half_display(f.w)}; + return h; +#endif +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_HALF_H__ */ diff --git a/intern/cycles/util/hash.h b/intern/cycles/util/hash.h new file mode 100644 index 00000000000..013a0f90a27 --- /dev/null +++ b/intern/cycles/util/hash.h @@ -0,0 +1,389 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_HASH_H__ +#define __UTIL_HASH_H__ + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* ***** Jenkins Lookup3 Hash Functions ***** */ + +/* Source: http://burtleburtle.net/bob/c/lookup3.c */ + +#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) + +#define mix(a, b, c) \ + { \ + a -= c; \ + a ^= rot(c, 4); \ + c += b; \ + b -= a; \ + b ^= rot(a, 6); \ + a += c; \ + c -= b; \ + c ^= rot(b, 8); \ + b += a; \ + a -= c; \ + a ^= rot(c, 16); \ + c += b; \ + b -= a; \ + b ^= rot(a, 19); \ + a += c; \ + c -= b; \ + c ^= rot(b, 4); \ + b += a; \ + } \ + ((void)0) + +#define final(a, b, c) \ + { \ + c ^= b; \ + c -= rot(b, 14); \ + a ^= c; \ + a -= rot(c, 11); \ + b ^= a; \ + b -= rot(a, 25); \ + c ^= b; \ + c -= rot(b, 16); \ + a ^= c; \ + a -= rot(c, 4); \ + b ^= a; \ + b -= rot(a, 14); \ + c ^= b; \ + c -= rot(b, 24); \ + } \ + ((void)0) + +ccl_device_inline uint hash_uint(uint kx) +{ + uint a, b, c; + a = b = c = 0xdeadbeef + (1 << 2) + 13; + + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline uint hash_uint2(uint kx, uint ky) +{ + uint a, b, c; + a = b = c = 0xdeadbeef + (2 << 2) + 13; + + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline uint hash_uint3(uint kx, uint ky, uint kz) +{ + uint a, b, c; + a = b = c = 0xdeadbeef + (3 << 2) + 13; + + c += kz; + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline uint hash_uint4(uint kx, uint ky, uint kz, uint kw) +{ + uint a, b, c; + a = b = c = 0xdeadbeef + (4 << 2) + 13; + + a += kx; + b += ky; + c += kz; + mix(a, b, c); + + a += kw; + final(a, b, c); + + return c; +} + +#undef rot +#undef final +#undef mix + +/* Hashing uint or uint[234] into a float in the range [0, 1]. */ + +ccl_device_inline float hash_uint_to_float(uint kx) +{ + return (float)hash_uint(kx) / (float)0xFFFFFFFFu; +} + +ccl_device_inline float hash_uint2_to_float(uint kx, uint ky) +{ + return (float)hash_uint2(kx, ky) / (float)0xFFFFFFFFu; +} + +ccl_device_inline float hash_uint3_to_float(uint kx, uint ky, uint kz) +{ + return (float)hash_uint3(kx, ky, kz) / (float)0xFFFFFFFFu; +} + +ccl_device_inline float hash_uint4_to_float(uint kx, uint ky, uint kz, uint kw) +{ + return (float)hash_uint4(kx, ky, kz, kw) / (float)0xFFFFFFFFu; +} + +/* Hashing float or float[234] into a float in the range [0, 1]. */ + +ccl_device_inline float hash_float_to_float(float k) +{ + return hash_uint_to_float(__float_as_uint(k)); +} + +ccl_device_inline float hash_float2_to_float(float2 k) +{ + return hash_uint2_to_float(__float_as_uint(k.x), __float_as_uint(k.y)); +} + +ccl_device_inline float hash_float3_to_float(float3 k) +{ + return hash_uint3_to_float(__float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z)); +} + +ccl_device_inline float hash_float4_to_float(float4 k) +{ + return hash_uint4_to_float( + __float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z), __float_as_uint(k.w)); +} + +/* Hashing float[234] into float[234] of components in the range [0, 1]. 
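A self-contained sketch of the pattern used by the hash_*_to_float helpers above: run the Jenkins lookup3 finalization on a 32-bit key, then divide by 0xFFFFFFFF to land in [0, 1]. hash_u32 below inlines the same `final` mixing steps shown above; the names are illustrative, not Cycles API.

#include <cstdint>
#include <cstdio>

static inline uint32_t rot32(uint32_t x, int k)
{
  return (x << k) | (x >> (32 - k));
}

/* Same initialization and `final` mixing as hash_uint() above. */
static uint32_t hash_u32(uint32_t kx)
{
  uint32_t a, b, c;
  a = b = c = 0xdeadbeef + (1u << 2) + 13;
  a += kx;

  c ^= b; c -= rot32(b, 14);
  a ^= c; a -= rot32(c, 11);
  b ^= a; b -= rot32(a, 25);
  c ^= b; c -= rot32(b, 16);
  a ^= c; a -= rot32(c, 4);
  b ^= a; b -= rot32(a, 14);
  c ^= b; c -= rot32(b, 24);
  return c;
}

int main()
{
  /* The same key always yields the same value, which is what makes this
   * usable for stable per-object or per-pixel randomness. */
  for (uint32_t key = 0; key < 4; key++) {
    float r = (float)hash_u32(key) / (float)0xFFFFFFFFu;
    std::printf("%u -> %f\n", key, r);
  }
  return 0;
}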
*/ + +ccl_device_inline float2 hash_float2_to_float2(float2 k) +{ + return make_float2(hash_float2_to_float(k), hash_float3_to_float(make_float3(k.x, k.y, 1.0))); +} + +ccl_device_inline float3 hash_float3_to_float3(float3 k) +{ + return make_float3(hash_float3_to_float(k), + hash_float4_to_float(make_float4(k.x, k.y, k.z, 1.0)), + hash_float4_to_float(make_float4(k.x, k.y, k.z, 2.0))); +} + +ccl_device_inline float4 hash_float4_to_float4(float4 k) +{ + return make_float4(hash_float4_to_float(k), + hash_float4_to_float(make_float4(k.w, k.x, k.y, k.z)), + hash_float4_to_float(make_float4(k.z, k.w, k.x, k.y)), + hash_float4_to_float(make_float4(k.y, k.z, k.w, k.x))); +} + +/* Hashing float or float[234] into float3 of components in range [0, 1]. */ + +ccl_device_inline float3 hash_float_to_float3(float k) +{ + return make_float3(hash_float_to_float(k), + hash_float2_to_float(make_float2(k, 1.0)), + hash_float2_to_float(make_float2(k, 2.0))); +} + +ccl_device_inline float3 hash_float2_to_float3(float2 k) +{ + return make_float3(hash_float2_to_float(k), + hash_float3_to_float(make_float3(k.x, k.y, 1.0)), + hash_float3_to_float(make_float3(k.x, k.y, 2.0))); +} + +ccl_device_inline float3 hash_float4_to_float3(float4 k) +{ + return make_float3(hash_float4_to_float(k), + hash_float4_to_float(make_float4(k.z, k.x, k.w, k.y)), + hash_float4_to_float(make_float4(k.w, k.z, k.y, k.x))); +} + +/* SSE Versions Of Jenkins Lookup3 Hash Functions */ + +#ifdef __KERNEL_SSE2__ +# define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k)))) + +# define mix(a, b, c) \ + { \ + a -= c; \ + a ^= rot(c, 4); \ + c += b; \ + b -= a; \ + b ^= rot(a, 6); \ + a += c; \ + c -= b; \ + c ^= rot(b, 8); \ + b += a; \ + a -= c; \ + a ^= rot(c, 16); \ + c += b; \ + b -= a; \ + b ^= rot(a, 19); \ + a += c; \ + c -= b; \ + c ^= rot(b, 4); \ + b += a; \ + } + +# define final(a, b, c) \ + { \ + c ^= b; \ + c -= rot(b, 14); \ + a ^= c; \ + a -= rot(c, 11); \ + b ^= a; \ + b -= rot(a, 25); \ + c ^= b; \ + c -= rot(b, 16); \ + a ^= c; \ + a -= rot(c, 4); \ + b ^= a; \ + b -= rot(a, 14); \ + c ^= b; \ + c -= rot(b, 24); \ + } + +ccl_device_inline ssei hash_ssei(ssei kx) +{ + ssei a, b, c; + a = b = c = ssei(0xdeadbeef + (1 << 2) + 13); + + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline ssei hash_ssei2(ssei kx, ssei ky) +{ + ssei a, b, c; + a = b = c = ssei(0xdeadbeef + (2 << 2) + 13); + + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline ssei hash_ssei3(ssei kx, ssei ky, ssei kz) +{ + ssei a, b, c; + a = b = c = ssei(0xdeadbeef + (3 << 2) + 13); + + c += kz; + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline ssei hash_ssei4(ssei kx, ssei ky, ssei kz, ssei kw) +{ + ssei a, b, c; + a = b = c = ssei(0xdeadbeef + (4 << 2) + 13); + + a += kx; + b += ky; + c += kz; + mix(a, b, c); + + a += kw; + final(a, b, c); + + return c; +} + +# if defined(__KERNEL_AVX__) +ccl_device_inline avxi hash_avxi(avxi kx) +{ + avxi a, b, c; + a = b = c = avxi(0xdeadbeef + (1 << 2) + 13); + + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline avxi hash_avxi2(avxi kx, avxi ky) +{ + avxi a, b, c; + a = b = c = avxi(0xdeadbeef + (2 << 2) + 13); + + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline avxi hash_avxi3(avxi kx, avxi ky, avxi kz) +{ + avxi a, b, c; + a = b = c = avxi(0xdeadbeef + (3 << 2) + 13); + + c += kz; + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline avxi hash_avxi4(avxi kx, avxi ky, avxi kz, avxi kw) +{ + avxi a, b, 
c; + a = b = c = avxi(0xdeadbeef + (4 << 2) + 13); + + a += kx; + b += ky; + c += kz; + mix(a, b, c); + + a += kw; + final(a, b, c); + + return c; +} +# endif + +# undef rot +# undef final +# undef mix + +#endif + +#ifndef __KERNEL_GPU__ +static inline uint hash_string(const char *str) +{ + uint i = 0, c; + + while ((c = *str++)) + i = i * 37 + c; + + return i; +} +#endif + +CCL_NAMESPACE_END + +#endif /* __UTIL_HASH_H__ */ diff --git a/intern/cycles/util/ies.cpp b/intern/cycles/util/ies.cpp new file mode 100644 index 00000000000..5e879478df5 --- /dev/null +++ b/intern/cycles/util/ies.cpp @@ -0,0 +1,411 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "util/foreach.h" +#include "util/ies.h" +#include "util/math.h" +#include "util/string.h" + +CCL_NAMESPACE_BEGIN + +// NOTE: For some reason gcc-7.2 does not instantiate this versio of allocator +// gere (used in IESTextParser). Works fine for gcc-6, gcc-7.3 and gcc-8. +// +// TODO(sergey): Get to the root of this issue, or confirm this i a compiler +// issue. +template class GuardedAllocator; + +bool IESFile::load(const string &ies) +{ + clear(); + if (!parse(ies) || !process()) { + clear(); + return false; + } + return true; +} + +void IESFile::clear() +{ + intensity.clear(); + v_angles.clear(); + h_angles.clear(); +} + +int IESFile::packed_size() +{ + if (v_angles.size() && h_angles.size() > 0) { + return 2 + h_angles.size() + v_angles.size() + h_angles.size() * v_angles.size(); + } + return 0; +} + +void IESFile::pack(float *data) +{ + if (v_angles.size() && h_angles.size()) { + *(data++) = __int_as_float(h_angles.size()); + *(data++) = __int_as_float(v_angles.size()); + + memcpy(data, &h_angles[0], h_angles.size() * sizeof(float)); + data += h_angles.size(); + memcpy(data, &v_angles[0], v_angles.size() * sizeof(float)); + data += v_angles.size(); + + for (int h = 0; h < intensity.size(); h++) { + memcpy(data, &intensity[h][0], v_angles.size() * sizeof(float)); + data += v_angles.size(); + } + } +} + +class IESTextParser { + public: + vector text; + char *data; + + IESTextParser(const string &str) : text(str.begin(), str.end()) + { + std::replace(text.begin(), text.end(), ',', ' '); + data = strstr(&text[0], "\nTILT="); + } + + bool eof() + { + return (data == NULL) || (data[0] == '\0'); + } + + double get_double() + { + if (eof()) { + return 0.0; + } + char *old_data = data; + double val = strtod(data, &data); + if (data == old_data) { + data = NULL; + return 0.0; + } + return val; + } + + long get_long() + { + if (eof()) { + return 0; + } + char *old_data = data; + long val = strtol(data, &data, 10); + if (data == old_data) { + data = NULL; + return 0; + } + return val; + } +}; + +bool IESFile::parse(const string &ies) +{ + if (ies.empty()) { + return false; + } + + IESTextParser parser(ies); + if (parser.eof()) { + return false; + } + + /* Handle the tilt data block. 
*/ + if (strncmp(parser.data, "\nTILT=INCLUDE", 13) == 0) { + parser.data += 13; + parser.get_double(); /* Lamp to Luminaire geometry */ + int num_tilt = parser.get_long(); /* Amount of tilt angles and factors */ + /* Skip over angles and factors. */ + for (int i = 0; i < 2 * num_tilt; i++) { + parser.get_double(); + } + } + else { + /* Skip to next line. */ + parser.data = strstr(parser.data + 1, "\n"); + } + + if (parser.eof()) { + return false; + } + parser.data++; + + parser.get_long(); /* Number of lamps */ + parser.get_double(); /* Lumens per lamp */ + double factor = parser.get_double(); /* Candela multiplier */ + int v_angles_num = parser.get_long(); /* Number of vertical angles */ + int h_angles_num = parser.get_long(); /* Number of horizontal angles */ + type = (IESType)parser.get_long(); /* Photometric type */ + + /* TODO(lukas): Test whether the current type B processing can also deal with type A files. + * In theory the only difference should be orientation which we ignore anyways, but with IES you + * never know... + */ + if (type != TYPE_B && type != TYPE_C) { + return false; + } + + parser.get_long(); /* Unit of the geometry data */ + parser.get_double(); /* Width */ + parser.get_double(); /* Length */ + parser.get_double(); /* Height */ + factor *= parser.get_double(); /* Ballast factor */ + factor *= parser.get_double(); /* Ballast-Lamp Photometric factor */ + parser.get_double(); /* Input Watts */ + + /* Intensity values in IES files are specified in candela (lumen/sr), a photometric quantity. + * Cycles expects radiometric quantities, though, which requires a conversion. + * However, the Luminous efficacy (ratio of lumens per Watt) depends on the spectral distribution + * of the light source since lumens take human perception into account. + * Since this spectral distribution is not known from the IES file, a typical one must be + * assumed. The D65 standard illuminant has a Luminous efficacy of 177.83, which is used here to + * convert to Watt/sr. A more advanced approach would be to add a Blackbody Temperature input to + * the node and numerically integrate the Luminous efficacy from the resulting spectral + * distribution. Also, the Watt/sr value must be multiplied by 4*pi to get the Watt value that + * Cycles expects for lamp strength. Therefore, the conversion here uses 4*pi/177.83 as a Candela + * to Watt factor. + */ + factor *= 0.0706650768394; + + v_angles.reserve(v_angles_num); + for (int i = 0; i < v_angles_num; i++) { + v_angles.push_back((float)parser.get_double()); + } + + h_angles.reserve(h_angles_num); + for (int i = 0; i < h_angles_num; i++) { + h_angles.push_back((float)parser.get_double()); + } + + intensity.resize(h_angles_num); + for (int i = 0; i < h_angles_num; i++) { + intensity[i].reserve(v_angles_num); + for (int j = 0; j < v_angles_num; j++) { + intensity[i].push_back((float)(factor * parser.get_double())); + } + } + + return !parser.eof(); +} + +bool IESFile::process_type_b() +{ + vector> newintensity; + newintensity.resize(v_angles.size()); + for (int i = 0; i < v_angles.size(); i++) { + newintensity[i].reserve(h_angles.size()); + for (int j = 0; j < h_angles.size(); j++) { + newintensity[i].push_back(intensity[j][i]); + } + } + intensity.swap(newintensity); + h_angles.swap(v_angles); + + float h_first = h_angles[0], h_last = h_angles[h_angles.size() - 1]; + if (h_last != 90.0f) { + return false; + } + + if (h_first == 0.0f) { + /* The range in the file corresponds to 90°-180°, we need to mirror that to get the + * full 180° range. 
*/ + vector new_h_angles; + vector> new_intensity; + int hnum = h_angles.size(); + new_h_angles.reserve(2 * hnum - 1); + new_intensity.reserve(2 * hnum - 1); + for (int i = hnum - 1; i > 0; i--) { + new_h_angles.push_back(90.0f - h_angles[i]); + new_intensity.push_back(intensity[i]); + } + for (int i = 0; i < hnum; i++) { + new_h_angles.push_back(90.0f + h_angles[i]); + new_intensity.push_back(intensity[i]); + } + h_angles.swap(new_h_angles); + intensity.swap(new_intensity); + } + else if (h_first == -90.0f) { + /* We have full 180° coverage, so just shift to match the angle range convention. */ + for (int i = 0; i < h_angles.size(); i++) { + h_angles[i] += 90.0f; + } + } + /* To get correct results with the cubic interpolation in the kernel, the horizontal range + * has to cover all 360°. Therefore, we copy the 0° entry to 360° to ensure full coverage + * and seamless interpolation. */ + h_angles.push_back(360.0f); + intensity.push_back(intensity[0]); + + float v_first = v_angles[0], v_last = v_angles[v_angles.size() - 1]; + if (v_last != 90.0f) { + return false; + } + + if (v_first == 0.0f) { + /* The range in the file corresponds to 90°-180°, we need to mirror that to get the + * full 180° range. */ + vector new_v_angles; + int hnum = h_angles.size(); + int vnum = v_angles.size(); + new_v_angles.reserve(2 * vnum - 1); + for (int i = vnum - 1; i > 0; i--) { + new_v_angles.push_back(90.0f - v_angles[i]); + } + for (int i = 0; i < vnum; i++) { + new_v_angles.push_back(90.0f + v_angles[i]); + } + for (int i = 0; i < hnum; i++) { + vector new_intensity; + new_intensity.reserve(2 * vnum - 1); + for (int j = vnum - 2; j >= 0; j--) { + new_intensity.push_back(intensity[i][j]); + } + new_intensity.insert(new_intensity.end(), intensity[i].begin(), intensity[i].end()); + intensity[i].swap(new_intensity); + } + v_angles.swap(new_v_angles); + } + else if (v_first == -90.0f) { + /* We have full 180° coverage, so just shift to match the angle range convention. */ + for (int i = 0; i < v_angles.size(); i++) { + v_angles[i] += 90.0f; + } + } + + return true; +} + +bool IESFile::process_type_c() +{ + if (h_angles[0] == 90.0f) { + /* Some files are stored from 90° to 270°, so we just rotate them to the regular 0°-180° range + * here. */ + for (int i = 0; i < h_angles.size(); i++) { + h_angles[i] -= 90.0f; + } + } + + if (h_angles[0] != 0.0f) { + return false; + } + + if (h_angles.size() == 1) { + h_angles.push_back(360.0f); + intensity.push_back(intensity[0]); + } + + if (h_angles[h_angles.size() - 1] == 90.0f) { + /* Only one quadrant is defined, so we need to mirror twice (from one to two, then to four). + * Since the two->four mirroring step might also be required if we get an input of two + * quadrants, we only do the first mirror here and later do the second mirror in either case. + */ + int hnum = h_angles.size(); + for (int i = hnum - 2; i >= 0; i--) { + h_angles.push_back(180.0f - h_angles[i]); + intensity.push_back(intensity[i]); + } + } + + if (h_angles[h_angles.size() - 1] == 180.0f) { + /* Mirror half to the full range. */ + int hnum = h_angles.size(); + for (int i = hnum - 2; i >= 0; i--) { + h_angles.push_back(360.0f - h_angles[i]); + intensity.push_back(intensity[i]); + } + } + + /* Some files skip the 360° entry (contrary to standard) because it's supposed to be identical to + * the 0° entry. If the file has a discernible order in its spacing, just fix this. 
*/ + if (h_angles[h_angles.size() - 1] != 360.0f) { + int hnum = h_angles.size(); + float last_step = h_angles[hnum - 1] - h_angles[hnum - 2]; + float first_step = h_angles[1] - h_angles[0]; + float difference = 360.0f - h_angles[hnum - 1]; + if (last_step == difference || first_step == difference) { + h_angles.push_back(360.0f); + intensity.push_back(intensity[0]); + } + else { + return false; + } + } + + float v_first = v_angles[0], v_last = v_angles[v_angles.size() - 1]; + if (v_first == 90.0f) { + if (v_last == 180.0f) { + /* Flip to ensure that vertical angles always start at 0°. */ + for (int i = 0; i < v_angles.size(); i++) { + v_angles[i] = 180.0f - v_angles[i]; + } + } + else { + return false; + } + } + else if (v_first != 0.0f) { + return false; + } + + return true; +} + +bool IESFile::process() +{ + if (h_angles.size() == 0 || v_angles.size() == 0) { + return false; + } + + if (type == TYPE_B) { + if (!process_type_b()) { + return false; + } + } + else { + assert(type == TYPE_C); + if (!process_type_c()) { + return false; + } + } + + assert(v_angles[0] == 0.0f); + assert(h_angles[0] == 0.0f); + assert(h_angles[h_angles.size() - 1] == 360.0f); + + /* Convert from deg to rad. */ + for (int i = 0; i < v_angles.size(); i++) { + v_angles[i] *= M_PI_F / 180.f; + } + for (int i = 0; i < h_angles.size(); i++) { + h_angles[i] *= M_PI_F / 180.f; + } + + return true; +} + +IESFile::~IESFile() +{ + clear(); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/ies.h b/intern/cycles/util/ies.h new file mode 100644 index 00000000000..7be072dd5f5 --- /dev/null +++ b/intern/cycles/util/ies.h @@ -0,0 +1,59 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_IES_H__ +#define __UTIL_IES_H__ + +#include "util/string.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +class IESFile { + public: + IESFile() + { + } + ~IESFile(); + + int packed_size(); + void pack(float *data); + + bool load(const string &ies); + void clear(); + + protected: + bool parse(const string &ies); + bool process(); + bool process_type_b(); + bool process_type_c(); + + /* The brightness distribution is stored in spherical coordinates. + * The horizontal angles correspond to theta in the regular notation + * and always span the full range from 0° to 360°. + * The vertical angles correspond to phi and always start at 0°. */ + vector v_angles, h_angles; + /* The actual values are stored here, with every entry storing the values + * of one horizontal segment. */ + vector> intensity; + + /* Types of angle representation in IES files. Currently, only B and C are supported. 
*/ + enum IESType { TYPE_A = 3, TYPE_B = 2, TYPE_C = 1 } type; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_IES_H__ */ diff --git a/intern/cycles/util/image.h b/intern/cycles/util/image.h new file mode 100644 index 00000000000..69fc3a50c1d --- /dev/null +++ b/intern/cycles/util/image.h @@ -0,0 +1,98 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_IMAGE_H__ +# define __UTIL_IMAGE_H__ + +/* OpenImageIO is used for all image file reading and writing. */ + +# include + +# include "util/half.h" +# include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +OIIO_NAMESPACE_USING + +template +void util_image_resize_pixels(const vector &input_pixels, + const size_t input_width, + const size_t input_height, + const size_t input_depth, + const size_t components, + vector *output_pixels, + size_t *output_width, + size_t *output_height, + size_t *output_depth); + +/* Cast input pixel from unknown storage to float. */ +template inline float util_image_cast_to_float(T value); + +template<> inline float util_image_cast_to_float(float value) +{ + return value; +} +template<> inline float util_image_cast_to_float(uchar value) +{ + return (float)value / 255.0f; +} +template<> inline float util_image_cast_to_float(uint16_t value) +{ + return (float)value / 65535.0f; +} +template<> inline float util_image_cast_to_float(half value) +{ + return half_to_float_image(value); +} + +/* Cast float value to output pixel type. */ +template inline T util_image_cast_from_float(float value); + +template<> inline float util_image_cast_from_float(float value) +{ + return value; +} +template<> inline uchar util_image_cast_from_float(float value) +{ + if (value < 0.0f) { + return 0; + } + else if (value > (1.0f - 0.5f / 255.0f)) { + return 255; + } + return (uchar)((255.0f * value) + 0.5f); +} +template<> inline uint16_t util_image_cast_from_float(float value) +{ + if (value < 0.0f) { + return 0; + } + else if (value > (1.0f - 0.5f / 65535.0f)) { + return 65535; + } + return (uint16_t)((65535.0f * value) + 0.5f); +} +template<> inline half util_image_cast_from_float(float value) +{ + return float_to_half_image(value); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_IMAGE_H__ */ + +#include "util/image_impl.h" diff --git a/intern/cycles/util/image_impl.h b/intern/cycles/util/image_impl.h new file mode 100644 index 00000000000..3d8eed80775 --- /dev/null +++ b/intern/cycles/util/image_impl.h @@ -0,0 +1,175 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_IMAGE_IMPL_H__ +#define __UTIL_IMAGE_IMPL_H__ + +#include "util/algorithm.h" +#include "util/half.h" +#include "util/image.h" + +CCL_NAMESPACE_BEGIN + +namespace { + +template +const T *util_image_read(const vector &pixels, + const size_t width, + const size_t height, + const size_t /*depth*/, + const size_t components, + const size_t x, + const size_t y, + const size_t z) +{ + const size_t index = ((size_t)z * (width * height) + (size_t)y * width + (size_t)x) * components; + return &pixels[index]; +} + +template +void util_image_downscale_sample(const vector &pixels, + const size_t width, + const size_t height, + const size_t depth, + const size_t components, + const size_t kernel_size, + const float x, + const float y, + const float z, + T *result) +{ + assert(components <= 4); + const size_t ix = (size_t)x, iy = (size_t)y, iz = (size_t)z; + /* TODO(sergey): Support something smarter than box filer. */ + float accum[4] = {0}; + size_t count = 0; + for (size_t dz = 0; dz < kernel_size; ++dz) { + for (size_t dy = 0; dy < kernel_size; ++dy) { + for (size_t dx = 0; dx < kernel_size; ++dx) { + const size_t nx = ix + dx, ny = iy + dy, nz = iz + dz; + if (nx >= width || ny >= height || nz >= depth) { + continue; + } + const T *pixel = util_image_read(pixels, width, height, depth, components, nx, ny, nz); + for (size_t k = 0; k < components; ++k) { + accum[k] += util_image_cast_to_float(pixel[k]); + } + ++count; + } + } + } + if (count != 0) { + const float inv_count = 1.0f / (float)count; + for (size_t k = 0; k < components; ++k) { + result[k] = util_image_cast_from_float(accum[k] * inv_count); + } + } + else { + for (size_t k = 0; k < components; ++k) { + result[k] = T(0.0f); + } + } +} + +template +void util_image_downscale_pixels(const vector &input_pixels, + const size_t input_width, + const size_t input_height, + const size_t input_depth, + const size_t components, + const float inv_scale_factor, + const size_t output_width, + const size_t output_height, + const size_t output_depth, + vector *output_pixels) +{ + const size_t kernel_size = (size_t)(inv_scale_factor + 0.5f); + for (size_t z = 0; z < output_depth; ++z) { + for (size_t y = 0; y < output_height; ++y) { + for (size_t x = 0; x < output_width; ++x) { + const float input_x = (float)x * inv_scale_factor, input_y = (float)y * inv_scale_factor, + input_z = (float)z * inv_scale_factor; + const size_t output_index = (z * output_width * output_height + y * output_width + x) * + components; + util_image_downscale_sample(input_pixels, + input_width, + input_height, + input_depth, + components, + kernel_size, + input_x, + input_y, + input_z, + &output_pixels->at(output_index)); + } + } + } +} + +} /* namespace */ + +template +void util_image_resize_pixels(const vector &input_pixels, + const size_t input_width, + const size_t input_height, + const size_t input_depth, + const size_t components, + const float scale_factor, + vector *output_pixels, + size_t *output_width, + size_t *output_height, + size_t *output_depth) +{ + /* Early output for case when no scaling is applied. */ + if (scale_factor == 1.0f) { + *output_width = input_width; + *output_height = input_height; + *output_depth = input_depth; + *output_pixels = input_pixels; + return; + } + /* First of all, we calculate output image dimensions. + * We clamp them to be 1 pixel at least so we do not generate degenerate + * image. 
+ */ + *output_width = max((size_t)((float)input_width * scale_factor), (size_t)1); + *output_height = max((size_t)((float)input_height * scale_factor), (size_t)1); + *output_depth = max((size_t)((float)input_depth * scale_factor), (size_t)1); + /* Prepare pixel storage for the result. */ + const size_t num_output_pixels = ((*output_width) * (*output_height) * (*output_depth)) * + components; + output_pixels->resize(num_output_pixels); + if (scale_factor < 1.0f) { + const float inv_scale_factor = 1.0f / scale_factor; + util_image_downscale_pixels(input_pixels, + input_width, + input_height, + input_depth, + components, + inv_scale_factor, + *output_width, + *output_height, + *output_depth, + output_pixels); + } + else { + /* TODO(sergey): Needs implementation. */ + } +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_IMAGE_IMPL_H__ */ diff --git a/intern/cycles/util/list.h b/intern/cycles/util/list.h new file mode 100644 index 00000000000..f555b001186 --- /dev/null +++ b/intern/cycles/util/list.h @@ -0,0 +1,28 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_LIST_H__ +#define __UTIL_LIST_H__ + +#include + +CCL_NAMESPACE_BEGIN + +using std::list; + +CCL_NAMESPACE_END + +#endif /* __UTIL_LIST_H__ */ diff --git a/intern/cycles/util/log.cpp b/intern/cycles/util/log.cpp new file mode 100644 index 00000000000..68a5a3f576f --- /dev/null +++ b/intern/cycles/util/log.cpp @@ -0,0 +1,96 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "util/log.h" + +#include "util/math.h" +#include "util/string.h" + +#include +#ifdef _MSC_VER +# define snprintf _snprintf +#endif + +CCL_NAMESPACE_BEGIN + +#ifdef WITH_CYCLES_LOGGING +static bool is_verbosity_set() +{ + using CYCLES_GFLAGS_NAMESPACE::GetCommandLineOption; + + std::string verbosity; + if (!GetCommandLineOption("v", &verbosity)) { + return false; + } + return verbosity != "0"; +} +#endif + +void util_logging_init(const char *argv0) +{ +#ifdef WITH_CYCLES_LOGGING + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + + google::InitGoogleLogging(argv0); + SetCommandLineOption("logtostderr", "1"); + if (!is_verbosity_set()) { + SetCommandLineOption("v", "0"); + } + SetCommandLineOption("stderrthreshold", "0"); + SetCommandLineOption("minloglevel", "0"); +#else + (void)argv0; +#endif +} + +void util_logging_start() +{ +#ifdef WITH_CYCLES_LOGGING + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + SetCommandLineOption("logtostderr", "1"); + if (!is_verbosity_set()) { + SetCommandLineOption("v", "2"); + } + SetCommandLineOption("stderrthreshold", "0"); + SetCommandLineOption("minloglevel", "0"); +#endif +} + +void util_logging_verbosity_set(int verbosity) +{ +#ifdef WITH_CYCLES_LOGGING + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + char val[10]; + snprintf(val, sizeof(val), "%d", verbosity); + SetCommandLineOption("v", val); +#else + (void)verbosity; +#endif +} + +std::ostream &operator<<(std::ostream &os, const int2 &value) +{ + os << "(" << value.x << ", " << value.y << ")"; + return os; +} + +std::ostream &operator<<(std::ostream &os, const float3 &value) +{ + os << "(" << value.x << ", " << value.y << ", " << value.z << ")"; + return os; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/log.h b/intern/cycles/util/log.h new file mode 100644 index 00000000000..35c2d436d09 --- /dev/null +++ b/intern/cycles/util/log.h @@ -0,0 +1,101 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_LOGGING_H__ +#define __UTIL_LOGGING_H__ + +#if defined(WITH_CYCLES_LOGGING) && !defined(__KERNEL_GPU__) +# include +# include +#endif + +#include + +CCL_NAMESPACE_BEGIN + +#if !defined(WITH_CYCLES_LOGGING) || defined(__KERNEL_GPU__) +class StubStream { + public: + template StubStream &operator<<(const T &) + { + return *this; + } +}; + +class LogMessageVoidify { + public: + LogMessageVoidify() + { + } + void operator&(const StubStream &) + { + } +}; + +# define LOG_SUPPRESS() (true) ? 
((void)0) : LogMessageVoidify() & StubStream() +# define LOG(severity) LOG_SUPPRESS() +# define VLOG(severity) LOG_SUPPRESS() +# define VLOG_IF(severity, condition) LOG_SUPPRESS() +# define VLOG_IS_ON(severity) false + +# define CHECK(expression) LOG_SUPPRESS() + +# define CHECK_NOTNULL(expression) LOG_SUPPRESS() +# define CHECK_NULL(expression) LOG_SUPPRESS() + +# define CHECK_NEAR(actual, expected, eps) LOG_SUPPRESS() + +# define CHECK_GE(a, b) LOG_SUPPRESS() +# define CHECK_NE(a, b) LOG_SUPPRESS() +# define CHECK_EQ(a, b) LOG_SUPPRESS() +# define CHECK_GT(a, b) LOG_SUPPRESS() +# define CHECK_LT(a, b) LOG_SUPPRESS() +# define CHECK_LE(a, b) LOG_SUPPRESS() + +# define DCHECK(expression) LOG_SUPPRESS() + +# define DCHECK_NOTNULL(expression) LOG_SUPPRESS() +# define DCHECK_NULL(expression) LOG_SUPPRESS() + +# define DCHECK_NEAR(actual, expected, eps) LOG_SUPPRESS() + +# define DCHECK_GE(a, b) LOG_SUPPRESS() +# define DCHECK_NE(a, b) LOG_SUPPRESS() +# define DCHECK_EQ(a, b) LOG_SUPPRESS() +# define DCHECK_GT(a, b) LOG_SUPPRESS() +# define DCHECK_LT(a, b) LOG_SUPPRESS() +# define DCHECK_LE(a, b) LOG_SUPPRESS() + +# define LOG_ASSERT(expression) LOG_SUPPRESS() +#endif + +#define VLOG_ONCE(level, flag) \ + if (!flag) \ + flag = true, VLOG(level) + +struct int2; +struct float3; + +void util_logging_init(const char *argv0); +void util_logging_start(); +void util_logging_verbosity_set(int verbosity); + +std::ostream &operator<<(std::ostream &os, const int2 &value); +std::ostream &operator<<(std::ostream &os, const float3 &value); + +CCL_NAMESPACE_END + +#endif /* __UTIL_LOGGING_H__ */ diff --git a/intern/cycles/util/map.h b/intern/cycles/util/map.h new file mode 100644 index 00000000000..f1b2522362f --- /dev/null +++ b/intern/cycles/util/map.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MAP_H__ +#define __UTIL_MAP_H__ + +#include +#include + +CCL_NAMESPACE_BEGIN + +using std::map; +using std::pair; +using std::unordered_map; +using std::unordered_multimap; + +template static void map_free_memory(T &data) +{ + /* Use swap() trick to actually free all internal memory. */ + T empty_data; + data.swap(empty_data); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MAP_H__ */ diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h new file mode 100644 index 00000000000..e7fc492733f --- /dev/null +++ b/intern/cycles/util/math.h @@ -0,0 +1,870 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_H__ +#define __UTIL_MATH_H__ + +/* Math + * + * Basic math functions on scalar and vector types. This header is used by + * both the kernel code when compiled as C++, and other C++ non-kernel code. */ + +#ifndef __KERNEL_GPU__ +# include +#endif + +#ifdef __HIP__ +# include +#endif + +#include +#include +#include + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* Float Pi variations */ + +/* Division */ +#ifndef M_PI_F +# define M_PI_F (3.1415926535897932f) /* pi */ +#endif +#ifndef M_PI_2_F +# define M_PI_2_F (1.5707963267948966f) /* pi/2 */ +#endif +#ifndef M_PI_4_F +# define M_PI_4_F (0.7853981633974830f) /* pi/4 */ +#endif +#ifndef M_1_PI_F +# define M_1_PI_F (0.3183098861837067f) /* 1/pi */ +#endif +#ifndef M_2_PI_F +# define M_2_PI_F (0.6366197723675813f) /* 2/pi */ +#endif +#ifndef M_1_2PI_F +# define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */ +#endif +#ifndef M_SQRT_PI_8_F +# define M_SQRT_PI_8_F (0.6266570686577501f) /* sqrt(pi/8) */ +#endif +#ifndef M_LN_2PI_F +# define M_LN_2PI_F (1.8378770664093454f) /* ln(2*pi) */ +#endif + +/* Multiplication */ +#ifndef M_2PI_F +# define M_2PI_F (6.2831853071795864f) /* 2*pi */ +#endif +#ifndef M_4PI_F +# define M_4PI_F (12.566370614359172f) /* 4*pi */ +#endif + +/* Float sqrt variations */ +#ifndef M_SQRT2_F +# define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ +#endif +#ifndef M_LN2_F +# define M_LN2_F (0.6931471805599453f) /* ln(2) */ +#endif +#ifndef M_LN10_F +# define M_LN10_F (2.3025850929940457f) /* ln(10) */ +#endif + +/* Scalar */ + +#ifndef __HIP__ +# ifdef _WIN32 +ccl_device_inline float fmaxf(float a, float b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline float fminf(float a, float b) +{ + return (a < b) ? a : b; +} + +# endif /* _WIN32 */ +#endif /* __HIP__ */ + +#ifndef __KERNEL_GPU__ +using std::isfinite; +using std::isnan; +using std::sqrt; + +ccl_device_inline int abs(int x) +{ + return (x > 0) ? x : -x; +} + +ccl_device_inline int max(int a, int b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline int min(int a, int b) +{ + return (a < b) ? a : b; +} + +ccl_device_inline uint min(uint a, uint b) +{ + return (a < b) ? a : b; +} + +ccl_device_inline float max(float a, float b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline float min(float a, float b) +{ + return (a < b) ? a : b; +} + +ccl_device_inline double max(double a, double b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline double min(double a, double b) +{ + return (a < b) ? a : b; +} + +/* These 2 guys are templated for usage with registers data. + * + * NOTE: Since this is CPU-only functions it is ok to use references here. + * But for other devices we'll need to be careful about this. 
+ */ + +template ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d) +{ + return min(min(a, b), min(c, d)); +} + +template ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d) +{ + return max(max(a, b), max(c, d)); +} +#endif /* __KERNEL_GPU__ */ + +ccl_device_inline float min4(float a, float b, float c, float d) +{ + return min(min(a, b), min(c, d)); +} + +ccl_device_inline float max4(float a, float b, float c, float d) +{ + return max(max(a, b), max(c, d)); +} + +/* Int/Float conversion */ + +ccl_device_inline int as_int(uint i) +{ + union { + uint ui; + int i; + } u; + u.ui = i; + return u.i; +} + +ccl_device_inline uint as_uint(int i) +{ + union { + uint ui; + int i; + } u; + u.i = i; + return u.ui; +} + +ccl_device_inline uint as_uint(float f) +{ + union { + uint i; + float f; + } u; + u.f = f; + return u.i; +} + +#ifndef __HIP__ +ccl_device_inline int __float_as_int(float f) +{ + union { + int i; + float f; + } u; + u.f = f; + return u.i; +} + +ccl_device_inline float __int_as_float(int i) +{ + union { + int i; + float f; + } u; + u.i = i; + return u.f; +} + +ccl_device_inline uint __float_as_uint(float f) +{ + union { + uint i; + float f; + } u; + u.f = f; + return u.i; +} + +ccl_device_inline float __uint_as_float(uint i) +{ + union { + uint i; + float f; + } u; + u.i = i; + return u.f; +} +#endif + +ccl_device_inline int4 __float4_as_int4(float4 f) +{ +#ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(f.m128)); +#else + return make_int4( + __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w)); +#endif +} + +ccl_device_inline float4 __int4_as_float4(int4 i) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_castsi128_ps(i.m128)); +#else + return make_float4( + __int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w)); +#endif +} + +template ccl_device_inline uint pointer_pack_to_uint_0(T *ptr) +{ + return ((uint64_t)ptr) & 0xFFFFFFFF; +} + +template ccl_device_inline uint pointer_pack_to_uint_1(T *ptr) +{ + return (((uint64_t)ptr) >> 32) & 0xFFFFFFFF; +} + +template ccl_device_inline T *pointer_unpack_from_uint(const uint a, const uint b) +{ + return (T *)(((uint64_t)b << 32) | a); +} + +ccl_device_inline uint uint16_pack_to_uint(const uint a, const uint b) +{ + return (a << 16) | b; +} + +ccl_device_inline uint uint16_unpack_from_uint_0(const uint i) +{ + return i >> 16; +} + +ccl_device_inline uint uint16_unpack_from_uint_1(const uint i) +{ + return i & 0xFFFF; +} + +/* Versions of functions which are safe for fast math. */ +ccl_device_inline bool isnan_safe(float f) +{ + unsigned int x = __float_as_uint(f); + return (x << 1) > 0xff000000u; +} + +ccl_device_inline bool isfinite_safe(float f) +{ + /* By IEEE 754 rule, 2*Inf equals Inf */ + unsigned int x = __float_as_uint(f); + return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u); +} + +ccl_device_inline float ensure_finite(float v) +{ + return isfinite_safe(v) ? 
v : 0.0f; +} + +ccl_device_inline int clamp(int a, int mn, int mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline float clamp(float a, float mn, float mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline float mix(float a, float b, float t) +{ + return a + t * (b - a); +} + +ccl_device_inline float smoothstep(float edge0, float edge1, float x) +{ + float result; + if (x < edge0) + result = 0.0f; + else if (x >= edge1) + result = 1.0f; + else { + float t = (x - edge0) / (edge1 - edge0); + result = (3.0f - 2.0f * t) * (t * t); + } + return result; +} + +#ifndef __KERNEL_CUDA__ +ccl_device_inline float saturate(float a) +{ + return clamp(a, 0.0f, 1.0f); +} +#endif /* __KERNEL_CUDA__ */ + +ccl_device_inline int float_to_int(float f) +{ + return (int)f; +} + +ccl_device_inline int floor_to_int(float f) +{ + return float_to_int(floorf(f)); +} + +ccl_device_inline int quick_floor_to_int(float x) +{ + return float_to_int(x) - ((x < 0) ? 1 : 0); +} + +ccl_device_inline float floorfrac(float x, ccl_private int *i) +{ + *i = quick_floor_to_int(x); + return x - *i; +} + +ccl_device_inline int ceil_to_int(float f) +{ + return float_to_int(ceilf(f)); +} + +ccl_device_inline float fractf(float x) +{ + return x - floorf(x); +} + +/* Adapted from godot-engine math_funcs.h. */ +ccl_device_inline float wrapf(float value, float max, float min) +{ + float range = max - min; + return (range != 0.0f) ? value - (range * floorf((value - min) / range)) : min; +} + +ccl_device_inline float pingpongf(float a, float b) +{ + return (b != 0.0f) ? fabsf(fractf((a - b) / (b * 2.0f)) * b * 2.0f - b) : 0.0f; +} + +ccl_device_inline float smoothminf(float a, float b, float k) +{ + if (k != 0.0f) { + float h = fmaxf(k - fabsf(a - b), 0.0f) / k; + return fminf(a, b) - h * h * h * k * (1.0f / 6.0f); + } + else { + return fminf(a, b); + } +} + +ccl_device_inline float signf(float f) +{ + return (f < 0.0f) ? -1.0f : 1.0f; +} + +ccl_device_inline float nonzerof(float f, float eps) +{ + if (fabsf(f) < eps) + return signf(f) * eps; + else + return f; +} + +/* `signum` function testing for zero. Matches GLSL and OSL functions. */ +ccl_device_inline float compatible_signf(float f) +{ + if (f == 0.0f) { + return 0.0f; + } + else { + return signf(f); + } +} + +ccl_device_inline float smoothstepf(float f) +{ + float ff = f * f; + return (3.0f * ff - 2.0f * ff * f); +} + +ccl_device_inline int mod(int x, int m) +{ + return (x % m + m) % m; +} + +ccl_device_inline float3 float2_to_float3(const float2 a) +{ + return make_float3(a.x, a.y, 0.0f); +} + +ccl_device_inline float3 float4_to_float3(const float4 a) +{ + return make_float3(a.x, a.y, a.z); +} + +ccl_device_inline float4 float3_to_float4(const float3 a) +{ + return make_float4(a.x, a.y, a.z, 1.0f); +} + +ccl_device_inline float inverse_lerp(float a, float b, float x) +{ + return (x - a) / (b - a); +} + +/* Cubic interpolation between b and c, a and d are the previous and next point. 
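A quick standalone check of the property stated here, assuming nothing beyond the polynomial visible just below: the Catmull-Rom style cubic reproduces b at x = 0 and c at x = 1, with a and d only shaping the tangents. The local cubic_interp copy repeats the same expression so the check compiles on its own.

#include <cassert>
#include <cstdio>

static float cubic_interp(float a, float b, float c, float d, float x)
{
  /* Same polynomial as the kernel version below. */
  return 0.5f *
             (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x +
              (c - a)) *
             x +
         b;
}

int main()
{
  const float a = 0.0f, b = 1.0f, c = 4.0f, d = 9.0f;
  assert(cubic_interp(a, b, c, d, 0.0f) == b); /* passes through b at x = 0 */
  assert(cubic_interp(a, b, c, d, 1.0f) == c); /* passes through c at x = 1 */
  std::printf("x = 0.5 -> %f\n", cubic_interp(a, b, c, d, 0.5f));
  return 0;
}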
*/ +ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x) +{ + return 0.5f * + (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x + + (c - a)) * + x + + b; +} + +CCL_NAMESPACE_END + +#include "util/math_int2.h" +#include "util/math_int3.h" +#include "util/math_int4.h" + +#include "util/math_float2.h" +#include "util/math_float3.h" +#include "util/math_float4.h" + +#include "util/rect.h" + +CCL_NAMESPACE_BEGIN + +/* Interpolation */ + +template A lerp(const A &a, const A &b, const B &t) +{ + return (A)(a * ((B)1 - t) + b * t); +} + +/* Triangle */ + +ccl_device_inline float triangle_area(ccl_private const float3 &v1, + ccl_private const float3 &v2, + ccl_private const float3 &v3) +{ + return len(cross(v3 - v2, v1 - v2)) * 0.5f; +} + +/* Orthonormal vectors */ + +ccl_device_inline void make_orthonormals(const float3 N, + ccl_private float3 *a, + ccl_private float3 *b) +{ +#if 0 + if (fabsf(N.y) >= 0.999f) { + *a = make_float3(1, 0, 0); + *b = make_float3(0, 0, 1); + return; + } + if (fabsf(N.z) >= 0.999f) { + *a = make_float3(1, 0, 0); + *b = make_float3(0, 1, 0); + return; + } +#endif + + if (N.x != N.y || N.x != N.z) + *a = make_float3(N.z - N.y, N.x - N.z, N.y - N.x); //(1,1,1)x N + else + *a = make_float3(N.z - N.y, N.x + N.z, -N.y - N.x); //(-1,1,1)x N + + *a = normalize(*a); + *b = cross(N, *a); +} + +/* Color division */ + +ccl_device_inline float3 safe_invert_color(float3 a) +{ + float x, y, z; + + x = (a.x != 0.0f) ? 1.0f / a.x : 0.0f; + y = (a.y != 0.0f) ? 1.0f / a.y : 0.0f; + z = (a.z != 0.0f) ? 1.0f / a.z : 0.0f; + + return make_float3(x, y, z); +} + +ccl_device_inline float3 safe_divide_color(float3 a, float3 b) +{ + float x, y, z; + + x = (b.x != 0.0f) ? a.x / b.x : 0.0f; + y = (b.y != 0.0f) ? a.y / b.y : 0.0f; + z = (b.z != 0.0f) ? a.z / b.z : 0.0f; + + return make_float3(x, y, z); +} + +ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b) +{ + float x, y, z; + + x = (b.x != 0.0f) ? a.x / b.x : 0.0f; + y = (b.y != 0.0f) ? a.y / b.y : 0.0f; + z = (b.z != 0.0f) ? a.z / b.z : 0.0f; + + /* try to get gray even if b is zero */ + if (b.x == 0.0f) { + if (b.y == 0.0f) { + x = z; + y = z; + } + else if (b.z == 0.0f) { + x = y; + z = y; + } + else + x = 0.5f * (y + z); + } + else if (b.y == 0.0f) { + if (b.z == 0.0f) { + y = x; + z = x; + } + else + y = 0.5f * (x + z); + } + else if (b.z == 0.0f) { + z = 0.5f * (x + y); + } + + return make_float3(x, y, z); +} + +/* Rotation of point around axis and angle */ + +ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) +{ + float costheta = cosf(angle); + float sintheta = sinf(angle); + float3 r; + + r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + + (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + + (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); + + r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + + ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + + (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); + + r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + + (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + + ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); + + return r; +} + +/* NaN-safe math ops */ + +ccl_device_inline float safe_sqrtf(float f) +{ + return sqrtf(max(f, 0.0f)); +} + +ccl_device_inline float inversesqrtf(float f) +{ + return (f > 0.0f) ? 
1.0f / sqrtf(f) : 0.0f; +} + +ccl_device float safe_asinf(float a) +{ + return asinf(clamp(a, -1.0f, 1.0f)); +} + +ccl_device float safe_acosf(float a) +{ + return acosf(clamp(a, -1.0f, 1.0f)); +} + +ccl_device float compatible_powf(float x, float y) +{ +#ifdef __KERNEL_GPU__ + if (y == 0.0f) /* x^0 -> 1, including 0^0 */ + return 1.0f; + + /* GPU pow doesn't accept negative x, do manual checks here */ + if (x < 0.0f) { + if (fmodf(-y, 2.0f) == 0.0f) + return powf(-x, y); + else + return -powf(-x, y); + } + else if (x == 0.0f) + return 0.0f; +#endif + return powf(x, y); +} + +ccl_device float safe_powf(float a, float b) +{ + if (UNLIKELY(a < 0.0f && b != float_to_int(b))) + return 0.0f; + + return compatible_powf(a, b); +} + +ccl_device float safe_divide(float a, float b) +{ + return (b != 0.0f) ? a / b : 0.0f; +} + +ccl_device float safe_logf(float a, float b) +{ + if (UNLIKELY(a <= 0.0f || b <= 0.0f)) + return 0.0f; + + return safe_divide(logf(a), logf(b)); +} + +ccl_device float safe_modulo(float a, float b) +{ + return (b != 0.0f) ? fmodf(a, b) : 0.0f; +} + +ccl_device_inline float sqr(float a) +{ + return a * a; +} + +ccl_device_inline float pow20(float a) +{ + return sqr(sqr(sqr(sqr(a)) * a)); +} + +ccl_device_inline float pow22(float a) +{ + return sqr(a * sqr(sqr(sqr(a)) * a)); +} + +ccl_device_inline float beta(float x, float y) +{ + return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y)); +} + +ccl_device_inline float xor_signmask(float x, int y) +{ + return __int_as_float(__float_as_int(x) ^ y); +} + +ccl_device float bits_to_01(uint bits) +{ + return bits * (1.0f / (float)0xFFFFFFFF); +} + +ccl_device_inline uint count_leading_zeros(uint x) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) + return __clz(x); +#else + assert(x != 0); +# ifdef _MSC_VER + unsigned long leading_zero = 0; + _BitScanReverse(&leading_zero, x); + return (31 - leading_zero); +# else + return __builtin_clz(x); +# endif +#endif +} + +ccl_device_inline uint count_trailing_zeros(uint x) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) + return (__ffs(x) - 1); +#else + assert(x != 0); +# ifdef _MSC_VER + unsigned long ctz = 0; + _BitScanForward(&ctz, x); + return ctz; +# else + return __builtin_ctz(x); +# endif +#endif +} + +ccl_device_inline uint find_first_set(uint x) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) + return __ffs(x); +#else +# ifdef _MSC_VER + return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0; +# else + return __builtin_ffs(x); +# endif +#endif +} + +/* projections */ +ccl_device_inline float2 map_to_tube(const float3 co) +{ + float len, u, v; + len = sqrtf(co.x * co.x + co.y * co.y); + if (len > 0.0f) { + u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f; + v = (co.z + 1.0f) * 0.5f; + } + else { + u = v = 0.0f; + } + return make_float2(u, v); +} + +ccl_device_inline float2 map_to_sphere(const float3 co) +{ + float l = len(co); + float u, v; + if (l > 0.0f) { + if (UNLIKELY(co.x == 0.0f && co.y == 0.0f)) { + u = 0.0f; /* Otherwise domain error. */ + } + else { + u = (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f; + } + v = 1.0f - safe_acosf(co.z / l) / M_PI_F; + } + else { + u = v = 0.0f; + } + return make_float2(u, v); +} + +/* Compares two floats. + * Returns true if their absolute difference is smaller than abs_diff (for numbers near zero) + * or their relative difference is less than ulp_diff ULPs. 
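A small standalone illustration of the ULP half of this test (names are illustrative): for finite floats of the same sign, reinterpreting the IEEE-754 bits as integers turns "distance in representable values" into a plain integer difference, which is what the __float_as_int subtraction below exploits.

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

static int float_as_int(float f)
{
  int i;
  std::memcpy(&i, &f, sizeof(i)); /* bit-exact reinterpretation, no aliasing issues */
  return i;
}

int main()
{
  const float a = 1.0f;
  const float b = std::nextafter(a, 2.0f); /* the closest float above 1.0f */
  std::printf("ULP distance: %d\n", std::abs(float_as_int(a) - float_as_int(b))); /* prints 1 */
  return 0;
}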
+ * Based on + * https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ + */ + +ccl_device_inline float compare_floats(float a, float b, float abs_diff, int ulp_diff) +{ + if (fabsf(a - b) < abs_diff) { + return true; + } + + if ((a < 0.0f) != (b < 0.0f)) { + return false; + } + + return (abs(__float_as_int(a) - __float_as_int(b)) < ulp_diff); +} + +/* Calculate the angle between the two vectors a and b. + * The usual approach `acos(dot(a, b))` has severe precision issues for small angles, + * which are avoided by this method. + * Based on "Mangled Angles" from https://people.eecs.berkeley.edu/~wkahan/Mindless.pdf + */ +ccl_device_inline float precise_angle(float3 a, float3 b) +{ + return 2.0f * atan2f(len(a - b), len(a + b)); +} + +/* Return value which is greater than the given one and is a power of two. */ +ccl_device_inline uint next_power_of_two(uint x) +{ + return x == 0 ? 1 : 1 << (32 - count_leading_zeros(x)); +} + +/* Return value which is lower than the given one and is a power of two. */ +ccl_device_inline uint prev_power_of_two(uint x) +{ + return x < 2 ? x : 1 << (31 - count_leading_zeros(x - 1)); +} + +#ifndef __has_builtin +# define __has_builtin(v) 0 +#endif + +/* Reverses the bits of a 32 bit integer. */ +ccl_device_inline uint32_t reverse_integer_bits(uint32_t x) +{ + /* Use a native instruction if it exists. */ +#if defined(__arm__) || defined(__aarch64__) + __asm__("rbit %w0, %w1" : "=r"(x) : "r"(x)); + return x; +#elif defined(__KERNEL_CUDA__) + return __brev(x); +#elif __has_builtin(__builtin_bitreverse32) + return __builtin_bitreverse32(x); +#else + /* Flip pairwise. */ + x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); + /* Flip pairs. */ + x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); + /* Flip nibbles. */ + x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); + /* Flip bytes. CPUs have an instruction for that, pretty fast one. */ +# ifdef _MSC_VER + return _byteswap_ulong(x); +# elif defined(__INTEL_COMPILER) + return (uint32_t)_bswap((int)x); +# else + /* Assuming gcc or clang. */ + return __builtin_bswap32(x); +# endif +#endif +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_H__ */ diff --git a/intern/cycles/util/math_cdf.cpp b/intern/cycles/util/math_cdf.cpp new file mode 100644 index 00000000000..02c6646f824 --- /dev/null +++ b/intern/cycles/util/math_cdf.cpp @@ -0,0 +1,70 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/math_cdf.h" + +#include "util/algorithm.h" +#include "util/math.h" + +CCL_NAMESPACE_BEGIN + +/* Invert pre-calculated CDF function. 
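+ * Roughly: entry i of inv_cdf approximates the position, as a fraction of the
+ * table, at which the CDF reaches the value from + (to - from) * i / resolution,
+ * optionally remapped symmetrically around the center. A typical use (sketch,
+ * not spelled out in this file) is turning a pixel-filter CDF into a lookup
+ * table for importance sampling; see util_cdf_inverted() in math_cdf.h.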
*/ +void util_cdf_invert(const int resolution, + const float from, + const float to, + const vector &cdf, + const bool make_symmetric, + vector &inv_cdf) +{ + const float inv_resolution = 1.0f / (float)resolution; + const float range = to - from; + inv_cdf.resize(resolution); + if (make_symmetric) { + const int half_size = (resolution - 1) / 2; + for (int i = 0; i <= half_size; i++) { + float x = i / (float)half_size; + int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); + float t; + if (index < cdf.size() - 1) { + t = (x - cdf[index]) / (cdf[index + 1] - cdf[index]); + } + else { + t = 0.0f; + index = cdf.size() - 1; + } + float y = ((index + t) / (resolution - 1)) * (2.0f * range); + inv_cdf[half_size + i] = 0.5f * (1.0f + y); + inv_cdf[half_size - i] = 0.5f * (1.0f - y); + } + } + else { + for (int i = 0; i < resolution; i++) { + float x = from + range * (float)i * inv_resolution; + int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); + float t; + if (index < cdf.size() - 1) { + t = (x - cdf[index]) / (cdf[index + 1] - cdf[index]); + } + else { + t = 0.0f; + index = resolution; + } + inv_cdf[i] = (index + t) * inv_resolution; + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/math_cdf.h b/intern/cycles/util/math_cdf.h new file mode 100644 index 00000000000..4c57dac4bbe --- /dev/null +++ b/intern/cycles/util/math_cdf.h @@ -0,0 +1,75 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_CDF_H__ +#define __UTIL_MATH_CDF_H__ + +#include "util/algorithm.h" +#include "util/math.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +/* Evaluate CDF of a given functor with given range and resolution. */ +template +void util_cdf_evaluate( + const int resolution, const float from, const float to, Functor functor, vector &cdf) +{ + const int cdf_count = resolution + 1; + const float range = to - from; + cdf.resize(cdf_count); + cdf[0] = 0.0f; + /* Actual CDF evaluation. */ + for (int i = 0; i < resolution; ++i) { + float x = from + range * (float)i / (resolution - 1); + float y = functor(x); + cdf[i + 1] = cdf[i] + fabsf(y); + } + /* Normalize the CDF. */ + for (int i = 0; i <= resolution; i++) { + cdf[i] /= cdf[resolution]; + } +} + +/* Invert pre-calculated CDF function. */ +void util_cdf_invert(const int resolution, + const float from, + const float to, + const vector &cdf, + const bool make_symmetric, + vector &inv_cdf); + +/* Evaluate inverted CDF of a given functor with given range and resolution. */ +template +void util_cdf_inverted(const int resolution, + const float from, + const float to, + Functor functor, + const bool make_symmetric, + vector &inv_cdf) +{ + vector cdf; + /* There is no much smartness going around lower resolution for the CDF table, + * this just to match the old code from pixel filter so it all stays exactly + * the same and no regression tests are failed. 
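+   * A usage sketch (the resolution and functor below are hypothetical, purely
+   * for illustration):
+   *   vector<float> inv_cdf;
+   *   util_cdf_inverted(256, 0.0f, 1.0f, gaussian_functor, true, inv_cdf);
+   * builds a 256-entry table suitable for importance sampling a symmetric 1D
+   * filter, where gaussian_functor is any callable returning the filter weight.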
+ */ + util_cdf_evaluate(resolution - 1, from, to, functor, cdf); + util_cdf_invert(resolution, from, to, cdf, make_symmetric, inv_cdf); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_H_CDF__ */ diff --git a/intern/cycles/util/math_fast.h b/intern/cycles/util/math_fast.h new file mode 100644 index 00000000000..cc924f36a71 --- /dev/null +++ b/intern/cycles/util/math_fast.h @@ -0,0 +1,652 @@ +/* + * Adapted from OpenImageIO library with this license: + * + * Copyright 2008-2014 Larry Gritz and the other authors and contributors. + * All Rights Reserved. + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the software's owners nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * (This is the Modified BSD License) + * + * A few bits here are based upon code from NVIDIA that was also released + * under the same modified BSD license, and marked as: + * Copyright 2004 NVIDIA Corporation. All Rights Reserved. + * + * Some parts of this file were first open-sourced in Open Shading Language, + * then later moved here. The original copyright notice was: + * Copyright (c) 2009-2014 Sony Pictures Imageworks Inc., et al. + * + * Many of the math functions were copied from or inspired by other + * public domain sources or open source packages with compatible licenses. + * The individual functions give references were applicable. + */ + +#ifndef __UTIL_FAST_MATH__ +#define __UTIL_FAST_MATH__ + +CCL_NAMESPACE_BEGIN + +ccl_device_inline float madd(const float a, const float b, const float c) +{ + /* NOTE: In the future we may want to explicitly ask for a fused + * multiply-add in a specialized version for float. + * + * NOTE: GCC/ICC will turn this (for float) into a FMA unless + * explicitly asked not to, clang seems to leave the code alone. + */ + return a * b + c; +} + +ccl_device_inline float4 madd4(const float4 a, const float4 b, const float4 c) +{ + return a * b + c; +} + +/* + * FAST & APPROXIMATE MATH + * + * The functions named "fast_*" provide a set of replacements to libm that + * are much faster at the expense of some accuracy and robust handling of + * extreme values. 
One design goal for these approximation was to avoid + * branches as much as possible and operate on single precision values only + * so that SIMD versions should be straightforward ports We also try to + * implement "safe" semantics (ie: clamp to valid range where possible) + * natively since wrapping these inline calls in another layer would be + * wasteful. + * + * Some functions are fast_safe_*, which is both a faster approximation as + * well as clamped input domain to ensure no NaN, Inf, or divide by zero. + */ + +/* Round to nearest integer, returning as an int. */ +ccl_device_inline int fast_rint(float x) +{ + /* used by sin/cos/tan range reduction. */ +#ifdef __KERNEL_SSE4__ + /* Single `roundps` instruction on SSE4.1+ (for gcc/clang at least). */ + return float_to_int(rintf(x)); +#else + /* emulate rounding by adding/subtracting 0.5. */ + return float_to_int(x + copysignf(0.5f, x)); +#endif +} + +ccl_device float fast_sinf(float x) +{ + /* Very accurate argument reduction from SLEEF, + * starts failing around x=262000 + * + * Results on: [-2pi,2pi]. + * + * Examined 2173837240 values of sin: 0.00662760244 avg ulp diff, 2 max ulp, + * 1.19209e-07 max error + */ + int q = fast_rint(x * M_1_PI_F); + float qf = (float)q; + x = madd(qf, -0.78515625f * 4, x); + x = madd(qf, -0.00024187564849853515625f * 4, x); + x = madd(qf, -3.7747668102383613586e-08f * 4, x); + x = madd(qf, -1.2816720341285448015e-12f * 4, x); + x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals */ + float s = x * x; + if ((q & 1) != 0) + x = -x; + /* This polynomial approximation has very low error on [-pi/2,+pi/2] + * 1.19209e-07 max error in total over [-2pi,+2pi]. */ + float u = 2.6083159809786593541503e-06f; + u = madd(u, s, -0.0001981069071916863322258f); + u = madd(u, s, +0.00833307858556509017944336f); + u = madd(u, s, -0.166666597127914428710938f); + u = madd(s, u * x, x); + /* For large x, the argument reduction can fail and the polynomial can be + * evaluated with arguments outside the valid internal. Just clamp the bad + * values away (setting to 0.0f means no branches need to be generated). */ + if (fabsf(u) > 1.0f) { + u = 0.0f; + } + return u; +} + +ccl_device float fast_cosf(float x) +{ + /* Same argument reduction as fast_sinf(). */ + int q = fast_rint(x * M_1_PI_F); + float qf = (float)q; + x = madd(qf, -0.78515625f * 4, x); + x = madd(qf, -0.00024187564849853515625f * 4, x); + x = madd(qf, -3.7747668102383613586e-08f * 4, x); + x = madd(qf, -1.2816720341285448015e-12f * 4, x); + x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals. */ + float s = x * x; + /* Polynomial from SLEEF's sincosf, max error is + * 4.33127e-07 over [-2pi,2pi] (98% of values are "exact"). */ + float u = -2.71811842367242206819355e-07f; + u = madd(u, s, +2.47990446951007470488548e-05f); + u = madd(u, s, -0.00138888787478208541870117f); + u = madd(u, s, +0.0416666641831398010253906f); + u = madd(u, s, -0.5f); + u = madd(u, s, +1.0f); + if ((q & 1) != 0) { + u = -u; + } + if (fabsf(u) > 1.0f) { + u = 0.0f; + } + return u; +} + +ccl_device void fast_sincosf(float x, ccl_private float *sine, ccl_private float *cosine) +{ + /* Same argument reduction as fast_sin. 
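+   * Computing both values in one call shares that reduction; a typical use
+   * (illustrative only, the names are hypothetical) is rotating a 2D point:
+   *   fast_sincosf(angle, &s, &c);
+   *   rotated = make_float2(c * p.x - s * p.y, s * p.x + c * p.y);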
*/ + int q = fast_rint(x * M_1_PI_F); + float qf = (float)q; + x = madd(qf, -0.78515625f * 4, x); + x = madd(qf, -0.00024187564849853515625f * 4, x); + x = madd(qf, -3.7747668102383613586e-08f * 4, x); + x = madd(qf, -1.2816720341285448015e-12f * 4, x); + x = M_PI_2_F - (M_PI_2_F - x); // crush denormals + float s = x * x; + /* NOTE: same exact polynomials as fast_sinf() and fast_cosf() above. */ + if ((q & 1) != 0) { + x = -x; + } + float su = 2.6083159809786593541503e-06f; + su = madd(su, s, -0.0001981069071916863322258f); + su = madd(su, s, +0.00833307858556509017944336f); + su = madd(su, s, -0.166666597127914428710938f); + su = madd(s, su * x, x); + float cu = -2.71811842367242206819355e-07f; + cu = madd(cu, s, +2.47990446951007470488548e-05f); + cu = madd(cu, s, -0.00138888787478208541870117f); + cu = madd(cu, s, +0.0416666641831398010253906f); + cu = madd(cu, s, -0.5f); + cu = madd(cu, s, +1.0f); + if ((q & 1) != 0) { + cu = -cu; + } + if (fabsf(su) > 1.0f) { + su = 0.0f; + } + if (fabsf(cu) > 1.0f) { + cu = 0.0f; + } + *sine = su; + *cosine = cu; +} + +/* NOTE: this approximation is only valid on [-8192.0,+8192.0], it starts + * becoming really poor outside of this range because the reciprocal amplifies + * errors. + */ +ccl_device float fast_tanf(float x) +{ + /* Derived from SLEEF implementation. + * + * Note that we cannot apply the "denormal crush" trick everywhere because + * we sometimes need to take the reciprocal of the polynomial + */ + int q = fast_rint(x * 2.0f * M_1_PI_F); + float qf = (float)q; + x = madd(qf, -0.78515625f * 2, x); + x = madd(qf, -0.00024187564849853515625f * 2, x); + x = madd(qf, -3.7747668102383613586e-08f * 2, x); + x = madd(qf, -1.2816720341285448015e-12f * 2, x); + if ((q & 1) == 0) { + /* Crush denormals (only if we aren't inverting the result later). */ + x = M_PI_4_F - (M_PI_4_F - x); + } + float s = x * x; + float u = 0.00927245803177356719970703f; + u = madd(u, s, 0.00331984995864331722259521f); + u = madd(u, s, 0.0242998078465461730957031f); + u = madd(u, s, 0.0534495301544666290283203f); + u = madd(u, s, 0.133383005857467651367188f); + u = madd(u, s, 0.333331853151321411132812f); + u = madd(s, u * x, x); + if ((q & 1) != 0) { + u = -1.0f / u; + } + return u; +} + +/* Fast, approximate sin(x*M_PI) with maximum absolute error of 0.000918954611. + * + * Adapted from http://devmaster.net/posts/9648/fast-and-accurate-sine-cosine#comment-76773 + */ +ccl_device float fast_sinpif(float x) +{ + /* Fast trick to strip the integral part off, so our domain is [-1, 1]. */ + const float z = x - ((x + 25165824.0f) - 25165824.0f); + const float y = z - z * fabsf(z); + const float Q = 3.10396624f; + const float P = 3.584135056f; /* P = 16-4*Q */ + return y * (Q + P * fabsf(y)); + + /* The original article used inferior constants for Q and P and + * so had max error 1.091e-3. + * + * The optimal value for Q was determined by exhaustive search, minimizing + * the absolute numerical error relative to float(std::sin(double(phi*M_PI))) + * over the interval [0,2] (which is where most of the invocations happen). + * + * The basic idea of this approximation starts with the coarse approximation: + * sin(pi*x) ~= f(x) = 4 * (x - x * abs(x)) + * + * This approximation always _over_ estimates the target. On the other hand, + * the curve: + * sin(pi*x) ~= f(x) * abs(f(x)) / 4 + * + * always lies _under_ the target. Thus we can simply numerically search for + * the optimal constant to LERP these curves into a more precise + * approximation. 
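+   * (Sketch of how the constants relate, not taken from the original article:
+   * writing y = x - x * fabsf(x), the code returns y * (Q + P * fabsf(y)).
+   * The relation P = 16 - 4 * Q keeps the peak exact: at x = 0.5, y = 0.25 and
+   * 0.25f * (Q + P * 0.25f) = 0.25f * (Q + 4 - Q) = 1 = sin(pi * 0.5), so only
+   * Q had to be searched for.)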
+ * + * After folding the constants together and simplifying the resulting math, + * we end up with the compact implementation above. + * + * NOTE: this function actually computes sin(x * pi) which avoids one or two + * mults in many cases and guarantees exact values at integer periods. + */ +} + +/* Fast approximate cos(x*M_PI) with ~0.1% absolute error. */ +ccl_device_inline float fast_cospif(float x) +{ + return fast_sinpif(x + 0.5f); +} + +ccl_device float fast_acosf(float x) +{ + const float f = fabsf(x); + /* clamp and crush denormals. */ + const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; + /* Based on http://www.pouet.net/topic.php?which=9132&page=2 + * 85% accurate (ulp 0) + * Examined 2130706434 values of acos: + * 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush" + * Examined 2130706434 values of acos: + * 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush" + */ + const float a = sqrtf(1.0f - m) * + (1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f))); + return x < 0 ? M_PI_F - a : a; +} + +ccl_device float fast_asinf(float x) +{ + /* Based on acosf approximation above. + * Max error is 4.51133e-05 (ulps are higher because we are consistently off + * by a little amount). + */ + const float f = fabsf(x); + /* Clamp and crush denormals. */ + const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; + const float a = M_PI_2_F - + sqrtf(1.0f - m) * (1.5707963267f + + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f))); + return copysignf(a, x); +} + +ccl_device float fast_atanf(float x) +{ + const float a = fabsf(x); + const float k = a > 1.0f ? 1 / a : a; + const float s = 1.0f - (1.0f - k); /* Crush denormals. */ + const float t = s * s; + /* http://mathforum.org/library/drmath/view/62672.html + * Examined 4278190080 values of atan: + * 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals) + * Examined 4278190080 values of atan: + * 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals) + */ + float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); + if (a > 1.0f) { + r = M_PI_2_F - r; + } + return copysignf(r, x); +} + +ccl_device float fast_atan2f(float y, float x) +{ + /* Based on atan approximation above. + * + * The special cases around 0 and infinity were tested explicitly. + * + * The only case not handled correctly is x=NaN,y=0 which returns 0 instead + * of nan. + */ + const float a = fabsf(x); + const float b = fabsf(y); + + const float k = (b == 0) ? 0.0f : ((a == b) ? 1.0f : (b > a ? a / b : b / a)); + const float s = 1.0f - (1.0f - k); /* Crush denormals */ + const float t = s * s; + + float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); + + if (b > a) { + /* Account for arg reduction. */ + r = M_PI_2_F - r; + } + /* Test sign bit of x. */ + if (__float_as_uint(x) & 0x80000000u) { + r = M_PI_F - r; + } + return copysignf(r, y); +} + +/* Based on: + * + * https://github.com/LiraNuna/glsl-sse2/blob/master/source/vec4.h + */ +ccl_device float fast_log2f(float x) +{ + /* NOTE: clamp to avoid special cases and make result "safe" from large + * negative values/NAN's. 
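+   * The code below splits x into 2^exponent * (1 + f) with f in [0, 1),
+   * evaluates a polynomial approximation of log2(1 + f), and adds the integer
+   * exponent back. Worked illustration: x = 8.0f stores a biased exponent of
+   * 130, so exponent = 3 and f = 0, and the result is exactly 3.0f.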
*/ + x = clamp(x, FLT_MIN, FLT_MAX); + unsigned bits = __float_as_uint(x); + int exponent = (int)(bits >> 23) - 127; + float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f; + /* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]: + * 0.0797524457 avg ulp diff, 3713596 max ulp, 7.62939e-06 max error. + * ulp histogram: + * 0 = 97.46% + * 1 = 2.29% + * 2 = 0.11% + */ + float f2 = f * f; + float f4 = f2 * f2; + float hi = madd(f, -0.00931049621349f, 0.05206469089414f); + float lo = madd(f, 0.47868480909345f, -0.72116591947498f); + hi = madd(f, hi, -0.13753123777116f); + hi = madd(f, hi, 0.24187369696082f); + hi = madd(f, hi, -0.34730547155299f); + lo = madd(f, lo, 1.442689881667200f); + return ((f4 * hi) + (f * lo)) + exponent; +} + +ccl_device_inline float fast_logf(float x) +{ + /* Examined 2130706432 values of logf on [1.17549435e-38,3.40282347e+38]: + * 0.313865375 avg ulp diff, 5148137 max ulp, 7.62939e-06 max error. + */ + return fast_log2f(x) * M_LN2_F; +} + +ccl_device_inline float fast_log10(float x) +{ + /* Examined 2130706432 values of log10f on [1.17549435e-38,3.40282347e+38]: + * 0.631237033 avg ulp diff, 4471615 max ulp, 3.8147e-06 max error. + */ + return fast_log2f(x) * M_LN2_F / M_LN10_F; +} + +ccl_device float fast_logb(float x) +{ + /* Don't bother with denormals. */ + x = fabsf(x); + x = clamp(x, FLT_MIN, FLT_MAX); + unsigned bits = __float_as_uint(x); + return (float)((int)(bits >> 23) - 127); +} + +ccl_device float fast_exp2f(float x) +{ + /* Clamp to safe range for final addition. */ + x = clamp(x, -126.0f, 126.0f); + /* Range reduction. */ + int m = (int)x; + x -= m; + x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ulps!). */ + /* 5th degree polynomial generated with sollya + * Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ulp diff, + * 232 max ulp. + * + * ulp histogram: + * 0 = 87.81% + * 1 = 4.18% + */ + float r = 1.33336498402e-3f; + r = madd(x, r, 9.810352697968e-3f); + r = madd(x, r, 5.551834031939e-2f); + r = madd(x, r, 0.2401793301105f); + r = madd(x, r, 0.693144857883f); + r = madd(x, r, 1.0f); + /* Multiply by 2 ^ m by adding in the exponent. */ + /* NOTE: left-shift of negative number is undefined behavior. */ + return __uint_as_float(__float_as_uint(r) + ((unsigned)m << 23)); +} + +ccl_device_inline float fast_expf(float x) +{ + /* Examined 2237485550 values of exp on [-87.3300018,87.3300018]: + * 2.6666452 avg ulp diff, 230 max ulp. + */ + return fast_exp2f(x / M_LN2_F); +} + +#if defined(__KERNEL_CPU__) && !defined(_MSC_VER) +/* MSVC seems to have a code-gen bug here in at least SSE41/AVX, see + * T78047 and T78869 for details. Just disable for now, it only makes + * a small difference in denoising performance. 
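+ * The SIMD path below mirrors fast_exp2f() lane by lane: clamp, split each
+ * lane into integer and fractional parts, evaluate the same degree-5
+ * polynomial, then add the integer part into the exponent bits of each lane.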
*/ +ccl_device float4 fast_exp2f4(float4 x) +{ + const float4 one = make_float4(1.0f); + const float4 limit = make_float4(126.0f); + x = clamp(x, -limit, limit); + int4 m = make_int4(x); + x = one - (one - (x - make_float4(m))); + float4 r = make_float4(1.33336498402e-3f); + r = madd4(x, r, make_float4(9.810352697968e-3f)); + r = madd4(x, r, make_float4(5.551834031939e-2f)); + r = madd4(x, r, make_float4(0.2401793301105f)); + r = madd4(x, r, make_float4(0.693144857883f)); + r = madd4(x, r, make_float4(1.0f)); + return __int4_as_float4(__float4_as_int4(r) + (m << 23)); +} + +ccl_device_inline float4 fast_expf4(float4 x) +{ + return fast_exp2f4(x / M_LN2_F); +} +#else +ccl_device_inline float4 fast_expf4(float4 x) +{ + return make_float4(fast_expf(x.x), fast_expf(x.y), fast_expf(x.z), fast_expf(x.w)); +} +#endif + +ccl_device_inline float fast_exp10(float x) +{ + /* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]: + * 2.71732409 avg ulp diff, 232 max ulp. + */ + return fast_exp2f(x * M_LN10_F / M_LN2_F); +} + +ccl_device_inline float fast_expm1f(float x) +{ + if (fabsf(x) < 1e-5f) { + x = 1.0f - (1.0f - x); /* Crush denormals. */ + return madd(0.5f, x * x, x); + } + else { + return fast_expf(x) - 1.0f; + } +} + +ccl_device float fast_sinhf(float x) +{ + float a = fabsf(x); + if (a > 1.0f) { + /* Examined 53389559 values of sinh on [1,87.3300018]: + * 33.6886442 avg ulp diff, 178 max ulp. */ + float e = fast_expf(a); + return copysignf(0.5f * e - 0.5f / e, x); + } + else { + a = 1.0f - (1.0f - a); /* Crush denorms. */ + float a2 = a * a; + /* Degree 7 polynomial generated with sollya. */ + /* Examined 2130706434 values of sinh on [-1,1]: 1.19209e-07 max error. */ + float r = 2.03945513931e-4f; + r = madd(r, a2, 8.32990277558e-3f); + r = madd(r, a2, 0.1666673421859f); + r = madd(r * a, a2, a); + return copysignf(r, x); + } +} + +ccl_device_inline float fast_coshf(float x) +{ + /* Examined 2237485550 values of cosh on [-87.3300018,87.3300018]: + * 1.78256726 avg ulp diff, 178 max ulp. + */ + float e = fast_expf(fabsf(x)); + return 0.5f * e + 0.5f / e; +} + +ccl_device_inline float fast_tanhf(float x) +{ + /* Examined 4278190080 values of tanh on [-3.40282347e+38,3.40282347e+38]: + * 3.12924e-06 max error. + */ + /* NOTE: ulp error is high because of sub-optimal handling around the origin. */ + float e = fast_expf(2.0f * fabsf(x)); + return copysignf(1.0f - 2.0f / (1.0f + e), x); +} + +ccl_device float fast_safe_powf(float x, float y) +{ + if (y == 0) + return 1.0f; /* x^1=1 */ + if (x == 0) + return 0.0f; /* 0^y=0 */ + float sign = 1.0f; + if (x < 0.0f) { + /* if x is negative, only deal with integer powers + * powf returns NaN for non-integers, we will return 0 instead. + */ + int ybits = __float_as_int(y) & 0x7fffffff; + if (ybits >= 0x4b800000) { + // always even int, keep positive + } + else if (ybits >= 0x3f800000) { + /* Bigger than 1, check. */ + int k = (ybits >> 23) - 127; /* Get exponent. */ + int j = ybits >> (23 - k); /* Shift out possible fractional bits. */ + if ((j << (23 - k)) == ybits) { /* rebuild number and check for a match. */ + /* +1 for even, -1 for odd. */ + sign = __int_as_float(0x3f800000 | (j << 31)); + } + else { + /* Not an integer. */ + return 0.0f; + } + } + else { + /* Not an integer. */ + return 0.0f; + } + } + return sign * fast_exp2f(y * fast_log2f(fabsf(x))); +} + +/* TODO(sergey): Check speed with our erf functions implementation from + * bsdf_microfacet.h. 
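+ * A common use of erf, for illustration only: the CDF of a standard normal
+ * distribution is 0.5 * (1 + erf(x / sqrt(2))), i.e. approximately
+ * 0.5f * (1.0f + fast_erff(x * 0.70710678f)).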
+ */ + +ccl_device_inline float fast_erff(float x) +{ + /* Examined 1082130433 values of erff on [0,4]: 1.93715e-06 max error. */ + /* Abramowitz and Stegun, 7.1.28. */ + const float a1 = 0.0705230784f; + const float a2 = 0.0422820123f; + const float a3 = 0.0092705272f; + const float a4 = 0.0001520143f; + const float a5 = 0.0002765672f; + const float a6 = 0.0000430638f; + const float a = fabsf(x); + if (a >= 12.3f) { + return copysignf(1.0f, x); + } + const float b = 1.0f - (1.0f - a); /* Crush denormals. */ + const float r = madd( + madd(madd(madd(madd(madd(a6, b, a5), b, a4), b, a3), b, a2), b, a1), b, 1.0f); + const float s = r * r; /* ^2 */ + const float t = s * s; /* ^4 */ + const float u = t * t; /* ^8 */ + const float v = u * u; /* ^16 */ + return copysignf(1.0f - 1.0f / v, x); +} + +ccl_device_inline float fast_erfcf(float x) +{ + /* Examined 2164260866 values of erfcf on [-4,4]: 1.90735e-06 max error. + * + * ulp histogram: + * + * 0 = 80.30% + */ + return 1.0f - fast_erff(x); +} + +ccl_device_inline float fast_ierff(float x) +{ + /* From: Approximating the `erfinv` function by Mike Giles. */ + /* To avoid trouble at the limit, clamp input to 1-eps. */ + float a = fabsf(x); + if (a > 0.99999994f) { + a = 0.99999994f; + } + float w = -fast_logf((1.0f - a) * (1.0f + a)), p; + if (w < 5.0f) { + w = w - 2.5f; + p = 2.81022636e-08f; + p = madd(p, w, 3.43273939e-07f); + p = madd(p, w, -3.5233877e-06f); + p = madd(p, w, -4.39150654e-06f); + p = madd(p, w, 0.00021858087f); + p = madd(p, w, -0.00125372503f); + p = madd(p, w, -0.00417768164f); + p = madd(p, w, 0.246640727f); + p = madd(p, w, 1.50140941f); + } + else { + w = sqrtf(w) - 3.0f; + p = -0.000200214257f; + p = madd(p, w, 0.000100950558f); + p = madd(p, w, 0.00134934322f); + p = madd(p, w, -0.00367342844f); + p = madd(p, w, 0.00573950773f); + p = madd(p, w, -0.0076224613f); + p = madd(p, w, 0.00943887047f); + p = madd(p, w, 1.00167406f); + p = madd(p, w, 2.83297682f); + } + return p * x; +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_FAST_MATH__ */ diff --git a/intern/cycles/util/math_float2.h b/intern/cycles/util/math_float2.h new file mode 100644 index 00000000000..87141d5bc37 --- /dev/null +++ b/intern/cycles/util/math_float2.h @@ -0,0 +1,269 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_FLOAT2_H__ +#define __UTIL_MATH_FLOAT2_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. 
+ */ + +ccl_device_inline float2 operator-(const float2 &a); +ccl_device_inline float2 operator*(const float2 &a, const float2 &b); +ccl_device_inline float2 operator*(const float2 &a, float f); +ccl_device_inline float2 operator*(float f, const float2 &a); +ccl_device_inline float2 operator/(float f, const float2 &a); +ccl_device_inline float2 operator/(const float2 &a, float f); +ccl_device_inline float2 operator/(const float2 &a, const float2 &b); +ccl_device_inline float2 operator+(const float2 &a, const float f); +ccl_device_inline float2 operator+(const float2 &a, const float2 &b); +ccl_device_inline float2 operator-(const float2 &a, const float f); +ccl_device_inline float2 operator-(const float2 &a, const float2 &b); +ccl_device_inline float2 operator+=(float2 &a, const float2 &b); +ccl_device_inline float2 operator*=(float2 &a, const float2 &b); +ccl_device_inline float2 operator*=(float2 &a, float f); +ccl_device_inline float2 operator/=(float2 &a, const float2 &b); +ccl_device_inline float2 operator/=(float2 &a, float f); + +ccl_device_inline bool operator==(const float2 &a, const float2 &b); +ccl_device_inline bool operator!=(const float2 &a, const float2 &b); + +ccl_device_inline bool is_zero(const float2 &a); +ccl_device_inline float average(const float2 &a); +ccl_device_inline float distance(const float2 &a, const float2 &b); +ccl_device_inline float dot(const float2 &a, const float2 &b); +ccl_device_inline float cross(const float2 &a, const float2 &b); +ccl_device_inline float len(const float2 &a); +ccl_device_inline float2 normalize(const float2 &a); +ccl_device_inline float2 normalize_len(const float2 &a, float *t); +ccl_device_inline float2 safe_normalize(const float2 &a); +ccl_device_inline float2 min(const float2 &a, const float2 &b); +ccl_device_inline float2 max(const float2 &a, const float2 &b); +ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx); +ccl_device_inline float2 fabs(const float2 &a); +ccl_device_inline float2 as_float2(const float4 &a); +ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t); +ccl_device_inline float2 floor(const float2 &a); + +ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b); + +/******************************************************************************* + * Definition. 
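+ * Illustration (not part of the original header): these overloads make float2
+ * behave as a small vector type, e.g. len(make_float2(3.0f, 4.0f)) is 5.0f and
+ * normalize(make_float2(3.0f, 4.0f)) is (0.6f, 0.8f).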
+ */ + +ccl_device_inline float2 zero_float2() +{ + return make_float2(0.0f, 0.0f); +} + +ccl_device_inline float2 one_float2() +{ + return make_float2(1.0f, 1.0f); +} + +ccl_device_inline float2 operator-(const float2 &a) +{ + return make_float2(-a.x, -a.y); +} + +ccl_device_inline float2 operator*(const float2 &a, const float2 &b) +{ + return make_float2(a.x * b.x, a.y * b.y); +} + +ccl_device_inline float2 operator*(const float2 &a, float f) +{ + return make_float2(a.x * f, a.y * f); +} + +ccl_device_inline float2 operator*(float f, const float2 &a) +{ + return make_float2(a.x * f, a.y * f); +} + +ccl_device_inline float2 operator/(float f, const float2 &a) +{ + return make_float2(f / a.x, f / a.y); +} + +ccl_device_inline float2 operator/(const float2 &a, float f) +{ + float invf = 1.0f / f; + return make_float2(a.x * invf, a.y * invf); +} + +ccl_device_inline float2 operator/(const float2 &a, const float2 &b) +{ + return make_float2(a.x / b.x, a.y / b.y); +} + +ccl_device_inline float2 operator+(const float2 &a, const float f) +{ + return a + make_float2(f, f); +} + +ccl_device_inline float2 operator+(const float2 &a, const float2 &b) +{ + return make_float2(a.x + b.x, a.y + b.y); +} + +ccl_device_inline float2 operator-(const float2 &a, const float f) +{ + return a - make_float2(f, f); +} + +ccl_device_inline float2 operator-(const float2 &a, const float2 &b) +{ + return make_float2(a.x - b.x, a.y - b.y); +} + +ccl_device_inline float2 operator+=(float2 &a, const float2 &b) +{ + return a = a + b; +} + +ccl_device_inline float2 operator*=(float2 &a, const float2 &b) +{ + return a = a * b; +} + +ccl_device_inline float2 operator*=(float2 &a, float f) +{ + return a = a * f; +} + +ccl_device_inline float2 operator/=(float2 &a, const float2 &b) +{ + return a = a / b; +} + +ccl_device_inline float2 operator/=(float2 &a, float f) +{ + float invf = 1.0f / f; + return a = a * invf; +} + +ccl_device_inline bool operator==(const float2 &a, const float2 &b) +{ + return (a.x == b.x && a.y == b.y); +} + +ccl_device_inline bool operator!=(const float2 &a, const float2 &b) +{ + return !(a == b); +} + +ccl_device_inline bool is_zero(const float2 &a) +{ + return (a.x == 0.0f && a.y == 0.0f); +} + +ccl_device_inline float average(const float2 &a) +{ + return (a.x + a.y) * (1.0f / 2.0f); +} + +ccl_device_inline float distance(const float2 &a, const float2 &b) +{ + return len(a - b); +} + +ccl_device_inline float dot(const float2 &a, const float2 &b) +{ + return a.x * b.x + a.y * b.y; +} + +ccl_device_inline float cross(const float2 &a, const float2 &b) +{ + return (a.x * b.y - a.y * b.x); +} + +ccl_device_inline float len(const float2 &a) +{ + return sqrtf(dot(a, a)); +} + +ccl_device_inline float2 normalize(const float2 &a) +{ + return a / len(a); +} + +ccl_device_inline float2 normalize_len(const float2 &a, ccl_private float *t) +{ + *t = len(a); + return a / (*t); +} + +ccl_device_inline float2 safe_normalize(const float2 &a) +{ + float t = len(a); + return (t != 0.0f) ? 
a / t : a; +} + +ccl_device_inline float2 min(const float2 &a, const float2 &b) +{ + return make_float2(min(a.x, b.x), min(a.y, b.y)); +} + +ccl_device_inline float2 max(const float2 &a, const float2 &b) +{ + return make_float2(max(a.x, b.x), max(a.y, b.y)); +} + +ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline float2 fabs(const float2 &a) +{ + return make_float2(fabsf(a.x), fabsf(a.y)); +} + +ccl_device_inline float2 as_float2(const float4 &a) +{ + return make_float2(a.x, a.y); +} + +ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t) +{ + return a + t * (b - a); +} + +ccl_device_inline float2 mix(const float2 &a, const float2 &b, float t) +{ + return a + t * (b - a); +} + +ccl_device_inline float2 floor(const float2 &a) +{ + return make_float2(floorf(a.x), floorf(a.y)); +} + +ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b) +{ + return (b != 0.0f) ? a / b : zero_float2(); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_FLOAT2_H__ */ diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h new file mode 100644 index 00000000000..e780d7e0a7c --- /dev/null +++ b/intern/cycles/util/math_float3.h @@ -0,0 +1,530 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_FLOAT3_H__ +#define __UTIL_MATH_FLOAT3_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. 
+ */ + +ccl_device_inline float3 operator-(const float3 &a); +ccl_device_inline float3 operator*(const float3 &a, const float3 &b); +ccl_device_inline float3 operator*(const float3 &a, const float f); +ccl_device_inline float3 operator*(const float f, const float3 &a); +ccl_device_inline float3 operator/(const float f, const float3 &a); +ccl_device_inline float3 operator/(const float3 &a, const float f); +ccl_device_inline float3 operator/(const float3 &a, const float3 &b); +ccl_device_inline float3 operator+(const float3 &a, const float f); +ccl_device_inline float3 operator+(const float3 &a, const float3 &b); +ccl_device_inline float3 operator-(const float3 &a, const float f); +ccl_device_inline float3 operator-(const float3 &a, const float3 &b); +ccl_device_inline float3 operator+=(float3 &a, const float3 &b); +ccl_device_inline float3 operator-=(float3 &a, const float3 &b); +ccl_device_inline float3 operator*=(float3 &a, const float3 &b); +ccl_device_inline float3 operator*=(float3 &a, float f); +ccl_device_inline float3 operator/=(float3 &a, const float3 &b); +ccl_device_inline float3 operator/=(float3 &a, float f); + +ccl_device_inline bool operator==(const float3 &a, const float3 &b); +ccl_device_inline bool operator!=(const float3 &a, const float3 &b); + +ccl_device_inline float distance(const float3 &a, const float3 &b); +ccl_device_inline float dot(const float3 &a, const float3 &b); +ccl_device_inline float dot_xy(const float3 &a, const float3 &b); +ccl_device_inline float3 cross(const float3 &a, const float3 &b); +ccl_device_inline float3 normalize(const float3 &a); +ccl_device_inline float3 min(const float3 &a, const float3 &b); +ccl_device_inline float3 max(const float3 &a, const float3 &b); +ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx); +ccl_device_inline float3 fabs(const float3 &a); +ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t); +ccl_device_inline float3 rcp(const float3 &a); +ccl_device_inline float3 sqrt(const float3 &a); +ccl_device_inline float3 floor(const float3 &a); +ccl_device_inline float3 ceil(const float3 &a); + +ccl_device_inline float min3(float3 a); +ccl_device_inline float max3(float3 a); +ccl_device_inline float len(const float3 a); +ccl_device_inline float len_squared(const float3 a); + +ccl_device_inline float3 reflect(const float3 incident, const float3 normal); +ccl_device_inline float3 project(const float3 v, const float3 v_proj); + +ccl_device_inline float3 saturate3(float3 a); +ccl_device_inline float3 safe_normalize(const float3 a); +ccl_device_inline float3 normalize_len(const float3 a, float *t); +ccl_device_inline float3 safe_normalize_len(const float3 a, float *t); +ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b); +ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b); +ccl_device_inline float3 interp(float3 a, float3 b, float t); +ccl_device_inline float3 sqr3(float3 a); + +ccl_device_inline bool is_zero(const float3 a); +ccl_device_inline float reduce_add(const float3 a); +ccl_device_inline float average(const float3 a); +ccl_device_inline bool isequal_float3(const float3 a, const float3 b); + +/******************************************************************************* + * Definition. 
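+ * Note: with __KERNEL_SSE__ defined, a float3 is stored in a 16-byte SSE
+ * register whose fourth lane is unused, which is why comparisons below mask
+ * _mm_movemask_ps() with 7 so that only the three significant lanes count.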
+ */ + +ccl_device_inline float3 zero_float3() +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_setzero_ps()); +#else + return make_float3(0.0f, 0.0f, 0.0f); +#endif +} + +ccl_device_inline float3 one_float3() +{ + return make_float3(1.0f, 1.0f, 1.0f); +} + +ccl_device_inline float3 operator-(const float3 &a) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); +#else + return make_float3(-a.x, -a.y, -a.z); +#endif +} + +ccl_device_inline float3 operator*(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_mul_ps(a.m128, b.m128)); +#else + return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); +#endif +} + +ccl_device_inline float3 operator*(const float3 &a, const float f) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f))); +#else + return make_float3(a.x * f, a.y * f, a.z * f); +#endif +} + +ccl_device_inline float3 operator*(const float f, const float3 &a) +{ +#if defined(__KERNEL_SSE__) + return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128)); +#else + return make_float3(a.x * f, a.y * f, a.z * f); +#endif +} + +ccl_device_inline float3 operator/(const float f, const float3 &a) +{ +#if defined(__KERNEL_SSE__) + return float3(_mm_div_ps(_mm_set1_ps(f), a.m128)); +#else + return make_float3(f / a.x, f / a.y, f / a.z); +#endif +} + +ccl_device_inline float3 operator/(const float3 &a, const float f) +{ + float invf = 1.0f / f; + return a * invf; +} + +ccl_device_inline float3 operator/(const float3 &a, const float3 &b) +{ +#if defined(__KERNEL_SSE__) + return float3(_mm_div_ps(a.m128, b.m128)); +#else + return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); +#endif +} + +ccl_device_inline float3 operator+(const float3 &a, const float f) +{ + return a + make_float3(f, f, f); +} + +ccl_device_inline float3 operator+(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_add_ps(a.m128, b.m128)); +#else + return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); +#endif +} + +ccl_device_inline float3 operator-(const float3 &a, const float f) +{ + return a - make_float3(f, f, f); +} + +ccl_device_inline float3 operator-(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_sub_ps(a.m128, b.m128)); +#else + return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); +#endif +} + +ccl_device_inline float3 operator+=(float3 &a, const float3 &b) +{ + return a = a + b; +} + +ccl_device_inline float3 operator-=(float3 &a, const float3 &b) +{ + return a = a - b; +} + +ccl_device_inline float3 operator*=(float3 &a, const float3 &b) +{ + return a = a * b; +} + +ccl_device_inline float3 operator*=(float3 &a, float f) +{ + return a = a * f; +} + +ccl_device_inline float3 operator/=(float3 &a, const float3 &b) +{ + return a = a / b; +} + +ccl_device_inline float3 operator/=(float3 &a, float f) +{ + float invf = 1.0f / f; + return a = a * invf; +} + +ccl_device_inline bool operator==(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; +#else + return (a.x == b.x && a.y == b.y && a.z == b.z); +#endif +} + +ccl_device_inline bool operator!=(const float3 &a, const float3 &b) +{ + return !(a == b); +} + +ccl_device_inline float distance(const float3 &a, const float3 &b) +{ + return len(a - b); +} + +ccl_device_inline float dot(const float3 &a, const float3 &b) +{ +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); +#else + return a.x * b.x + a.y * b.y 
+ a.z * b.z; +#endif +} + +ccl_device_inline float dot_xy(const float3 &a, const float3 &b) +{ +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a, b), b)); +#else + return a.x * b.x + a.y * b.y; +#endif +} + +ccl_device_inline float3 cross(const float3 &a, const float3 &b) +{ + float3 r = make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); + return r; +} + +ccl_device_inline float3 normalize(const float3 &a) +{ +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); + return float3(_mm_div_ps(a.m128, norm)); +#else + return a / len(a); +#endif +} + +ccl_device_inline float3 min(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_min_ps(a.m128, b.m128)); +#else + return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); +#endif +} + +ccl_device_inline float3 max(const float3 &a, const float3 &b) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_max_ps(a.m128, b.m128)); +#else + return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); +#endif +} + +ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline float3 fabs(const float3 &a) +{ +#ifdef __KERNEL_SSE__ +# ifdef __KERNEL_NEON__ + return float3(vabsq_f32(a.m128)); +# else + __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + return float3(_mm_and_ps(a.m128, mask)); +# endif +#else + return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z)); +#endif +} + +ccl_device_inline float3 sqrt(const float3 &a) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_sqrt_ps(a)); +#else + return make_float3(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z)); +#endif +} + +ccl_device_inline float3 floor(const float3 &a) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_floor_ps(a)); +#else + return make_float3(floorf(a.x), floorf(a.y), floorf(a.z)); +#endif +} + +ccl_device_inline float3 ceil(const float3 &a) +{ +#ifdef __KERNEL_SSE__ + return float3(_mm_ceil_ps(a)); +#else + return make_float3(ceilf(a.x), ceilf(a.y), ceilf(a.z)); +#endif +} + +ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t) +{ + return a + t * (b - a); +} + +ccl_device_inline float3 rcp(const float3 &a) +{ +#ifdef __KERNEL_SSE__ + /* Don't use _mm_rcp_ps due to poor precision. 
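+   * (The rcpps estimate is accurate to only about 12 bits; it would need a
+   * Newton-Raphson refinement step, r = r * (2 - a * r), to be usable here,
+   * so a full division is the simpler precise choice.)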
*/ + return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); +#else + return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z); +#endif +} + +ccl_device_inline float min3(float3 a) +{ + return min(min(a.x, a.y), a.z); +} + +ccl_device_inline float max3(float3 a) +{ + return max(max(a.x, a.y), a.z); +} + +ccl_device_inline float len(const float3 a) +{ +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F))); +#else + return sqrtf(dot(a, a)); +#endif +} + +ccl_device_inline float len_squared(const float3 a) +{ + return dot(a, a); +} + +ccl_device_inline float3 reflect(const float3 incident, const float3 normal) +{ + float3 unit_normal = normalize(normal); + return incident - 2.0f * unit_normal * dot(incident, unit_normal); +} + +ccl_device_inline float3 refract(const float3 incident, const float3 normal, const float eta) +{ + float k = 1.0f - eta * eta * (1.0f - dot(normal, incident) * dot(normal, incident)); + if (k < 0.0f) + return zero_float3(); + else + return eta * incident - (eta * dot(normal, incident) + sqrt(k)) * normal; +} + +ccl_device_inline float3 faceforward(const float3 vector, + const float3 incident, + const float3 reference) +{ + return (dot(reference, incident) < 0.0f) ? vector : -vector; +} + +ccl_device_inline float3 project(const float3 v, const float3 v_proj) +{ + float len_squared = dot(v_proj, v_proj); + return (len_squared != 0.0f) ? (dot(v, v_proj) / len_squared) * v_proj : zero_float3(); +} + +ccl_device_inline float3 saturate3(float3 a) +{ + return make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); +} + +ccl_device_inline float3 normalize_len(const float3 a, ccl_private float *t) +{ + *t = len(a); + float x = 1.0f / *t; + return a * x; +} + +ccl_device_inline float3 safe_normalize(const float3 a) +{ + float t = len(a); + return (t != 0.0f) ? a * (1.0f / t) : a; +} + +ccl_device_inline float3 safe_normalize_len(const float3 a, ccl_private float *t) +{ + *t = len(a); + return (*t != 0.0f) ? a / (*t) : a; +} + +ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b) +{ + return make_float3((b.x != 0.0f) ? a.x / b.x : 0.0f, + (b.y != 0.0f) ? a.y / b.y : 0.0f, + (b.z != 0.0f) ? a.z / b.z : 0.0f); +} + +ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b) +{ + return (b != 0.0f) ? 
a / b : zero_float3(); +} + +ccl_device_inline float3 interp(float3 a, float3 b, float t) +{ + return a + t * (b - a); +} + +ccl_device_inline float3 sqr3(float3 a) +{ + return a * a; +} + +ccl_device_inline bool is_zero(const float3 a) +{ +#ifdef __KERNEL_SSE__ + return a == make_float3(0.0f); +#else + return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); +#endif +} + +ccl_device_inline float reduce_add(const float3 a) +{ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_NEON__) + __m128 t = a.m128; + t[3] = 0.0f; + return vaddvq_f32(t); +#else + return (a.x + a.y + a.z); +#endif +} + +ccl_device_inline float average(const float3 a) +{ + return reduce_add(a) * (1.0f / 3.0f); +} + +ccl_device_inline bool isequal_float3(const float3 a, const float3 b) +{ + return a == b; +} + +ccl_device_inline float3 pow3(float3 v, float e) +{ + return make_float3(powf(v.x, e), powf(v.y, e), powf(v.z, e)); +} + +ccl_device_inline float3 exp3(float3 v) +{ + return make_float3(expf(v.x), expf(v.y), expf(v.z)); +} + +ccl_device_inline float3 log3(float3 v) +{ + return make_float3(logf(v.x), logf(v.y), logf(v.z)); +} + +ccl_device_inline int3 quick_floor_to_int3(const float3 a) +{ +#ifdef __KERNEL_SSE__ + int3 b = int3(_mm_cvttps_epi32(a.m128)); + int3 isneg = int3(_mm_castps_si128(_mm_cmplt_ps(a.m128, _mm_set_ps1(0.0f)))); + /* Unsaturated add 0xffffffff is the same as subtract -1. */ + return b + isneg; +#else + return make_int3(quick_floor_to_int(a.x), quick_floor_to_int(a.y), quick_floor_to_int(a.z)); +#endif +} + +ccl_device_inline bool isfinite3_safe(float3 v) +{ + return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z); +} + +ccl_device_inline float3 ensure_finite3(float3 v) +{ + if (!isfinite_safe(v.x)) + v.x = 0.0f; + if (!isfinite_safe(v.y)) + v.y = 0.0f; + if (!isfinite_safe(v.z)) + v.z = 0.0f; + return v; +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_FLOAT3_H__ */ diff --git a/intern/cycles/util/math_float4.h b/intern/cycles/util/math_float4.h new file mode 100644 index 00000000000..c76959ee7ff --- /dev/null +++ b/intern/cycles/util/math_float4.h @@ -0,0 +1,536 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_FLOAT4_H__ +#define __UTIL_MATH_FLOAT4_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. 
+ */ + +ccl_device_inline float4 operator-(const float4 &a); +ccl_device_inline float4 operator*(const float4 &a, const float4 &b); +ccl_device_inline float4 operator*(const float4 &a, float f); +ccl_device_inline float4 operator*(float f, const float4 &a); +ccl_device_inline float4 operator/(const float4 &a, float f); +ccl_device_inline float4 operator/(const float4 &a, const float4 &b); +ccl_device_inline float4 operator+(const float4 &a, const float f); +ccl_device_inline float4 operator+(const float4 &a, const float4 &b); +ccl_device_inline float4 operator-(const float4 &a, const float f); +ccl_device_inline float4 operator-(const float4 &a, const float4 &b); +ccl_device_inline float4 operator+=(float4 &a, const float4 &b); +ccl_device_inline float4 operator*=(float4 &a, const float4 &b); +ccl_device_inline float4 operator*=(float4 &a, float f); +ccl_device_inline float4 operator/=(float4 &a, float f); + +ccl_device_inline int4 operator<(const float4 &a, const float4 &b); +ccl_device_inline int4 operator>=(const float4 &a, const float4 &b); +ccl_device_inline int4 operator<=(const float4 &a, const float4 &b); +ccl_device_inline bool operator==(const float4 &a, const float4 &b); + +ccl_device_inline float distance(const float4 &a, const float4 &b); +ccl_device_inline float dot(const float4 &a, const float4 &b); +ccl_device_inline float len_squared(const float4 &a); +ccl_device_inline float4 rcp(const float4 &a); +ccl_device_inline float4 sqrt(const float4 &a); +ccl_device_inline float4 sqr(const float4 &a); +ccl_device_inline float4 cross(const float4 &a, const float4 &b); +ccl_device_inline bool is_zero(const float4 &a); +ccl_device_inline float average(const float4 &a); +ccl_device_inline float len(const float4 &a); +ccl_device_inline float4 normalize(const float4 &a); +ccl_device_inline float4 safe_normalize(const float4 &a); +ccl_device_inline float4 min(const float4 &a, const float4 &b); +ccl_device_inline float4 max(const float4 &a, const float4 &b); +ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx); +ccl_device_inline float4 fabs(const float4 &a); +ccl_device_inline float4 floor(const float4 &a); +ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t); + +ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b); + +#ifdef __KERNEL_SSE__ +template +__forceinline const float4 shuffle(const float4 &b); +template +__forceinline const float4 shuffle(const float4 &a, const float4 &b); + +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &b); + +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b); +template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b); + +# ifdef __KERNEL_SSE3__ +template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b); +template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b); +# endif +#endif /* __KERNEL_SSE__ */ + +#ifndef __KERNEL_GPU__ +ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b); +ccl_device_inline float4 reduce_min(const float4 &a); +ccl_device_inline float4 reduce_max(const float4 &a); +ccl_device_inline float4 reduce_add(const float4 &a); +#endif /* !__KERNEL_GPU__ */ + +/******************************************************************************* + * Definition. 
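+ * Note on the shuffle<> helpers declared above: shuffle<i0, i1, i2, i3>(b)
+ * returns (b[i0], b[i1], b[i2], b[i3]); for example, reduce_add() below uses
+ * shuffle<1, 0, 3, 2> and shuffle<2, 3, 0, 1> to sum all four lanes without
+ * leaving SSE registers.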
+ */ + +ccl_device_inline float4 zero_float4() +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_setzero_ps()); +#else + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#endif +} + +ccl_device_inline float4 one_float4() +{ + return make_float4(1.0f, 1.0f, 1.0f, 1.0f); +} + +ccl_device_inline float4 operator-(const float4 &a) +{ +#ifdef __KERNEL_SSE__ + __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); + return float4(_mm_xor_ps(a.m128, mask)); +#else + return make_float4(-a.x, -a.y, -a.z, -a.w); +#endif +} + +ccl_device_inline float4 operator*(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_mul_ps(a.m128, b.m128)); +#else + return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); +#endif +} + +ccl_device_inline float4 operator*(const float4 &a, float f) +{ +#if defined(__KERNEL_SSE__) + return a * make_float4(f); +#else + return make_float4(a.x * f, a.y * f, a.z * f, a.w * f); +#endif +} + +ccl_device_inline float4 operator*(float f, const float4 &a) +{ + return a * f; +} + +ccl_device_inline float4 operator/(const float4 &a, float f) +{ + return a * (1.0f / f); +} + +ccl_device_inline float4 operator/(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_div_ps(a.m128, b.m128)); +#else + return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); +#endif +} + +ccl_device_inline float4 operator+(const float4 &a, const float f) +{ + return a + make_float4(f, f, f, f); +} + +ccl_device_inline float4 operator+(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_add_ps(a.m128, b.m128)); +#else + return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +#endif +} + +ccl_device_inline float4 operator-(const float4 &a, const float f) +{ + return a - make_float4(f, f, f, f); +} + +ccl_device_inline float4 operator-(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_sub_ps(a.m128, b.m128)); +#else + return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); +#endif +} + +ccl_device_inline float4 operator+=(float4 &a, const float4 &b) +{ + return a = a + b; +} + +ccl_device_inline float4 operator-=(float4 &a, const float4 &b) +{ + return a = a - b; +} + +ccl_device_inline float4 operator*=(float4 &a, const float4 &b) +{ + return a = a * b; +} + +ccl_device_inline float4 operator*=(float4 &a, float f) +{ + return a = a * f; +} + +ccl_device_inline float4 operator/=(float4 &a, float f) +{ + return a = a / f; +} + +ccl_device_inline int4 operator<(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(_mm_cmplt_ps(a.m128, b.m128))); +#else + return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); +#endif +} + +ccl_device_inline int4 operator>=(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(_mm_cmpge_ps(a.m128, b.m128))); +#else + return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); +#endif +} + +ccl_device_inline int4 operator<=(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(_mm_cmple_ps(a.m128, b.m128))); +#else + return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); +#endif +} + +ccl_device_inline bool operator==(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; +#else + return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); +#endif +} + +ccl_device_inline float distance(const float4 &a, const float4 &b) +{ + return len(a - 
b); +} + +ccl_device_inline float dot(const float4 &a, const float4 &b) +{ +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) +# if defined(__KERNEL_NEON__) + __m128 t = vmulq_f32(a, b); + return vaddvq_f32(t); +# else + return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); +# endif +#else + return (a.x * b.x + a.y * b.y) + (a.z * b.z + a.w * b.w); +#endif +} + +ccl_device_inline float len_squared(const float4 &a) +{ + return dot(a, a); +} + +ccl_device_inline float4 rcp(const float4 &a) +{ +#ifdef __KERNEL_SSE__ + /* Don't use _mm_rcp_ps due to poor precision. */ + return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); +#else + return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w); +#endif +} + +ccl_device_inline float4 sqrt(const float4 &a) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_sqrt_ps(a.m128)); +#else + return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w)); +#endif +} + +ccl_device_inline float4 sqr(const float4 &a) +{ + return a * a; +} + +ccl_device_inline float4 cross(const float4 &a, const float4 &b) +{ +#ifdef __KERNEL_SSE__ + return (shuffle<1, 2, 0, 0>(a) * shuffle<2, 0, 1, 0>(b)) - + (shuffle<2, 0, 1, 0>(a) * shuffle<1, 2, 0, 0>(b)); +#else + return make_float4(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0.0f); +#endif +} + +ccl_device_inline bool is_zero(const float4 &a) +{ +#ifdef __KERNEL_SSE__ + return a == make_float4(0.0f); +#else + return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); +#endif +} + +ccl_device_inline float4 reduce_add(const float4 &a) +{ +#if defined(__KERNEL_SSE__) +# if defined(__KERNEL_NEON__) + return float4(vdupq_n_f32(vaddvq_f32(a))); +# elif defined(__KERNEL_SSE3__) + float4 h(_mm_hadd_ps(a.m128, a.m128)); + return float4(_mm_hadd_ps(h.m128, h.m128)); +# else + float4 h(shuffle<1, 0, 3, 2>(a) + a); + return shuffle<2, 3, 0, 1>(h) + h; +# endif +#else + float sum = (a.x + a.y) + (a.z + a.w); + return make_float4(sum, sum, sum, sum); +#endif +} + +ccl_device_inline float average(const float4 &a) +{ + return reduce_add(a).x * 0.25f; +} + +ccl_device_inline float len(const float4 &a) +{ + return sqrtf(dot(a, a)); +} + +ccl_device_inline float4 normalize(const float4 &a) +{ + return a / len(a); +} + +ccl_device_inline float4 safe_normalize(const float4 &a) +{ + float t = len(a); + return (t != 0.0f) ? 
a / t : a;
+}
+
+ccl_device_inline float4 min(const float4 &a, const float4 &b)
+{
+#ifdef __KERNEL_SSE__
+  return float4(_mm_min_ps(a.m128, b.m128));
+#else
+  return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
+#endif
+}
+
+ccl_device_inline float4 max(const float4 &a, const float4 &b)
+{
+#ifdef __KERNEL_SSE__
+  return float4(_mm_max_ps(a.m128, b.m128));
+#else
+  return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
+#endif
+}
+
+ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx)
+{
+  return min(max(a, mn), mx);
+}
+
+ccl_device_inline float4 fabs(const float4 &a)
+{
+#if defined(__KERNEL_SSE__)
+# if defined(__KERNEL_NEON__)
+  return float4(vabsq_f32(a));
+# else
+  return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))));
+# endif
+#else
+  return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
+#endif
+}
+
+ccl_device_inline float4 floor(const float4 &a)
+{
+#ifdef __KERNEL_SSE__
+  return float4(_mm_floor_ps(a));
+#else
+  return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w));
+#endif
+}
+
+ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t)
+{
+  return a + t * (b - a);
+}
+
+#ifdef __KERNEL_SSE__
+template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
+__forceinline const float4 shuffle(const float4 &b)
+{
+# if defined(__KERNEL_NEON__)
+  return float4(shuffle_neon<__m128, index_0, index_1, index_2, index_3>(b.m128));
+# else
+  return float4(_mm_castsi128_ps(
+      _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0))));
+# endif
+}
+
+template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
+__forceinline const float4 shuffle(const float4 &a, const float4 &b)
+{
+# if defined(__KERNEL_NEON__)
+  return float4(shuffle_neon<__m128, index_0, index_1, index_2, index_3>(a.m128, b.m128));
+# else
+  return float4(_mm_shuffle_ps(a.m128, b.m128, _MM_SHUFFLE(index_3, index_2, index_1, index_0)));
+# endif
+}
+
+template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &b)
+{
+  return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b))));
+}
+
+template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b)
+{
+  return float4(_mm_movelh_ps(a.m128, b.m128));
+}
+
+template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b)
+{
+  return float4(_mm_movehl_ps(b.m128, a.m128));
+}
+
+# ifdef __KERNEL_SSE3__
+template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b)
+{
+  return float4(_mm_moveldup_ps(b));
+}
+
+template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b)
+{
+  return float4(_mm_movehdup_ps(b));
+}
+# endif /* __KERNEL_SSE3__ */
+#endif /* __KERNEL_SSE__ */
+
+#ifndef __KERNEL_GPU__
+ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b)
+{
+# ifdef __KERNEL_SSE__
+  return float4(_mm_blendv_ps(b.m128, a.m128, _mm_castsi128_ps(mask.m128)));
+# else
+  return make_float4(
+      (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w);
+# endif
+}
+
+ccl_device_inline float4 mask(const int4 &mask, const float4 &a)
+{
+  /* Replace elements of x with zero where mask isn't set.
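+   * For example, with a = (1, 2, 3, 4) and a mask whose lanes are (~0, 0, ~0, 0), as
+   * produced by the float4 comparison operators above, the result is (1, 0, 3, 0):
+   * lanes where the mask is set keep their value from a, the rest become zero.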
*/ + return select(mask, a, make_float4(0.0f)); +} + +ccl_device_inline float4 reduce_min(const float4 &a) +{ +# if defined(__KERNEL_SSE__) +# if defined(__KERNEL_NEON__) + return float4(vdupq_n_f32(vminvq_f32(a))); +# else + float4 h = min(shuffle<1, 0, 3, 2>(a), a); + return min(shuffle<2, 3, 0, 1>(h), h); +# endif +# else + return make_float4(min(min(a.x, a.y), min(a.z, a.w))); +# endif +} + +ccl_device_inline float4 reduce_max(const float4 &a) +{ +# if defined(__KERNEL_SSE__) +# if defined(__KERNEL_NEON__) + return float4(vdupq_n_f32(vmaxvq_f32(a))); +# else + float4 h = max(shuffle<1, 0, 3, 2>(a), a); + return max(shuffle<2, 3, 0, 1>(h), h); +# endif +# else + return make_float4(max(max(a.x, a.y), max(a.z, a.w))); +# endif +} + +ccl_device_inline float4 load_float4(ccl_private const float *v) +{ +# ifdef __KERNEL_SSE__ + return float4(_mm_loadu_ps(v)); +# else + return make_float4(v[0], v[1], v[2], v[3]); +# endif +} + +#endif /* !__KERNEL_GPU__ */ + +ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b) +{ + return (b != 0.0f) ? a / b : zero_float4(); +} + +ccl_device_inline bool isfinite4_safe(float4 v) +{ + return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z) && isfinite_safe(v.w); +} + +ccl_device_inline float4 ensure_finite4(float4 v) +{ + if (!isfinite_safe(v.x)) + v.x = 0.0f; + if (!isfinite_safe(v.y)) + v.y = 0.0f; + if (!isfinite_safe(v.z)) + v.z = 0.0f; + if (!isfinite_safe(v.w)) + v.w = 0.0f; + return v; +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_FLOAT4_H__ */ diff --git a/intern/cycles/util/math_int2.h b/intern/cycles/util/math_int2.h new file mode 100644 index 00000000000..5b04be92152 --- /dev/null +++ b/intern/cycles/util/math_int2.h @@ -0,0 +1,73 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_INT2_H__ +#define __UTIL_MATH_INT2_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. + */ + +ccl_device_inline bool operator==(const int2 a, const int2 b); +ccl_device_inline int2 operator+(const int2 &a, const int2 &b); +ccl_device_inline int2 operator+=(int2 &a, const int2 &b); +ccl_device_inline int2 operator-(const int2 &a, const int2 &b); +ccl_device_inline int2 operator*(const int2 &a, const int2 &b); +ccl_device_inline int2 operator/(const int2 &a, const int2 &b); + +/******************************************************************************* + * Definition. 
+ */ + +ccl_device_inline bool operator==(const int2 a, const int2 b) +{ + return (a.x == b.x && a.y == b.y); +} + +ccl_device_inline int2 operator+(const int2 &a, const int2 &b) +{ + return make_int2(a.x + b.x, a.y + b.y); +} + +ccl_device_inline int2 operator+=(int2 &a, const int2 &b) +{ + return a = a + b; +} + +ccl_device_inline int2 operator-(const int2 &a, const int2 &b) +{ + return make_int2(a.x - b.x, a.y - b.y); +} + +ccl_device_inline int2 operator*(const int2 &a, const int2 &b) +{ + return make_int2(a.x * b.x, a.y * b.y); +} + +ccl_device_inline int2 operator/(const int2 &a, const int2 &b) +{ + return make_int2(a.x / b.x, a.y / b.y); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_INT2_H__ */ diff --git a/intern/cycles/util/math_int3.h b/intern/cycles/util/math_int3.h new file mode 100644 index 00000000000..128f2cb53b8 --- /dev/null +++ b/intern/cycles/util/math_int3.h @@ -0,0 +1,110 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_INT3_H__ +#define __UTIL_MATH_INT3_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. + */ + +ccl_device_inline int3 min(int3 a, int3 b); +ccl_device_inline int3 max(int3 a, int3 b); +ccl_device_inline int3 clamp(const int3 &a, int mn, int mx); +ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx); + +/******************************************************************************* + * Definition. 
+ */ + +ccl_device_inline int3 min(int3 a, int3 b) +{ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int3(_mm_min_epi32(a.m128, b.m128)); +#else + return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); +#endif +} + +ccl_device_inline int3 max(int3 a, int3 b) +{ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int3(_mm_max_epi32(a.m128, b.m128)); +#else + return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); +#endif +} + +ccl_device_inline int3 clamp(const int3 &a, int mn, int mx) +{ +#ifdef __KERNEL_SSE__ + return min(max(a, make_int3(mn)), make_int3(mx)); +#else + return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); +#endif +} + +ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx) +{ +#ifdef __KERNEL_SSE__ + return min(max(a, mn), make_int3(mx)); +#else + return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)); +#endif +} + +ccl_device_inline bool operator==(const int3 &a, const int3 &b) +{ + return a.x == b.x && a.y == b.y && a.z == b.z; +} + +ccl_device_inline bool operator!=(const int3 &a, const int3 &b) +{ + return !(a == b); +} + +ccl_device_inline bool operator<(const int3 &a, const int3 &b) +{ + return a.x < b.x && a.y < b.y && a.z < b.z; +} + +ccl_device_inline int3 operator+(const int3 &a, const int3 &b) +{ +#ifdef __KERNEL_SSE__ + return int3(_mm_add_epi32(a.m128, b.m128)); +#else + return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); +#endif +} + +ccl_device_inline int3 operator-(const int3 &a, const int3 &b) +{ +#ifdef __KERNEL_SSE__ + return int3(_mm_sub_epi32(a.m128, b.m128)); +#else + return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); +#endif +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_INT3_H__ */ diff --git a/intern/cycles/util/math_int4.h b/intern/cycles/util/math_int4.h new file mode 100644 index 00000000000..9e3f001efc2 --- /dev/null +++ b/intern/cycles/util/math_int4.h @@ -0,0 +1,156 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_INT4_H__ +#define __UTIL_MATH_INT4_H__ + +#ifndef __UTIL_MATH_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +/******************************************************************************* + * Declaration. 
+ */ + +#ifndef __KERNEL_GPU__ +ccl_device_inline int4 operator+(const int4 &a, const int4 &b); +ccl_device_inline int4 operator+=(int4 &a, const int4 &b); +ccl_device_inline int4 operator>>(const int4 &a, int i); +ccl_device_inline int4 operator<<(const int4 &a, int i); +ccl_device_inline int4 operator<(const int4 &a, const int4 &b); +ccl_device_inline int4 operator>=(const int4 &a, const int4 &b); +ccl_device_inline int4 operator&(const int4 &a, const int4 &b); +ccl_device_inline int4 min(int4 a, int4 b); +ccl_device_inline int4 max(int4 a, int4 b); +ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx); +ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b); +#endif /* __KERNEL_GPU__ */ + +/******************************************************************************* + * Definition. + */ + +#ifndef __KERNEL_GPU__ +ccl_device_inline int4 operator+(const int4 &a, const int4 &b) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_add_epi32(a.m128, b.m128)); +# else + return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +# endif +} + +ccl_device_inline int4 operator+=(int4 &a, const int4 &b) +{ + return a = a + b; +} + +ccl_device_inline int4 operator>>(const int4 &a, int i) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_srai_epi32(a.m128, i)); +# else + return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); +# endif +} + +ccl_device_inline int4 operator<<(const int4 &a, int i) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_slli_epi32(a.m128, i)); +# else + return make_int4(a.x << i, a.y << i, a.z << i, a.w << i); +# endif +} + +ccl_device_inline int4 operator<(const int4 &a, const int4 &b) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_cmplt_epi32(a.m128, b.m128)); +# else + return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); +# endif +} + +ccl_device_inline int4 operator>=(const int4 &a, const int4 &b) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, b.m128))); +# else + return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); +# endif +} + +ccl_device_inline int4 operator&(const int4 &a, const int4 &b) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_and_si128(a.m128, b.m128)); +# else + return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w); +# endif +} + +ccl_device_inline int4 min(int4 a, int4 b) +{ +# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int4(_mm_min_epi32(a.m128, b.m128)); +# else + return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); +# endif +} + +ccl_device_inline int4 max(int4 a, int4 b) +{ +# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int4(_mm_max_epi32(a.m128, b.m128)); +# else + return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); +# endif +} + +ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b) +{ +# ifdef __KERNEL_SSE__ + const __m128 m = _mm_cvtepi32_ps(mask); + /* TODO(sergey): avoid cvt. */ + return int4(_mm_castps_si128( + _mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b))))); +# else + return make_int4( + (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? 
a.w : b.w); +# endif +} + +ccl_device_inline int4 load_int4(const int *v) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_loadu_si128((__m128i *)v)); +# else + return make_int4(v[0], v[1], v[2], v[3]); +# endif +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_INT4_H__ */ diff --git a/intern/cycles/util/math_intersect.h b/intern/cycles/util/math_intersect.h new file mode 100644 index 00000000000..0c431a36afb --- /dev/null +++ b/intern/cycles/util/math_intersect.h @@ -0,0 +1,249 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_INTERSECT_H__ +#define __UTIL_MATH_INTERSECT_H__ + +CCL_NAMESPACE_BEGIN + +/* Ray Intersection */ + +ccl_device bool ray_sphere_intersect(float3 ray_P, + float3 ray_D, + float ray_t, + float3 sphere_P, + float sphere_radius, + ccl_private float3 *isect_P, + ccl_private float *isect_t) +{ + const float3 d = sphere_P - ray_P; + const float radiussq = sphere_radius * sphere_radius; + const float tsq = dot(d, d); + + if (tsq > radiussq) { + /* Ray origin outside sphere. */ + const float tp = dot(d, ray_D); + if (tp < 0.0f) { + /* Ray points away from sphere. */ + return false; + } + const float dsq = tsq - tp * tp; /* Pythagoras. */ + if (dsq > radiussq) { + /* Closest point on ray outside sphere. */ + return false; + } + const float t = tp - sqrtf(radiussq - dsq); /* pythagoras */ + if (t < ray_t) { + *isect_t = t; + *isect_P = ray_P + ray_D * t; + return true; + } + } + return false; +} + +ccl_device bool ray_aligned_disk_intersect(float3 ray_P, + float3 ray_D, + float ray_t, + float3 disk_P, + float disk_radius, + ccl_private float3 *isect_P, + ccl_private float *isect_t) +{ + /* Aligned disk normal. */ + float disk_t; + const float3 disk_N = normalize_len(ray_P - disk_P, &disk_t); + const float div = dot(ray_D, disk_N); + if (UNLIKELY(div == 0.0f)) { + return false; + } + /* Compute t to intersection point. */ + const float t = -disk_t / div; + if (t < 0.0f || t > ray_t) { + return false; + } + /* Test if within radius. */ + float3 P = ray_P + ray_D * t; + if (len_squared(P - disk_P) > disk_radius * disk_radius) { + return false; + } + *isect_P = P; + *isect_t = t; + return true; +} + +ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P, + float3 ray_dir, + float ray_t, +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const ssef *ssef_verts, +#else + const float3 tri_a, + const float3 tri_b, + const float3 tri_c, +#endif + ccl_private float *isect_u, + ccl_private float *isect_v, + ccl_private float *isect_t) +{ +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + typedef ssef float3; + const float3 tri_a(ssef_verts[0]); + const float3 tri_b(ssef_verts[1]); + const float3 tri_c(ssef_verts[2]); + const float3 P(ray_P); + const float3 dir(ray_dir); +#else +# define dot3(a, b) dot(a, b) + const float3 P = ray_P; + const float3 dir = ray_dir; +#endif + + /* Calculate vertices relative to ray origin. 
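+   * The signed values U, V and W computed below (scalar triple products of the ray
+   * direction with the edges) are proportional to the barycentric coordinates of the
+   * candidate hit; the hit is accepted only when all three share a sign (the edge
+   * tests), and the distance t then follows from the triangle's plane equation.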
*/ + const float3 v0 = tri_c - P; + const float3 v1 = tri_a - P; + const float3 v2 = tri_b - P; + + /* Calculate triangle edges. */ + const float3 e0 = v2 - v0; + const float3 e1 = v0 - v1; + const float3 e2 = v1 - v2; + + /* Perform edge tests. */ +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const float3 crossU = cross(v2 + v0, e0); + const float3 crossV = cross(v0 + v1, e1); + const float3 crossW = cross(v1 + v2, e2); + + ssef crossX(crossU); + ssef crossY(crossV); + ssef crossZ(crossW); + ssef zero = _mm_setzero_ps(); + _MM_TRANSPOSE4_PS(crossX, crossY, crossZ, zero); + + const ssef dirX(ray_dir.x); + const ssef dirY(ray_dir.y); + const ssef dirZ(ray_dir.z); + + ssef UVWW = madd(crossX, dirX, madd(crossY, dirY, crossZ * dirZ)); +#else /* __KERNEL_SSE2__ */ + const float U = dot(cross(v2 + v0, e0), ray_dir); + const float V = dot(cross(v0 + v1, e1), ray_dir); + const float W = dot(cross(v1 + v2, e2), ray_dir); +#endif /* __KERNEL_SSE2__ */ + +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + int uvw_sign = movemask(UVWW) & 0x7; + if (uvw_sign != 0) { + if (uvw_sign != 0x7) { + return false; + } + } +#else + const float minUVW = min(U, min(V, W)); + const float maxUVW = max(U, max(V, W)); + + if (minUVW < 0.0f && maxUVW > 0.0f) { + return false; + } +#endif + + /* Calculate geometry normal and denominator. */ + const float3 Ng1 = cross(e1, e0); + // const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0); + const float3 Ng = Ng1 + Ng1; + const float den = dot3(Ng, dir); + /* Avoid division by 0. */ + if (UNLIKELY(den == 0.0f)) { + return false; + } + + /* Perform depth test. */ + const float T = dot3(v0, Ng); + const int sign_den = (__float_as_int(den) & 0x80000000); + const float sign_T = xor_signmask(T, sign_den); + if ((sign_T < 0.0f) || (sign_T > ray_t * xor_signmask(den, sign_den))) { + return false; + } + + const float inv_den = 1.0f / den; +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + UVWW *= inv_den; + _mm_store_ss(isect_u, UVWW); + _mm_store_ss(isect_v, shuffle<1, 1, 3, 3>(UVWW)); +#else + *isect_u = U * inv_den; + *isect_v = V * inv_den; +#endif + *isect_t = T * inv_den; + return true; + +#undef dot3 +} + +/* Tests for an intersection between a ray and a quad defined by + * its midpoint, normal and sides. + * If ellipse is true, hits outside the ellipse that's enclosed by the + * quad are rejected. + */ +ccl_device bool ray_quad_intersect(float3 ray_P, + float3 ray_D, + float ray_mint, + float ray_maxt, + float3 quad_P, + float3 quad_u, + float3 quad_v, + float3 quad_n, + ccl_private float3 *isect_P, + ccl_private float *isect_t, + ccl_private float *isect_u, + ccl_private float *isect_v, + bool ellipse) +{ + /* Perform intersection test. */ + float t = -(dot(ray_P, quad_n) - dot(quad_P, quad_n)) / dot(ray_D, quad_n); + if (t < ray_mint || t > ray_maxt) { + return false; + } + const float3 hit = ray_P + t * ray_D; + const float3 inplane = hit - quad_P; + const float u = dot(inplane, quad_u) / dot(quad_u, quad_u); + if (u < -0.5f || u > 0.5f) { + return false; + } + const float v = dot(inplane, quad_v) / dot(quad_v, quad_v); + if (v < -0.5f || v > 0.5f) { + return false; + } + if (ellipse && (u * u + v * v > 0.25f)) { + return false; + } + /* Store the result. */ + /* TODO(sergey): Check whether we can avoid some checks here. 
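+   * Note that u and v are computed in [-0.5, 0.5] around the quad center and are
+   * shifted by 0.5 below, so callers receive parametric coordinates in [0, 1].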
*/ + if (isect_P != NULL) + *isect_P = hit; + if (isect_t != NULL) + *isect_t = t; + if (isect_u != NULL) + *isect_u = u + 0.5f; + if (isect_v != NULL) + *isect_v = v + 0.5f; + return true; +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_INTERSECT_H__ */ diff --git a/intern/cycles/util/math_matrix.h b/intern/cycles/util/math_matrix.h new file mode 100644 index 00000000000..bff7ddb4cee --- /dev/null +++ b/intern/cycles/util/math_matrix.h @@ -0,0 +1,454 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_MATH_MATRIX_H__ +#define __UTIL_MATH_MATRIX_H__ + +CCL_NAMESPACE_BEGIN + +#define MAT(A, size, row, col) A[(row) * (size) + (col)] + +/* Variants that use a constant stride on GPUS. */ +#ifdef __KERNEL_GPU__ +# define MATS(A, n, r, c, s) A[((r) * (n) + (c)) * (s)] +/* Element access when only the lower-triangular elements are stored. */ +# define MATHS(A, r, c, s) A[((r) * ((r) + 1) / 2 + (c)) * (s)] +# define VECS(V, i, s) V[(i) * (s)] +#else +# define MATS(A, n, r, c, s) MAT(A, n, r, c) +# define MATHS(A, r, c, s) A[(r) * ((r) + 1) / 2 + (c)] +# define VECS(V, i, s) V[i] +#endif + +/* Zeroing helpers. */ + +ccl_device_inline void math_vector_zero(ccl_private float *v, int n) +{ + for (int i = 0; i < n; i++) { + v[i] = 0.0f; + } +} + +ccl_device_inline void math_matrix_zero(ccl_private float *A, int n) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = 0.0f; + } + } +} + +/* Elementary vector operations. */ + +ccl_device_inline void math_vector_add(ccl_private float *a, + ccl_private const float *ccl_restrict b, + int n) +{ + for (int i = 0; i < n; i++) { + a[i] += b[i]; + } +} + +ccl_device_inline void math_vector_mul(ccl_private float *a, + ccl_private const float *ccl_restrict b, + int n) +{ + for (int i = 0; i < n; i++) { + a[i] *= b[i]; + } +} + +ccl_device_inline void math_vector_mul_strided(ccl_global float *a, + ccl_private const float *ccl_restrict b, + int astride, + int n) +{ + for (int i = 0; i < n; i++) { + a[i * astride] *= b[i]; + } +} + +ccl_device_inline void math_vector_scale(ccl_private float *a, float b, int n) +{ + for (int i = 0; i < n; i++) { + a[i] *= b; + } +} + +ccl_device_inline void math_vector_max(ccl_private float *a, + ccl_private const float *ccl_restrict b, + int n) +{ + for (int i = 0; i < n; i++) { + a[i] = max(a[i], b[i]); + } +} + +ccl_device_inline void math_vec3_add(ccl_private float3 *v, int n, ccl_private float *x, float3 w) +{ + for (int i = 0; i < n; i++) { + v[i] += w * x[i]; + } +} + +ccl_device_inline void math_vec3_add_strided( + ccl_global float3 *v, int n, ccl_private float *x, float3 w, int stride) +{ + for (int i = 0; i < n; i++) { + ccl_global float *elem = (ccl_global float *)(v + i * stride); + atomic_add_and_fetch_float(elem + 0, w.x * x[i]); + atomic_add_and_fetch_float(elem + 1, w.y * x[i]); + atomic_add_and_fetch_float(elem + 2, w.z * x[i]); + } +} + +/* Elementary matrix operations. 
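+ * For example, MATHS() addresses a packed lower triangle: element (row, col) with
+ * col <= row is stored at index row * (row + 1) / 2 + col, so a symmetric 4x4
+ * matrix occupies only 10 stored floats.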
+ * Note: TriMatrix refers to a square matrix that is symmetric, + * and therefore its upper-triangular part isn't stored. */ + +ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A, + int n, + float val, + int stride) +{ + for (int row = 0; row < n; row++) { + MATHS(A, row, row, stride) += val; + } +} + +/* Add Gramian matrix of v to A. + * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ +ccl_device_inline void math_matrix_add_gramian(ccl_private float *A, + int n, + ccl_private const float *ccl_restrict v, + float weight) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) += v[row] * v[col] * weight; + } + } +} + +/* Add Gramian matrix of v to A. + * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ +ccl_device_inline void math_trimatrix_add_gramian_strided( + ccl_global float *A, int n, ccl_private const float *ccl_restrict v, float weight, int stride) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + atomic_add_and_fetch_float(&MATHS(A, row, col, stride), v[row] * v[col] * weight); + } + } +} + +ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A, + int n, + ccl_private const float *ccl_restrict v, + float weight) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MATHS(A, row, col, 1) += v[row] * v[col] * weight; + } + } +} + +/* Transpose matrix A in place. */ +ccl_device_inline void math_matrix_transpose(ccl_global float *A, int n, int stride) +{ + for (int i = 0; i < n; i++) { + for (int j = 0; j < i; j++) { + float temp = MATS(A, n, i, j, stride); + MATS(A, n, i, j, stride) = MATS(A, n, j, i, stride); + MATS(A, n, j, i, stride) = temp; + } + } +} + +/* Solvers for matrix problems */ + +/* In-place Cholesky-Banachiewicz decomposition of the square, positive-definite matrix A + * into a lower triangular matrix L so that A = L*L^T. A is being overwritten by L. + * Also, only the lower triangular part of A is ever accessed. */ +ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + float sum_col = MATHS(A, row, col, stride); + for (int k = 0; k < col; k++) { + sum_col -= MATHS(A, row, k, stride) * MATHS(A, col, k, stride); + } + if (row == col) { + sum_col = sqrtf(max(sum_col, 0.0f)); + } + else { + sum_col /= MATHS(A, col, col, stride); + } + MATHS(A, row, col, stride) = sum_col; + } + } +} + +/* Solve A*S=y for S given A and y, + * where A is symmetrical positive-semi-definite and both inputs are destroyed in the process. + * + * We can apply Cholesky decomposition to find a lower triangular L so that L*Lt = A. + * With that we get (L*Lt)*S = L*(Lt*S) = L*b = y, defining b as Lt*S. + * Since L is lower triangular, finding b is relatively easy since y is known. + * Then, the remaining problem is Lt*S = b, which again can be solved easily. + * + * This is useful for solving the normal equation S=inv(Xt*W*X)*Xt*W*y, since Xt*W*X is + * symmetrical positive-semidefinite by construction, + * so we can just use this function with A=Xt*W*X and y=Xt*W*y. 
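+ *
+ * A minimal usage sketch (the buffer names are illustrative only, not part of this API):
+ *
+ *   // Accumulate the normal equations into XtWX (packed lower triangle) and XtWy,
+ *   // e.g. via math_trimatrix_add_gramian_strided() and math_vec3_add_strided().
+ *   math_trimatrix_vec3_solve(XtWX, XtWy, n, stride);
+ *   // XtWy now holds the solution S; both inputs have been overwritten.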
*/ +ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A, + ccl_global float3 *y, + int n, + int stride) +{ + /* Since the first entry of the design row is always 1, the upper-left element of XtWX is a good + * heuristic for the amount of pixels considered (with weighting), + * therefore the amount of correction is scaled based on it. */ + math_trimatrix_add_diagonal(A, n, 3e-7f * A[0], stride); /* Improve the numerical stability. */ + math_trimatrix_cholesky(A, n, stride); /* Replace A with L so that L*Lt = A. */ + + /* Use forward substitution to solve L*b = y, replacing y by b. */ + for (int row = 0; row < n; row++) { + float3 sum = VECS(y, row, stride); + for (int col = 0; col < row; col++) + sum -= MATHS(A, row, col, stride) * VECS(y, col, stride); + VECS(y, row, stride) = sum / MATHS(A, row, row, stride); + } + + /* Use backward substitution to solve Lt*S = b, replacing b by S. */ + for (int row = n - 1; row >= 0; row--) { + float3 sum = VECS(y, row, stride); + for (int col = row + 1; col < n; col++) + sum -= MATHS(A, col, row, stride) * VECS(y, col, stride); + VECS(y, row, stride) = sum / MATHS(A, row, row, stride); + } +} + +/* Perform the Jacobi Eigenvalue Method on matrix A. + * A is assumed to be a symmetrical matrix, therefore only the lower-triangular part is ever + * accessed. The algorithm overwrites the contents of A. + * + * After returning, A will be overwritten with D, which is (almost) diagonal, + * and V will contain the eigenvectors of the original A in its rows (!), + * so that A = V^T*D*V. Therefore, the diagonal elements of D are the (sorted) eigenvalues of A. + */ +ccl_device void math_matrix_jacobi_eigendecomposition(ccl_private float *A, + ccl_global float *V, + int n, + int v_stride) +{ + const float singular_epsilon = 1e-9f; + + for (int row = 0; row < n; row++) { + for (int col = 0; col < n; col++) { + MATS(V, n, row, col, v_stride) = (col == row) ? 1.0f : 0.0f; + } + } + + for (int sweep = 0; sweep < 8; sweep++) { + float off_diagonal = 0.0f; + for (int row = 1; row < n; row++) { + for (int col = 0; col < row; col++) { + off_diagonal += fabsf(MAT(A, n, row, col)); + } + } + if (off_diagonal < 1e-7f) { + /* The matrix has nearly reached diagonal form. + * Since the eigenvalues are only used to determine truncation, their exact values aren't + * required - a relative error of a few ULPs won't matter at all. */ + break; + } + + /* Set the threshold for the small element rotation skip in the first sweep: + * Skip all elements that are less than a tenth of the average off-diagonal element. */ + float threshold = 0.2f * off_diagonal / (n * n); + + for (int row = 1; row < n; row++) { + for (int col = 0; col < row; col++) { + /* Perform a Jacobi rotation on this element that reduces it to zero. */ + float element = MAT(A, n, row, col); + float abs_element = fabsf(element); + + /* If we're in a later sweep and the element already is very small, + * just set it to zero and skip the rotation. */ + if (sweep > 3 && abs_element <= singular_epsilon * fabsf(MAT(A, n, row, row)) && + abs_element <= singular_epsilon * fabsf(MAT(A, n, col, col))) { + MAT(A, n, row, col) = 0.0f; + continue; + } + + if (element == 0.0f) { + continue; + } + + /* If we're in one of the first sweeps and the element is smaller than the threshold, + * skip it. */ + if (sweep < 3 && (abs_element < threshold)) { + continue; + } + + /* Determine rotation: The rotation is characterized by its angle phi - or, + * in the actual implementation, sin(phi) and cos(phi). 
+ * To find those, we first compute their ratio - that might be unstable if the angle + * approaches 90°, so there's a fallback for that case. + * Then, we compute sin(phi) and cos(phi) themselves. */ + float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col); + float ratio; + if (abs_element > singular_epsilon * fabsf(singular_diff)) { + float cot_2phi = 0.5f * singular_diff / element; + ratio = 1.0f / (fabsf(cot_2phi) + sqrtf(1.0f + cot_2phi * cot_2phi)); + if (cot_2phi < 0.0f) + ratio = -ratio; /* Copy sign. */ + } + else { + ratio = element / singular_diff; + } + + float c = 1.0f / sqrtf(1.0f + ratio * ratio); + float s = ratio * c; + /* To improve numerical stability by avoiding cancellation, the update equations are + * reformulized to use sin(phi) and tan(phi/2) instead. */ + float tan_phi_2 = s / (1.0f + c); + + /* Update the singular values in the diagonal. */ + float singular_delta = ratio * element; + MAT(A, n, row, row) += singular_delta; + MAT(A, n, col, col) -= singular_delta; + + /* Set the element itself to zero. */ + MAT(A, n, row, col) = 0.0f; + + /* Perform the actual rotations on the matrices. */ +#define ROT(M, r1, c1, r2, c2, stride) \ + { \ + float M1 = MATS(M, n, r1, c1, stride); \ + float M2 = MATS(M, n, r2, c2, stride); \ + MATS(M, n, r1, c1, stride) -= s * (M2 + tan_phi_2 * M1); \ + MATS(M, n, r2, c2, stride) += s * (M1 - tan_phi_2 * M2); \ + } + + /* Split into three parts to ensure correct accesses since we only store the + * lower-triangular part of A. */ + for (int i = 0; i < col; i++) + ROT(A, col, i, row, i, 1); + for (int i = col + 1; i < row; i++) + ROT(A, i, col, row, i, 1); + for (int i = row + 1; i < n; i++) + ROT(A, i, col, i, row, 1); + + for (int i = 0; i < n; i++) + ROT(V, col, i, row, i, v_stride); +#undef ROT + } + } + } + + /* Sort eigenvalues and the associated eigenvectors. */ + for (int i = 0; i < n - 1; i++) { + float v = MAT(A, n, i, i); + int k = i; + for (int j = i; j < n; j++) { + if (MAT(A, n, j, j) >= v) { + v = MAT(A, n, j, j); + k = j; + } + } + if (k != i) { + /* Swap eigenvalues. */ + MAT(A, n, k, k) = MAT(A, n, i, i); + MAT(A, n, i, i) = v; + /* Swap eigenvectors. */ + for (int j = 0; j < n; j++) { + float v = MATS(V, n, i, j, v_stride); + MATS(V, n, i, j, v_stride) = MATS(V, n, k, j, v_stride); + MATS(V, n, k, j, v_stride) = v; + } + } + } +} + +#ifdef __KERNEL_SSE3__ +ccl_device_inline void math_vector_zero_sse(float4 *A, int n) +{ + for (int i = 0; i < n; i++) { + A[i] = make_float4(0.0f); + } +} + +ccl_device_inline void math_matrix_zero_sse(float4 *A, int n) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = make_float4(0.0f); + } + } +} + +/* Add Gramian matrix of v to A. + * The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j]. 
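+ * For example, with n = 2 and v = (x, y) the stored lower triangle accumulates
+ * weight * x * x, weight * y * x and weight * y * y; this float4 variant performs
+ * four such accumulations at once, one per SIMD lane.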
*/ +ccl_device_inline void math_matrix_add_gramian_sse(float4 *A, + int n, + const float4 *ccl_restrict v, + float4 weight) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = MAT(A, n, row, col) + v[row] * v[col] * weight; + } + } +} + +ccl_device_inline void math_vector_add_sse(float4 *V, int n, const float4 *ccl_restrict a) +{ + for (int i = 0; i < n; i++) { + V[i] += a[i]; + } +} + +ccl_device_inline void math_vector_mul_sse(float4 *V, int n, const float4 *ccl_restrict a) +{ + for (int i = 0; i < n; i++) { + V[i] *= a[i]; + } +} + +ccl_device_inline void math_vector_max_sse(float4 *a, const float4 *ccl_restrict b, int n) +{ + for (int i = 0; i < n; i++) { + a[i] = max(a[i], b[i]); + } +} + +ccl_device_inline void math_matrix_hsum(float *A, int n, const float4 *ccl_restrict B) +{ + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col))[0]; + } + } +} +#endif + +#undef MAT + +CCL_NAMESPACE_END + +#endif /* __UTIL_MATH_MATRIX_H__ */ diff --git a/intern/cycles/util/md5.cpp b/intern/cycles/util/md5.cpp new file mode 100644 index 00000000000..47e489b1aed --- /dev/null +++ b/intern/cycles/util/md5.cpp @@ -0,0 +1,387 @@ +/* + * Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + * + * L. Peter Deutsch + * ghost@aladdin.com + */ + +/* Minor modifications done to remove some code and change style. 
*/ + +#include "util/md5.h" +#include "util/path.h" + +#include +#include + +CCL_NAMESPACE_BEGIN + +#define T_MASK ((uint32_t)~0) +#define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87) +#define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9) +#define T3 0x242070db +#define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111) +#define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050) +#define T6 0x4787c62a +#define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec) +#define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe) +#define T9 0x698098d8 +#define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850) +#define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e) +#define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841) +#define T13 0x6b901122 +#define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c) +#define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71) +#define T16 0x49b40821 +#define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d) +#define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf) +#define T19 0x265e5a51 +#define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855) +#define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2) +#define T22 0x02441453 +#define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e) +#define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437) +#define T25 0x21e1cde6 +#define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829) +#define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278) +#define T28 0x455a14ed +#define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa) +#define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07) +#define T31 0x676f02d9 +#define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375) +#define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd) +#define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e) +#define T35 0x6d9d6122 +#define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3) +#define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb) +#define T38 0x4bdecfa9 +#define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f) +#define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f) +#define T41 0x289b7ec6 +#define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805) +#define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a) +#define T44 0x04881d05 +#define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6) +#define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a) +#define T47 0x1fa27cf8 +#define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a) +#define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb) +#define T50 0x432aff97 +#define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58) +#define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6) +#define T53 0x655b59c3 +#define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d) +#define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82) +#define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e) +#define T57 0x6fa87e4f +#define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f) +#define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb) +#define T60 0x4e0811a1 +#define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d) +#define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca) +#define T63 0x2ad7d2bb +#define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e) + +void MD5Hash::process(const uint8_t *data /*[64]*/) +{ + uint32_t a = abcd[0], b = abcd[1], c = abcd[2], d = abcd[3]; + uint32_t t; + /* Define storage for little-endian or both types of CPUs. */ + uint32_t xbuf[16]; + const uint32_t *X; + + { + /* + * Determine dynamically whether this is a big-endian or + * little-endian machine, since we can use a more efficient + * algorithm on the latter. + */ + static const int w = 1; + + if (*((const uint8_t *)&w)) /* dynamic little-endian */ + { + /* + * On little-endian machines, we can process properly aligned + * data without copying it. 
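+       * (The `(data - (const uint8_t *)0) & 3` expression below is simply a test
+       * that the pointer is 4-byte aligned.)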
+ */ + if (!((data - (const uint8_t *)0) & 3)) { + /* data are properly aligned */ + X = (const uint32_t *)data; + } + else { + /* not aligned */ + memcpy(xbuf, data, 64); + X = xbuf; + } + } + else { /* dynamic big-endian */ + /* + * On big-endian machines, we must arrange the bytes in the + * right order. + */ + const uint8_t *xp = data; + int i; + + X = xbuf; /* (dynamic only) */ + for (i = 0; i < 16; ++i, xp += 4) + xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24); + } + } + +#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + + /* Round 1. */ + /* Let [abcd k s i] denote the operation + * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ +#define F(x, y, z) (((x) & (y)) | (~(x) & (z))) +#define SET(a, b, c, d, k, s, Ti) \ + t = a + F(b, c, d) + X[k] + Ti; \ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 0, 7, T1); + SET(d, a, b, c, 1, 12, T2); + SET(c, d, a, b, 2, 17, T3); + SET(b, c, d, a, 3, 22, T4); + SET(a, b, c, d, 4, 7, T5); + SET(d, a, b, c, 5, 12, T6); + SET(c, d, a, b, 6, 17, T7); + SET(b, c, d, a, 7, 22, T8); + SET(a, b, c, d, 8, 7, T9); + SET(d, a, b, c, 9, 12, T10); + SET(c, d, a, b, 10, 17, T11); + SET(b, c, d, a, 11, 22, T12); + SET(a, b, c, d, 12, 7, T13); + SET(d, a, b, c, 13, 12, T14); + SET(c, d, a, b, 14, 17, T15); + SET(b, c, d, a, 15, 22, T16); +#undef SET + + /* Round 2. */ + /* Let [abcd k s i] denote the operation + * a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ +#define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) +#define SET(a, b, c, d, k, s, Ti) \ + t = a + G(b, c, d) + X[k] + Ti; \ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 1, 5, T17); + SET(d, a, b, c, 6, 9, T18); + SET(c, d, a, b, 11, 14, T19); + SET(b, c, d, a, 0, 20, T20); + SET(a, b, c, d, 5, 5, T21); + SET(d, a, b, c, 10, 9, T22); + SET(c, d, a, b, 15, 14, T23); + SET(b, c, d, a, 4, 20, T24); + SET(a, b, c, d, 9, 5, T25); + SET(d, a, b, c, 14, 9, T26); + SET(c, d, a, b, 3, 14, T27); + SET(b, c, d, a, 8, 20, T28); + SET(a, b, c, d, 13, 5, T29); + SET(d, a, b, c, 2, 9, T30); + SET(c, d, a, b, 7, 14, T31); + SET(b, c, d, a, 12, 20, T32); +#undef SET + + /* Round 3. */ + /* Let [abcd k s t] denote the operation + * a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */ +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define SET(a, b, c, d, k, s, Ti) \ + t = a + H(b, c, d) + X[k] + Ti; \ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 5, 4, T33); + SET(d, a, b, c, 8, 11, T34); + SET(c, d, a, b, 11, 16, T35); + SET(b, c, d, a, 14, 23, T36); + SET(a, b, c, d, 1, 4, T37); + SET(d, a, b, c, 4, 11, T38); + SET(c, d, a, b, 7, 16, T39); + SET(b, c, d, a, 10, 23, T40); + SET(a, b, c, d, 13, 4, T41); + SET(d, a, b, c, 0, 11, T42); + SET(c, d, a, b, 3, 16, T43); + SET(b, c, d, a, 6, 23, T44); + SET(a, b, c, d, 9, 4, T45); + SET(d, a, b, c, 12, 11, T46); + SET(c, d, a, b, 15, 16, T47); + SET(b, c, d, a, 2, 23, T48); +#undef SET + + /* Round 4. */ + /* Let [abcd k s t] denote the operation + * a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ +#define I(x, y, z) ((y) ^ ((x) | ~(z))) +#define SET(a, b, c, d, k, s, Ti) \ + t = a + I(b, c, d) + X[k] + Ti; \ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. 
*/ + SET(a, b, c, d, 0, 6, T49); + SET(d, a, b, c, 7, 10, T50); + SET(c, d, a, b, 14, 15, T51); + SET(b, c, d, a, 5, 21, T52); + SET(a, b, c, d, 12, 6, T53); + SET(d, a, b, c, 3, 10, T54); + SET(c, d, a, b, 10, 15, T55); + SET(b, c, d, a, 1, 21, T56); + SET(a, b, c, d, 8, 6, T57); + SET(d, a, b, c, 15, 10, T58); + SET(c, d, a, b, 6, 15, T59); + SET(b, c, d, a, 13, 21, T60); + SET(a, b, c, d, 4, 6, T61); + SET(d, a, b, c, 11, 10, T62); + SET(c, d, a, b, 2, 15, T63); + SET(b, c, d, a, 9, 21, T64); +#undef SET + + /* Then perform the following additions. (That is increment each + * of the four registers by the value it had before this block + * was started.) */ + abcd[0] += a; + abcd[1] += b; + abcd[2] += c; + abcd[3] += d; +} + +MD5Hash::MD5Hash() +{ + count[0] = count[1] = 0; + abcd[0] = 0x67452301; + abcd[1] = /*0xefcdab89*/ T_MASK ^ 0x10325476; + abcd[2] = /*0x98badcfe*/ T_MASK ^ 0x67452301; + abcd[3] = 0x10325476; +} + +MD5Hash::~MD5Hash() +{ +} + +void MD5Hash::append(const uint8_t *data, int nbytes) +{ + const uint8_t *p = data; + int left = nbytes; + int offset = (count[0] >> 3) & 63; + uint32_t nbits = (uint32_t)(nbytes << 3); + + if (nbytes <= 0) + return; + + /* Update the message length. */ + count[1] += nbytes >> 29; + count[0] += nbits; + if (count[0] < nbits) + count[1]++; + + /* Process an initial partial block. */ + if (offset) { + int copy = (offset + nbytes > 64 ? 64 - offset : nbytes); + + memcpy(buf + offset, p, copy); + if (offset + copy < 64) + return; + p += copy; + left -= copy; + process(buf); + } + + /* Process full blocks. */ + for (; left >= 64; p += 64, left -= 64) + process(p); + + /* Process a final partial block. */ + if (left) + memcpy(buf, p, left); +} + +void MD5Hash::append(const string &str) +{ + if (str.size()) { + append((const uint8_t *)str.c_str(), str.size()); + } +} + +bool MD5Hash::append_file(const string &filepath) +{ + FILE *f = path_fopen(filepath, "rb"); + + if (!f) { + fprintf(stderr, "MD5: failed to open file %s\n", filepath.c_str()); + return false; + } + + const size_t buffer_size = 1024; + uint8_t buffer[buffer_size]; + size_t n; + + do { + n = fread(buffer, 1, buffer_size, f); + append(buffer, n); + } while (n == buffer_size); + + bool success = (ferror(f) == 0); + + fclose(f); + + return success; +} + +void MD5Hash::finish(uint8_t digest[16]) +{ + static const uint8_t pad[64] = {0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + uint8_t data[8]; + int i; + + /* Save the length before padding. */ + for (i = 0; i < 8; ++i) + data[i] = (uint8_t)(count[i >> 2] >> ((i & 3) << 3)); + + /* Pad to 56 bytes mod 64. */ + append(pad, ((55 - (count[0] >> 3)) & 63) + 1); + /* Append the length. 
*/ + append(data, 8); + + for (i = 0; i < 16; ++i) + digest[i] = (uint8_t)(abcd[i >> 2] >> ((i & 3) << 3)); +} + +string MD5Hash::get_hex() +{ + uint8_t digest[16]; + char buf[16 * 2 + 1]; + + finish(digest); + + for (int i = 0; i < 16; i++) + sprintf(buf + i * 2, "%02X", (unsigned int)digest[i]); + buf[sizeof(buf) - 1] = '\0'; + + return string(buf); +} + +string util_md5_string(const string &str) +{ + MD5Hash md5; + md5.append((uint8_t *)str.c_str(), str.size()); + return md5.get_hex(); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/md5.h b/intern/cycles/util/md5.h new file mode 100644 index 00000000000..cc7cbef6a49 --- /dev/null +++ b/intern/cycles/util/md5.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + * + * L. Peter Deutsch + * ghost@aladdin.com + */ + +/* MD5 + * + * Simply MD5 hash computation, used by disk cache. Adapted from external + * code, with minor code modifications done to remove some unused code and + * change code style. */ + +#ifndef __UTIL_MD5_H__ +#define __UTIL_MD5_H__ + +#include "util/string.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +class MD5Hash { + public: + MD5Hash(); + ~MD5Hash(); + + void append(const uint8_t *data, int size); + void append(const string &str); + bool append_file(const string &filepath); + string get_hex(); + + protected: + void process(const uint8_t *data); + void finish(uint8_t digest[16]); + + uint32_t count[2]; /* message length in bits, LSW first. */ + uint32_t abcd[4]; /* digest buffer */ + uint8_t buf[64]; /* accumulate block */ +}; + +string util_md5_string(const string &str); + +CCL_NAMESPACE_END + +#endif /* __UTIL_MD5_H__ */ diff --git a/intern/cycles/util/murmurhash.cpp b/intern/cycles/util/murmurhash.cpp new file mode 100644 index 00000000000..9ba0a282cc2 --- /dev/null +++ b/intern/cycles/util/murmurhash.cpp @@ -0,0 +1,126 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is taken from alShaders/Cryptomatte/MurmurHash3.h: + * + * MurmurHash3 was written by Austin Appleby, and is placed in the public + * domain. 
The author hereby disclaims copyright to this source code. + */ + +#include +#include + +#include "util/algorithm.h" +#include "util/murmurhash.h" + +#if defined(_MSC_VER) +# define ROTL32(x, y) _rotl(x, y) +# define ROTL64(x, y) _rotl64(x, y) +# define BIG_CONSTANT(x) (x) +#else +ccl_device_inline uint32_t rotl32(uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +} +# define ROTL32(x, y) rotl32(x, y) +# define BIG_CONSTANT(x) (x##LLU) +#endif + +CCL_NAMESPACE_BEGIN + +/* Block read - if your platform needs to do endian-swapping or can only + * handle aligned reads, do the conversion here. */ +ccl_device_inline uint32_t mm_hash_getblock32(const uint32_t *p, int i) +{ + return p[i]; +} + +/* Finalization mix - force all bits of a hash block to avalanche */ +ccl_device_inline uint32_t mm_hash_fmix32(uint32_t h) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed) +{ + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); + + for (int i = -nblocks; i; i++) { + uint32_t k1 = mm_hash_getblock32(blocks, i); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + const uint8_t *tail = (const uint8_t *)(data + nblocks * 4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + ATTR_FALLTHROUGH; + case 2: + k1 ^= tail[1] << 8; + ATTR_FALLTHROUGH; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + } + + h1 ^= len; + h1 = mm_hash_fmix32(h1); + return h1; +} + +/* This is taken from the cryptomatte specification 1.0 */ +float util_hash_to_float(uint32_t hash) +{ + uint32_t mantissa = hash & ((1 << 23) - 1); + uint32_t exponent = (hash >> 23) & ((1 << 8) - 1); + exponent = max(exponent, (uint32_t)1); + exponent = min(exponent, (uint32_t)254); + exponent = exponent << 23; + uint32_t sign = (hash >> 31); + sign = sign << 31; + uint32_t float_bits = sign | exponent | mantissa; + float f; + memcpy(&f, &float_bits, sizeof(uint32_t)); + return f; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/murmurhash.h b/intern/cycles/util/murmurhash.h new file mode 100644 index 00000000000..7c303db6ffa --- /dev/null +++ b/intern/cycles/util/murmurhash.h @@ -0,0 +1,29 @@ +/* + * Copyright 2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_MURMURHASH_H__ +#define __UTIL_MURMURHASH_H__ + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed); +float util_hash_to_float(uint32_t hash); + +CCL_NAMESPACE_END + +#endif /* __UTIL_MURMURHASH_H__ */ diff --git a/intern/cycles/util/opengl.h b/intern/cycles/util/opengl.h new file mode 100644 index 00000000000..7a8d5eec1f9 --- /dev/null +++ b/intern/cycles/util/opengl.h @@ -0,0 +1,25 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_OPENGL_H__ +#define __UTIL_OPENGL_H__ + +/* OpenGL header includes, used everywhere we use OpenGL, to deal with + * platform differences in one central place. */ + +#include + +#endif /* __UTIL_OPENGL_H__ */ diff --git a/intern/cycles/util/openimagedenoise.h b/intern/cycles/util/openimagedenoise.h new file mode 100644 index 00000000000..cc7b14ae18f --- /dev/null +++ b/intern/cycles/util/openimagedenoise.h @@ -0,0 +1,44 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_OPENIMAGEDENOISE_H__ +#define __UTIL_OPENIMAGEDENOISE_H__ + +#ifdef WITH_OPENIMAGEDENOISE +# include +#endif + +#include "util/system.h" + +CCL_NAMESPACE_BEGIN + +static inline bool openimagedenoise_supported() +{ +#ifdef WITH_OPENIMAGEDENOISE +# ifdef __APPLE__ + /* Always supported through Accelerate framework BNNS. */ + return true; +# else + return system_cpu_support_sse41(); +# endif +#else + return false; +#endif +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_OPENIMAGEDENOISE_H__ */ diff --git a/intern/cycles/util/openvdb.h b/intern/cycles/util/openvdb.h new file mode 100644 index 00000000000..ae5326e3199 --- /dev/null +++ b/intern/cycles/util/openvdb.h @@ -0,0 +1,68 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_OPENVDB_H__ +#define __UTIL_OPENVDB_H__ + +#ifdef WITH_OPENVDB +# include + +namespace openvdb { + +using Vec4fTree = tree::Tree4::Type; +using Vec4fGrid = Grid; + +/* Apply operation to known grid types. */ +template +bool grid_type_operation(const openvdb::GridBase::ConstPtr &grid, OpType &&op) +{ + if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else if (grid->isType()) { + return op.template operator()(grid); + } + else { + return false; + } +} + +}; // namespace openvdb + +#endif + +#endif /* __UTIL_OPENVDB_H__ */ diff --git a/intern/cycles/util/optimization.h b/intern/cycles/util/optimization.h new file mode 100644 index 00000000000..7ecd3893cf4 --- /dev/null +++ b/intern/cycles/util/optimization.h @@ -0,0 +1,77 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_OPTIMIZATION_H__ +#define __UTIL_OPTIMIZATION_H__ + +#ifndef __KERNEL_GPU__ + +/* x86 + * + * Compile a regular, SSE2 and SSE3 kernel. */ + +# if defined(i386) || defined(_M_IX86) + +/* We require minimum SSE2 support on x86, so auto enable. */ +# define __KERNEL_SSE2__ +# ifdef WITH_KERNEL_SSE2 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +# endif +# ifdef WITH_KERNEL_SSE3 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# endif + +/* x86-64 + * + * Compile a regular (includes SSE2), SSE3, SSE 4.1, AVX and AVX2 kernel. */ + +# elif defined(__x86_64__) || defined(_M_X64) + +/* SSE2 is always available on x86-64 CPUs, so auto enable */ +# define __KERNEL_SSE2__ +/* no SSE2 kernel on x86-64, part of regular kernel */ +# ifdef WITH_KERNEL_SSE3 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# endif +# ifdef WITH_KERNEL_SSE41 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 +# endif +# ifdef WITH_KERNEL_AVX +# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX +# endif +# ifdef WITH_KERNEL_AVX2 +# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 +# endif + +/* Arm Neon + * + * Compile a SSE4 kernel emulated with Neon. Most code is shared with + * SSE, some specializations for performance and compatibility are made + * made testing for __KERNEL_NEON__. 
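[Editor's sketch, not part of the patch: the grid_type_operation() template in util/openvdb.h above dispatches on the dynamic grid type and forwards to a functor whose call operator is itself a template. The placeholder types below stand in for the OpenVDB grid types; only the dispatch pattern and the "op.template operator()<...>" syntax are the point.]

#include <cstdio>
#include <memory>
#include <typeinfo>

struct GridBase {
  virtual ~GridBase() = default;
};
struct FloatGridDemo : GridBase {};
struct Vec3GridDemo : GridBase {};

template<typename OpType>
bool grid_type_operation_demo(const std::shared_ptr<const GridBase> &grid, OpType &&op)
{
  /* Probe the dynamic type, then call the functor's templated operator(). */
  if (std::dynamic_pointer_cast<const FloatGridDemo>(grid)) {
    return op.template operator()<FloatGridDemo>(grid);
  }
  if (std::dynamic_pointer_cast<const Vec3GridDemo>(grid)) {
    return op.template operator()<Vec3GridDemo>(grid);
  }
  return false; /* unknown grid type */
}

struct PrintTypeOp {
  template<typename GridT> bool operator()(const std::shared_ptr<const GridBase> &) const
  {
    std::printf("matched %s\n", typeid(GridT).name());
    return true;
  }
};

int main()
{
  std::shared_ptr<const GridBase> grid = std::make_shared<const FloatGridDemo>();
  return grid_type_operation_demo(grid, PrintTypeOp{}) ? 0 : 1;
}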
*/ + +# elif defined(__ARM_NEON) && defined(WITH_SSE2NEON) + +# define __KERNEL_NEON__ +# define __KERNEL_SSE__ +# define __KERNEL_SSE2__ +# define __KERNEL_SSE3__ +# define __KERNEL_SSE41__ + +# endif + +#endif + +#endif /* __UTIL_OPTIMIZATION_H__ */ diff --git a/intern/cycles/util/param.h b/intern/cycles/util/param.h new file mode 100644 index 00000000000..3f8e2d6d700 --- /dev/null +++ b/intern/cycles/util/param.h @@ -0,0 +1,40 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_PARAM_H__ +#define __UTIL_PARAM_H__ + +/* Parameter value lists from OpenImageIO are used to store custom properties + * on various data, which can then later be used in shaders. */ + +#include +#include +#include + +CCL_NAMESPACE_BEGIN + +OIIO_NAMESPACE_USING + +static constexpr TypeDesc TypeFloat2(TypeDesc::FLOAT, TypeDesc::VEC2); +static constexpr TypeDesc TypeRGBA(TypeDesc::FLOAT, TypeDesc::VEC4, TypeDesc::COLOR); +static constexpr TypeDesc TypeFloatArray4(TypeDesc::FLOAT, + TypeDesc::SCALAR, + TypeDesc::NOSEMANTICS, + 4); + +CCL_NAMESPACE_END + +#endif /* __UTIL_PARAM_H__ */ diff --git a/intern/cycles/util/path.cpp b/intern/cycles/util/path.cpp new file mode 100644 index 00000000000..5704c4ef8ef --- /dev/null +++ b/intern/cycles/util/path.cpp @@ -0,0 +1,781 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
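[Editor's sketch, not part of the patch: one way the WITH_CYCLES_OPTIMIZED_KERNEL_* macros from util/optimization.h above are typically consumed, compile one entry point per instruction set and pick the best one at runtime. The function names and CPU checks are illustrative stand-ins, not the actual Cycles kernel entry points.]

#include <cstdio>

/* Stand-ins for the runtime CPU checks in util/system.h
 * (e.g. system_cpu_support_sse41()); hardcoded so the sketch builds anywhere. */
static bool cpu_has_avx2() { return false; }
static bool cpu_has_sse41() { return true; }

static void kernel_render_reference() { std::printf("reference (SSE2) kernel\n"); }

int main()
{
  bool has_avx2 = cpu_has_avx2();
  bool has_sse41 = cpu_has_sse41();

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
  if (has_avx2) {
    std::printf("AVX2 kernel\n"); /* would call the AVX2-compiled entry point */
    return 0;
  }
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
  if (has_sse41) {
    std::printf("SSE4.1 kernel\n"); /* would call the SSE4.1-compiled entry point */
    return 0;
  }
#endif
  (void)has_avx2;
  (void)has_sse41;
  kernel_render_reference();
  return 0;
}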
+ */ + +#include "util/path.h" +#include "util/md5.h" +#include "util/string.h" + +#include +#include +#include + +OIIO_NAMESPACE_USING + +#include + +#include + +#if defined(_WIN32) +# define DIR_SEP '\\' +# define DIR_SEP_ALT '/' +# include +#else +# define DIR_SEP '/' +# include +# include +# include +# include +#endif + +#ifdef HAVE_SHLWAPI_H +# include +#endif + +#include "util/map.h" +#include "util/windows.h" + +CCL_NAMESPACE_BEGIN + +#ifdef _WIN32 +# if defined(_MSC_VER) || defined(__MINGW64__) +typedef struct _stat64 path_stat_t; +# elif defined(__MINGW32__) +typedef struct _stati64 path_stat_t; +# else +typedef struct _stat path_stat_t; +# endif +# ifndef S_ISDIR +# define S_ISDIR(x) (((x)&_S_IFDIR) == _S_IFDIR) +# endif +#else +typedef struct stat path_stat_t; +#endif + +static string cached_path = ""; +static string cached_user_path = ""; +static string cached_temp_path = ""; +static string cached_xdg_cache_path = ""; + +namespace { + +#ifdef _WIN32 +class directory_iterator { + public: + class path_info { + public: + path_info(const string &path, const WIN32_FIND_DATAW &find_data) + : path_(path), find_data_(find_data) + { + } + + string path() + { + return path_join(path_, string_from_wstring(find_data_.cFileName)); + } + + protected: + const string &path_; + const WIN32_FIND_DATAW &find_data_; + }; + + directory_iterator() : path_info_("", find_data_), h_find_(INVALID_HANDLE_VALUE) + { + } + + explicit directory_iterator(const string &path) : path_(path), path_info_(path, find_data_) + { + string wildcard = path; + if (wildcard[wildcard.size() - 1] != DIR_SEP) { + wildcard += DIR_SEP; + } + wildcard += "*"; + h_find_ = FindFirstFileW(string_to_wstring(wildcard).c_str(), &find_data_); + if (h_find_ != INVALID_HANDLE_VALUE) { + skip_dots(); + } + } + + ~directory_iterator() + { + if (h_find_ != INVALID_HANDLE_VALUE) { + FindClose(h_find_); + } + } + + directory_iterator &operator++() + { + step(); + return *this; + } + + path_info *operator->() + { + return &path_info_; + } + + bool operator!=(const directory_iterator &other) + { + return h_find_ != other.h_find_; + } + + protected: + bool step() + { + if (do_step()) { + return skip_dots(); + } + return false; + } + + bool do_step() + { + if (h_find_ != INVALID_HANDLE_VALUE) { + bool result = FindNextFileW(h_find_, &find_data_) == TRUE; + if (!result) { + FindClose(h_find_); + h_find_ = INVALID_HANDLE_VALUE; + } + return result; + } + return false; + } + + bool skip_dots() + { + while (wcscmp(find_data_.cFileName, L".") == 0 || wcscmp(find_data_.cFileName, L"..") == 0) { + if (!do_step()) { + return false; + } + } + return true; + } + + string path_; + path_info path_info_; + WIN32_FIND_DATAW find_data_; + HANDLE h_find_; +}; +#else /* _WIN32 */ + +class directory_iterator { + public: + class path_info { + public: + explicit path_info(const string &path) : path_(path), entry_(NULL) + { + } + + string path() + { + return path_join(path_, entry_->d_name); + } + + void current_entry_set(const struct dirent *entry) + { + entry_ = entry; + } + + protected: + const string &path_; + const struct dirent *entry_; + }; + + directory_iterator() : path_info_(""), name_list_(NULL), num_entries_(-1), cur_entry_(-1) + { + } + + explicit directory_iterator(const string &path) : path_(path), path_info_(path_), cur_entry_(0) + { + num_entries_ = scandir(path.c_str(), &name_list_, NULL, alphasort); + if (num_entries_ < 0) { + perror("scandir"); + } + else { + skip_dots(); + } + } + + ~directory_iterator() + { + destroy_name_list(); + } + + 
directory_iterator &operator++() + { + step(); + return *this; + } + + path_info *operator->() + { + path_info_.current_entry_set(name_list_[cur_entry_]); + return &path_info_; + } + + bool operator!=(const directory_iterator &other) + { + return name_list_ != other.name_list_; + } + + protected: + bool step() + { + if (do_step()) { + return skip_dots(); + } + return false; + } + + bool do_step() + { + ++cur_entry_; + if (cur_entry_ >= num_entries_) { + destroy_name_list(); + return false; + } + return true; + } + + /* Skip . and .. folders. */ + bool skip_dots() + { + while (strcmp(name_list_[cur_entry_]->d_name, ".") == 0 || + strcmp(name_list_[cur_entry_]->d_name, "..") == 0) { + if (!step()) { + return false; + } + } + return true; + } + + void destroy_name_list() + { + if (name_list_ == NULL) { + return; + } + for (int i = 0; i < num_entries_; ++i) { + free(name_list_[i]); + } + free(name_list_); + name_list_ = NULL; + } + + string path_; + path_info path_info_; + struct dirent **name_list_; + int num_entries_, cur_entry_; +}; + +#endif /* _WIN32 */ + +size_t find_last_slash(const string &path) +{ + for (size_t i = 0; i < path.size(); ++i) { + size_t index = path.size() - 1 - i; +#ifdef _WIN32 + if (path[index] == DIR_SEP || path[index] == DIR_SEP_ALT) +#else + if (path[index] == DIR_SEP) +#endif + { + return index; + } + } + return string::npos; +} + +} /* namespace */ + +static char *path_specials(const string &sub) +{ + static bool env_init = false; + static char *env_shader_path; + static char *env_source_path; + if (!env_init) { + env_shader_path = getenv("CYCLES_SHADER_PATH"); + /* NOTE: It is KERNEL in env variable for compatibility reasons. */ + env_source_path = getenv("CYCLES_KERNEL_PATH"); + env_init = true; + } + if (env_shader_path != NULL && sub == "shader") { + return env_shader_path; + } + else if (env_shader_path != NULL && sub == "source") { + return env_source_path; + } + return NULL; +} + +#if defined(__linux__) || defined(__APPLE__) +static string path_xdg_cache_get() +{ + const char *home = getenv("XDG_CACHE_HOME"); + if (home) { + return string(home); + } + else { + home = getenv("HOME"); + if (home == NULL) { + home = getpwuid(getuid())->pw_dir; + } + return path_join(string(home), ".cache"); + } +} +#endif + +void path_init(const string &path, const string &user_path, const string &temp_path) +{ + cached_path = path; + cached_user_path = user_path; + cached_temp_path = temp_path; + +#ifdef _MSC_VER + // workaround for https://svn.boost.org/trac/boost/ticket/6320 + // indirectly init boost codec here since it's not thread safe, and can + // cause crashes when it happens in multithreaded image load + OIIO::Filesystem::exists(path); +#endif +} + +string path_get(const string &sub) +{ + char *special = path_specials(sub); + if (special != NULL) + return special; + + if (cached_path == "") + cached_path = path_dirname(Sysutil::this_program_path()); + + return path_join(cached_path, sub); +} + +string path_user_get(const string &sub) +{ + if (cached_user_path == "") + cached_user_path = path_dirname(Sysutil::this_program_path()); + + return path_join(cached_user_path, sub); +} + +string path_cache_get(const string &sub) +{ +#if defined(__linux__) || defined(__APPLE__) + if (cached_xdg_cache_path == "") { + cached_xdg_cache_path = path_xdg_cache_get(); + } + string result = path_join(cached_xdg_cache_path, "cycles"); + return path_join(result, sub); +#else + /* TODO(sergey): What that should be on Windows? 
*/ + return path_user_get(path_join("cache", sub)); +#endif +} + +string path_temp_get(const string &sub) +{ + if (cached_temp_path == "") { + cached_temp_path = Filesystem::temp_directory_path(); + } + + return path_join(cached_temp_path, sub); +} + +#if defined(__linux__) || defined(__APPLE__) +string path_xdg_home_get(const string &sub = ""); +#endif + +string path_filename(const string &path) +{ + size_t index = find_last_slash(path); + if (index != string::npos) { + /* Corner cases to match boost behavior. */ +#ifndef _WIN32 + if (index == 0 && path.size() == 1) { + return path; + } +#endif + if (index == path.size() - 1) { +#ifdef _WIN32 + if (index == 2) { + return string(1, DIR_SEP); + } +#endif + return "."; + } + return path.substr(index + 1, path.size() - index - 1); + } + return path; +} + +string path_dirname(const string &path) +{ + size_t index = find_last_slash(path); + if (index != string::npos) { +#ifndef _WIN32 + if (index == 0 && path.size() > 1) { + return string(1, DIR_SEP); + } +#endif + return path.substr(0, index); + } + return ""; +} + +string path_join(const string &dir, const string &file) +{ + if (dir.size() == 0) { + return file; + } + if (file.size() == 0) { + return dir; + } + string result = dir; +#ifndef _WIN32 + if (result[result.size() - 1] != DIR_SEP && file[0] != DIR_SEP) +#else + if (result[result.size() - 1] != DIR_SEP && result[result.size() - 1] != DIR_SEP_ALT && + file[0] != DIR_SEP && file[0] != DIR_SEP_ALT) +#endif + { + result += DIR_SEP; + } + result += file; + return result; +} + +string path_escape(const string &path) +{ + string result = path; + string_replace(result, " ", "\\ "); + return result; +} + +bool path_is_relative(const string &path) +{ +#ifdef _WIN32 +# ifdef HAVE_SHLWAPI_H + return PathIsRelative(path.c_str()); +# else /* HAVE_SHLWAPI_H */ + if (path.size() >= 3) { + return !(((path[0] >= 'a' && path[0] <= 'z') || (path[0] >= 'A' && path[0] <= 'Z')) && + path[1] == ':' && path[2] == DIR_SEP); + } + return true; +# endif /* HAVE_SHLWAPI_H */ +#else /* _WIN32 */ + if (path.size() == 0) { + return 1; + } + return path[0] != DIR_SEP; +#endif /* _WIN32 */ +} + +#ifdef _WIN32 +/* Add a slash if the UNC path points to a share. */ +static string path_unc_add_slash_to_share(const string &path) +{ + size_t slash_after_server = path.find(DIR_SEP, 2); + if (slash_after_server != string::npos) { + size_t slash_after_share = path.find(DIR_SEP, slash_after_server + 1); + if (slash_after_share == string::npos) { + return path + DIR_SEP; + } + } + return path; +} + +/* Convert: + * \\?\UNC\server\share\folder\... to \\server\share\folder\... + * \\?\C:\ to C:\ and \\?\C:\folder\... to C:\folder\... + */ +static string path_unc_to_short(const string &path) +{ + size_t len = path.size(); + if ((len > 3) && (path[0] == DIR_SEP) && (path[1] == DIR_SEP) && (path[2] == '?') && + ((path[3] == DIR_SEP) || (path[3] == DIR_SEP_ALT))) { + if ((len > 5) && (path[5] == ':')) { + return path.substr(4, len - 4); + } + else if ((len > 7) && (path.substr(4, 3) == "UNC") && + ((path[7] == DIR_SEP) || (path[7] == DIR_SEP_ALT))) { + return "\\\\" + path.substr(8, len - 8); + } + } + return path; +} + +static string path_cleanup_unc(const string &path) +{ + string result = path_unc_to_short(path); + if (path.size() > 2) { + /* It's possible path is now a non-UNC. */ + if (result[0] == DIR_SEP && result[1] == DIR_SEP) { + return path_unc_add_slash_to_share(result); + } + } + return result; +} + +/* Make path compatible for stat() functions. 
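[Editor's sketch, not part of the patch: a standalone reproduction of the split logic used by find_last_slash()/path_filename() above (POSIX variant, '/' separator only), showing the corner cases the comments mention: "/" keeps itself as the filename and a trailing slash yields ".".]

#include <cstdio>
#include <string>

static size_t find_last_slash_demo(const std::string &path)
{
  for (size_t i = path.size(); i-- > 0;) {
    if (path[i] == '/') {
      return i;
    }
  }
  return std::string::npos;
}

static std::string path_filename_demo(const std::string &path)
{
  size_t index = find_last_slash_demo(path);
  if (index == std::string::npos) {
    return path; /* no separator at all */
  }
  if (index == 0 && path.size() == 1) {
    return path; /* "/" -> "/" */
  }
  if (index == path.size() - 1) {
    return "."; /* "a/b/" -> "." */
  }
  return path.substr(index + 1);
}

int main()
{
  std::printf("%s %s %s\n",
              path_filename_demo("/tmp/scene.blend").c_str(), /* scene.blend */
              path_filename_demo("/tmp/").c_str(),            /* . */
              path_filename_demo("/").c_str());               /* / */
  return 0;
}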
*/ +static string path_make_compatible(const string &path) +{ + string result = path; + /* In Windows stat() doesn't recognize dir ending on a slash. */ + if (result.size() > 3 && result[result.size() - 1] == DIR_SEP) { + result.resize(result.size() - 1); + } + /* Clean up UNC path. */ + if ((path.size() >= 3) && (path[0] == DIR_SEP) && (path[1] == DIR_SEP)) { + result = path_cleanup_unc(result); + } + /* Make sure volume-only path ends up wit ha directory separator. */ + if (result.size() == 2 && result[1] == ':') { + result += DIR_SEP; + } + return result; +} + +static int path_wstat(const wstring &path_wc, path_stat_t *st) +{ +# if defined(_MSC_VER) || defined(__MINGW64__) + return _wstat64(path_wc.c_str(), st); +# elif defined(__MINGW32__) + return _wstati64(path_wc.c_str(), st); +# else + return _wstat(path_wc.c_str(), st); +# endif +} + +static int path_stat(const string &path, path_stat_t *st) +{ + wstring path_wc = string_to_wstring(path); + return path_wstat(path_wc, st); +} +#else /* _WIN32 */ +static int path_stat(const string &path, path_stat_t *st) +{ + return stat(path.c_str(), st); +} +#endif /* _WIN32 */ + +size_t path_file_size(const string &path) +{ + path_stat_t st; + if (path_stat(path, &st) != 0) { + return -1; + } + return st.st_size; +} + +bool path_exists(const string &path) +{ +#ifdef _WIN32 + string fixed_path = path_make_compatible(path); + wstring path_wc = string_to_wstring(fixed_path); + path_stat_t st; + if (path_wstat(path_wc, &st) != 0) { + return false; + } + return st.st_mode != 0; +#else /* _WIN32 */ + struct stat st; + if (stat(path.c_str(), &st) != 0) { + return 0; + } + return st.st_mode != 0; +#endif /* _WIN32 */ +} + +bool path_is_directory(const string &path) +{ + path_stat_t st; + if (path_stat(path, &st) != 0) { + return false; + } + return S_ISDIR(st.st_mode); +} + +static void path_files_md5_hash_recursive(MD5Hash &hash, const string &dir) +{ + if (path_exists(dir)) { + directory_iterator it(dir), it_end; + + for (; it != it_end; ++it) { + if (path_is_directory(it->path())) { + path_files_md5_hash_recursive(hash, it->path()); + } + else { + string filepath = it->path(); + + hash.append((const uint8_t *)filepath.c_str(), filepath.size()); + hash.append_file(filepath); + } + } + } +} + +string path_files_md5_hash(const string &dir) +{ + /* computes md5 hash of all files in the directory */ + MD5Hash hash; + + path_files_md5_hash_recursive(hash, dir); + + return hash.get_hex(); +} + +static bool create_directories_recursivey(const string &path) +{ + if (path_is_directory(path)) { + /* Directory already exists, nothing to do. */ + return true; + } + if (path_exists(path)) { + /* File exists and it's not a directory. 
*/ + return false; + } + + string parent = path_dirname(path); + if (parent.size() > 0 && parent != path) { + if (!create_directories_recursivey(parent)) { + return false; + } + } + +#ifdef _WIN32 + wstring path_wc = string_to_wstring(path); + return _wmkdir(path_wc.c_str()) == 0; +#else + return mkdir(path.c_str(), 0777) == 0; +#endif +} + +void path_create_directories(const string &filepath) +{ + string path = path_dirname(filepath); + create_directories_recursivey(path); +} + +bool path_write_binary(const string &path, const vector &binary) +{ + path_create_directories(path); + + /* write binary file from memory */ + FILE *f = path_fopen(path, "wb"); + + if (!f) + return false; + + if (binary.size() > 0) + fwrite(&binary[0], sizeof(uint8_t), binary.size(), f); + + fclose(f); + + return true; +} + +bool path_write_text(const string &path, string &text) +{ + vector binary(text.length(), 0); + std::copy(text.begin(), text.end(), binary.begin()); + + return path_write_binary(path, binary); +} + +bool path_read_binary(const string &path, vector &binary) +{ + /* read binary file into memory */ + FILE *f = path_fopen(path, "rb"); + + if (!f) { + binary.resize(0); + return false; + } + + binary.resize(path_file_size(path)); + + if (binary.size() == 0) { + fclose(f); + return false; + } + + if (fread(&binary[0], sizeof(uint8_t), binary.size(), f) != binary.size()) { + fclose(f); + return false; + } + + fclose(f); + + return true; +} + +bool path_read_text(const string &path, string &text) +{ + vector binary; + + if (!path_exists(path) || !path_read_binary(path, binary)) + return false; + + const char *str = (const char *)&binary[0]; + size_t size = binary.size(); + text = string(str, size); + + return true; +} + +uint64_t path_modified_time(const string &path) +{ + path_stat_t st; + if (path_stat(path, &st) != 0) { + return 0; + } + return st.st_mtime; +} + +bool path_remove(const string &path) +{ + return remove(path.c_str()) == 0; +} + +FILE *path_fopen(const string &path, const string &mode) +{ +#ifdef _WIN32 + wstring path_wc = string_to_wstring(path); + wstring mode_wc = string_to_wstring(mode); + return _wfopen(path_wc.c_str(), mode_wc.c_str()); +#else + return fopen(path.c_str(), mode.c_str()); +#endif +} + +void path_cache_clear_except(const string &name, const set &except) +{ + string dir = path_user_get("cache"); + + if (path_exists(dir)) { + directory_iterator it(dir), it_end; + + for (; it != it_end; ++it) { + string filename = path_filename(it->path()); + + if (string_startswith(filename, name.c_str())) + if (except.find(filename) == except.end()) + path_remove(it->path()); + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/path.h b/intern/cycles/util/path.h new file mode 100644 index 00000000000..a1394555302 --- /dev/null +++ b/intern/cycles/util/path.h @@ -0,0 +1,74 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
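[Editor's sketch, not part of the patch: an illustrative caller of the file helpers implemented above. It relies only on functions declared in util/path.h; the function name is hypothetical and error handling is intentionally minimal.]

#include "util/path.h"

CCL_NAMESPACE_BEGIN

/* Write a small text file (parent directories are created on demand),
 * read it back, then remove it again. */
static bool path_roundtrip_demo(const string &filepath)
{
  string text = "hello cycles";
  if (!path_write_text(filepath, text)) {
    return false;
  }
  string read_back;
  if (!path_read_text(filepath, read_back)) {
    return false;
  }
  return read_back == text && path_remove(filepath);
}

CCL_NAMESPACE_END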
+ */ + +#ifndef __UTIL_PATH_H__ +#define __UTIL_PATH_H__ + +/* Utility functions to get paths to files distributed with the program. For + * the standalone apps, paths are relative to the executable, for dynamically + * linked libraries, the path to the library may be set with path_init, which + * then makes all paths relative to that. */ + +#include + +#include "util/set.h" +#include "util/string.h" +#include "util/types.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +/* program paths */ +void path_init(const string &path = "", const string &user_path = "", const string &tmp_path = ""); +string path_get(const string &sub = ""); +string path_user_get(const string &sub = ""); +string path_temp_get(const string &sub = ""); +string path_cache_get(const string &sub = ""); + +/* path string manipulation */ +string path_filename(const string &path); +string path_dirname(const string &path); +string path_join(const string &dir, const string &file); +string path_escape(const string &path); +bool path_is_relative(const string &path); + +/* file info */ +size_t path_file_size(const string &path); +bool path_exists(const string &path); +bool path_is_directory(const string &path); +string path_files_md5_hash(const string &dir); +uint64_t path_modified_time(const string &path); + +/* directory utility */ +void path_create_directories(const string &path); + +/* file read/write utilities */ +FILE *path_fopen(const string &path, const string &mode); + +bool path_write_binary(const string &path, const vector &binary); +bool path_write_text(const string &path, string &text); +bool path_read_binary(const string &path, vector &binary); +bool path_read_text(const string &path, string &text); + +/* File manipulation. */ +bool path_remove(const string &path); + +/* cache utility */ +void path_cache_clear_except(const string &name, const set &except); + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/profiling.cpp b/intern/cycles/util/profiling.cpp new file mode 100644 index 00000000000..55b35b7320f --- /dev/null +++ b/intern/cycles/util/profiling.cpp @@ -0,0 +1,174 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/profiling.h" +#include "util/algorithm.h" +#include "util/foreach.h" +#include "util/set.h" + +CCL_NAMESPACE_BEGIN + +Profiler::Profiler() : do_stop_worker(true), worker(NULL) +{ +} + +Profiler::~Profiler() +{ + assert(worker == NULL); +} + +void Profiler::run() +{ + uint64_t updates = 0; + auto start_time = std::chrono::system_clock::now(); + while (!do_stop_worker) { + thread_scoped_lock lock(mutex); + foreach (ProfilingState *state, states) { + uint32_t cur_event = state->event; + int32_t cur_shader = state->shader; + int32_t cur_object = state->object; + + /* The state reads/writes should be atomic, but just to be sure + * check the values for validity anyways. 
*/ + if (cur_event < PROFILING_NUM_EVENTS) { + event_samples[cur_event]++; + } + + if (cur_shader >= 0 && cur_shader < shader_samples.size()) { + shader_samples[cur_shader]++; + } + + if (cur_object >= 0 && cur_object < object_samples.size()) { + object_samples[cur_object]++; + } + } + lock.unlock(); + + /* Relative waits always overshoot a bit, so just waiting 1ms every + * time would cause the sampling to drift over time. + * By keeping track of the absolute time, the wait times correct themselves - + * if one wait overshoots a lot, the next one will be shorter to compensate. */ + updates++; + std::this_thread::sleep_until(start_time + updates * std::chrono::milliseconds(1)); + } +} + +void Profiler::reset(int num_shaders, int num_objects) +{ + bool running = (worker != NULL); + if (running) { + stop(); + } + + /* Resize and clear the accumulation vectors. */ + shader_hits.assign(num_shaders, 0); + object_hits.assign(num_objects, 0); + + event_samples.assign(PROFILING_NUM_EVENTS, 0); + shader_samples.assign(num_shaders, 0); + object_samples.assign(num_objects, 0); + + if (running) { + start(); + } +} + +void Profiler::start() +{ + assert(worker == NULL); + do_stop_worker = false; + worker = new thread(function_bind(&Profiler::run, this)); +} + +void Profiler::stop() +{ + if (worker != NULL) { + do_stop_worker = true; + + worker->join(); + delete worker; + worker = NULL; + } +} + +void Profiler::add_state(ProfilingState *state) +{ + thread_scoped_lock lock(mutex); + + /* Add the ProfilingState from the list of sampled states. */ + assert(std::find(states.begin(), states.end(), state) == states.end()); + states.push_back(state); + + /* Resize thread-local hit counters. */ + state->shader_hits.assign(shader_hits.size(), 0); + state->object_hits.assign(object_hits.size(), 0); + + /* Initialize the state. */ + state->event = PROFILING_UNKNOWN; + state->shader = -1; + state->object = -1; + state->active = true; +} + +void Profiler::remove_state(ProfilingState *state) +{ + thread_scoped_lock lock(mutex); + + /* Remove the ProfilingState from the list of sampled states. */ + states.erase(std::remove(states.begin(), states.end(), state), states.end()); + state->active = false; + + /* Merge thread-local hit counters. 
*/ + assert(shader_hits.size() == state->shader_hits.size()); + for (int i = 0; i < shader_hits.size(); i++) { + shader_hits[i] += state->shader_hits[i]; + } + + assert(object_hits.size() == state->object_hits.size()); + for (int i = 0; i < object_hits.size(); i++) { + object_hits[i] += state->object_hits[i]; + } +} + +uint64_t Profiler::get_event(ProfilingEvent event) +{ + assert(worker == NULL); + return event_samples[event]; +} + +bool Profiler::get_shader(int shader, uint64_t &samples, uint64_t &hits) +{ + assert(worker == NULL); + if (shader_samples[shader] == 0) { + return false; + } + samples = shader_samples[shader]; + hits = shader_hits[shader]; + return true; +} + +bool Profiler::get_object(int object, uint64_t &samples, uint64_t &hits) +{ + assert(worker == NULL); + if (object_samples[object] == 0) { + return false; + } + samples = object_samples[object]; + hits = object_hits[object]; + return true; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/profiling.h b/intern/cycles/util/profiling.h new file mode 100644 index 00000000000..b30aac90879 --- /dev/null +++ b/intern/cycles/util/profiling.h @@ -0,0 +1,180 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_PROFILING_H__ +#define __UTIL_PROFILING_H__ + +#include + +#include "util/map.h" +#include "util/thread.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +enum ProfilingEvent : uint32_t { + PROFILING_UNKNOWN, + PROFILING_RAY_SETUP, + + PROFILING_INTERSECT_CLOSEST, + PROFILING_INTERSECT_SUBSURFACE, + PROFILING_INTERSECT_SHADOW, + PROFILING_INTERSECT_VOLUME_STACK, + + PROFILING_SHADE_SURFACE_SETUP, + PROFILING_SHADE_SURFACE_EVAL, + PROFILING_SHADE_SURFACE_DIRECT_LIGHT, + PROFILING_SHADE_SURFACE_INDIRECT_LIGHT, + PROFILING_SHADE_SURFACE_AO, + PROFILING_SHADE_SURFACE_PASSES, + + PROFILING_SHADE_VOLUME_SETUP, + PROFILING_SHADE_VOLUME_INTEGRATE, + PROFILING_SHADE_VOLUME_DIRECT_LIGHT, + PROFILING_SHADE_VOLUME_INDIRECT_LIGHT, + + PROFILING_SHADE_SHADOW_SETUP, + PROFILING_SHADE_SHADOW_SURFACE, + PROFILING_SHADE_SHADOW_VOLUME, + + PROFILING_SHADE_LIGHT_SETUP, + PROFILING_SHADE_LIGHT_EVAL, + + PROFILING_NUM_EVENTS, +}; + +/* Contains the current execution state of a worker thread. + * These values are constantly updated by the worker. + * Periodically the profiler thread will wake up, read them + * and update its internal counters based on it. + * + * Atomics aren't needed here since we're only doing direct + * writes and reads to (4-byte-aligned) uint32_t, which is + * guaranteed to be atomic on x86 since the 486. + * Memory ordering is not guaranteed but does not matter. + * + * And even on other architectures, the extremely rare corner + * case of reading an intermediate state could at worst result + * in a single incorrect sample. 
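[Editor's sketch, not part of the patch: the Profiler call order as it can be read from the implementation above, size the counters, register one ProfilingState per worker, sample while work happens, then stop before reading results (get_event() asserts the sampling thread is stopped). The surrounding render code is omitted and the function name is hypothetical.]

#include "util/profiling.h"

CCL_NAMESPACE_BEGIN

static void profiler_demo(int num_shaders, int num_objects)
{
  Profiler profiler;
  profiler.reset(num_shaders, num_objects);
  profiler.start();

  ProfilingState state;
  profiler.add_state(&state);

  /* ... the worker updates state.event / state.shader / state.object here ... */

  profiler.remove_state(&state);
  profiler.stop();

  const uint64_t closest_samples = profiler.get_event(PROFILING_INTERSECT_CLOSEST);
  (void)closest_samples;
}

CCL_NAMESPACE_END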
*/ +struct ProfilingState { + volatile uint32_t event = PROFILING_UNKNOWN; + volatile int32_t shader = -1; + volatile int32_t object = -1; + volatile bool active = false; + + vector shader_hits; + vector object_hits; +}; + +class Profiler { + public: + Profiler(); + ~Profiler(); + + void reset(int num_shaders, int num_objects); + + void start(); + void stop(); + + void add_state(ProfilingState *state); + void remove_state(ProfilingState *state); + + uint64_t get_event(ProfilingEvent event); + bool get_shader(int shader, uint64_t &samples, uint64_t &hits); + bool get_object(int object, uint64_t &samples, uint64_t &hits); + + protected: + void run(); + + /* Tracks how often the worker was in each ProfilingEvent while sampling, + * so multiplying the values by the sample frequency (currently 1ms) + * gives the approximate time spent in each state. */ + vector event_samples; + vector shader_samples; + vector object_samples; + + /* Tracks the total amounts every object/shader was hit. + * Used to evaluate relative cost, written by the render thread. + * Indexed by the shader and object IDs that the kernel also uses + * to index __object_flag and __shaders. */ + vector shader_hits; + vector object_hits; + + volatile bool do_stop_worker; + thread *worker; + + thread_mutex mutex; + vector states; +}; + +class ProfilingHelper { + public: + ProfilingHelper(ProfilingState *state, ProfilingEvent event) : state(state) + { + previous_event = state->event; + state->event = event; + } + + ~ProfilingHelper() + { + state->event = previous_event; + } + + inline void set_event(ProfilingEvent event) + { + state->event = event; + } + + protected: + ProfilingState *state; + uint32_t previous_event; +}; + +class ProfilingWithShaderHelper : public ProfilingHelper { + public: + ProfilingWithShaderHelper(ProfilingState *state, ProfilingEvent event) + : ProfilingHelper(state, event) + { + } + + ~ProfilingWithShaderHelper() + { + state->object = -1; + state->shader = -1; + } + + inline void set_shader(int object, int shader) + { + if (state->active) { + state->shader = shader; + state->object = object; + + if (shader >= 0) { + assert(shader < state->shader_hits.size()); + state->shader_hits[shader]++; + } + + if (object >= 0) { + assert(object < state->object_hits.size()); + state->object_hits[object]++; + } + } + } +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_PROFILING_H__ */ diff --git a/intern/cycles/util/progress.h b/intern/cycles/util/progress.h new file mode 100644 index 00000000000..4b0ff08aa7e --- /dev/null +++ b/intern/cycles/util/progress.h @@ -0,0 +1,370 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_PROGRESS_H__ +#define __UTIL_PROGRESS_H__ + +/* Progress + * + * Simple class to communicate progress status messages, timing information, + * update notifications from a job running in another thread. All methods + * except for the constructor/destructor are thread safe. 
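[Editor's sketch, not part of the patch: how the RAII helpers above are meant to be used from a worker thread. The helper records the new event for the lifetime of the scope and restores the previous one on destruction; the shader/object variant also bumps the per-shader and per-object hit counters. The surrounding function name is illustrative.]

#include "util/profiling.h"

CCL_NAMESPACE_BEGIN

static void shade_surface_demo(ProfilingState &state, int object, int shader)
{
  ProfilingWithShaderHelper profiling(&state, PROFILING_SHADE_SURFACE_EVAL);
  profiling.set_shader(object, shader);
  /* ... evaluate the shader; samples taken in this window are attributed to
   * PROFILING_SHADE_SURFACE_EVAL and to this object/shader pair ... */
}

CCL_NAMESPACE_END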
*/ + +#include "util/function.h" +#include "util/string.h" +#include "util/thread.h" +#include "util/time.h" + +CCL_NAMESPACE_BEGIN + +class Progress { + public: + Progress() + { + pixel_samples = 0; + total_pixel_samples = 0; + current_tile_sample = 0; + rendered_tiles = 0; + denoised_tiles = 0; + start_time = time_dt(); + render_start_time = time_dt(); + end_time = 0.0; + status = "Initializing"; + substatus = ""; + sync_status = ""; + sync_substatus = ""; + update_cb = function_null; + cancel = false; + cancel_message = ""; + error = false; + error_message = ""; + cancel_cb = function_null; + } + + Progress(Progress &progress) + { + *this = progress; + } + + Progress &operator=(Progress &progress) + { + thread_scoped_lock lock(progress.progress_mutex); + + progress.get_status(status, substatus); + + pixel_samples = progress.pixel_samples; + total_pixel_samples = progress.total_pixel_samples; + current_tile_sample = progress.get_current_sample(); + + return *this; + } + + void reset() + { + pixel_samples = 0; + total_pixel_samples = 0; + current_tile_sample = 0; + rendered_tiles = 0; + denoised_tiles = 0; + start_time = time_dt(); + render_start_time = time_dt(); + end_time = 0.0; + status = "Initializing"; + substatus = ""; + sync_status = ""; + sync_substatus = ""; + cancel = false; + cancel_message = ""; + error = false; + error_message = ""; + } + + /* cancel */ + void set_cancel(const string &cancel_message_) + { + thread_scoped_lock lock(progress_mutex); + cancel_message = cancel_message_; + cancel = true; + } + + bool get_cancel() const + { + if (!cancel && cancel_cb) + cancel_cb(); + + return cancel; + } + + string get_cancel_message() const + { + thread_scoped_lock lock(progress_mutex); + return cancel_message; + } + + void set_cancel_callback(function function) + { + cancel_cb = function; + } + + /* error */ + void set_error(const string &error_message_) + { + thread_scoped_lock lock(progress_mutex); + error_message = error_message_; + error = true; + /* If error happens we also stop rendering. */ + cancel_message = error_message_; + cancel = true; + } + + bool get_error() const + { + return error; + } + + string get_error_message() const + { + thread_scoped_lock lock(progress_mutex); + return error_message; + } + + /* tile and timing information */ + + void set_start_time() + { + thread_scoped_lock lock(progress_mutex); + + start_time = time_dt(); + end_time = 0.0; + } + + void set_render_start_time() + { + thread_scoped_lock lock(progress_mutex); + + render_start_time = time_dt(); + } + + void add_skip_time(const scoped_timer &start_timer, bool only_render) + { + double skip_time = time_dt() - start_timer.get_start(); + + render_start_time += skip_time; + if (!only_render) { + start_time += skip_time; + } + } + + void get_time(double &total_time_, double &render_time_) const + { + thread_scoped_lock lock(progress_mutex); + + double time = (end_time > 0) ? 
end_time : time_dt(); + + total_time_ = time - start_time; + render_time_ = time - render_start_time; + } + + void set_end_time() + { + end_time = time_dt(); + } + + void reset_sample() + { + thread_scoped_lock lock(progress_mutex); + + pixel_samples = 0; + current_tile_sample = 0; + rendered_tiles = 0; + denoised_tiles = 0; + } + + void set_total_pixel_samples(uint64_t total_pixel_samples_) + { + thread_scoped_lock lock(progress_mutex); + + total_pixel_samples = total_pixel_samples_; + } + + float get_progress() const + { + thread_scoped_lock lock(progress_mutex); + + if (total_pixel_samples > 0) { + return ((float)pixel_samples) / total_pixel_samples; + } + return 0.0f; + } + + void add_samples(uint64_t pixel_samples_, int tile_sample) + { + thread_scoped_lock lock(progress_mutex); + + pixel_samples += pixel_samples_; + current_tile_sample = tile_sample; + } + + void add_samples_update(uint64_t pixel_samples_, int tile_sample) + { + add_samples(pixel_samples_, tile_sample); + set_update(); + } + + void add_finished_tile(bool denoised) + { + thread_scoped_lock lock(progress_mutex); + + if (denoised) { + denoised_tiles++; + } + else { + rendered_tiles++; + } + } + + int get_current_sample() const + { + thread_scoped_lock lock(progress_mutex); + /* Note that the value here always belongs to the last tile that updated, + * so it's only useful if there is only one active tile. */ + return current_tile_sample; + } + + int get_rendered_tiles() const + { + thread_scoped_lock lock(progress_mutex); + return rendered_tiles; + } + + int get_denoised_tiles() const + { + thread_scoped_lock lock(progress_mutex); + return denoised_tiles; + } + + /* status messages */ + + void set_status(const string &status_, const string &substatus_ = "") + { + { + thread_scoped_lock lock(progress_mutex); + status = status_; + substatus = substatus_; + } + + set_update(); + } + + void set_substatus(const string &substatus_) + { + { + thread_scoped_lock lock(progress_mutex); + substatus = substatus_; + } + + set_update(); + } + + void set_sync_status(const string &status_, const string &substatus_ = "") + { + { + thread_scoped_lock lock(progress_mutex); + sync_status = status_; + sync_substatus = substatus_; + } + + set_update(); + } + + void set_sync_substatus(const string &substatus_) + { + { + thread_scoped_lock lock(progress_mutex); + sync_substatus = substatus_; + } + + set_update(); + } + + void get_status(string &status_, string &substatus_) const + { + thread_scoped_lock lock(progress_mutex); + + if (sync_status != "") { + status_ = sync_status; + substatus_ = sync_substatus; + } + else { + status_ = status; + substatus_ = substatus; + } + } + + /* callback */ + + void set_update() + { + if (update_cb) { + thread_scoped_lock lock(update_mutex); + update_cb(); + } + } + + void set_update_callback(function function) + { + update_cb = function; + } + + protected: + mutable thread_mutex progress_mutex; + mutable thread_mutex update_mutex; + function update_cb; + function cancel_cb; + + /* pixel_samples counts how many samples have been rendered over all pixel, not just per pixel. + * This makes the progress estimate more accurate when tiles with different sizes are used. + * + * total_pixel_samples is the total amount of pixel samples that will be rendered. */ + uint64_t pixel_samples, total_pixel_samples; + /* Stores the current sample count of the last tile that called the update function. + * It's used to display the sample count if only one tile is active. 
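[Editor's sketch, not part of the patch: typical producer/consumer wiring for the Progress class above. The render loop reports samples, a UI callback polls the aggregate state. This assumes the update callback type is function<void()>, as suggested by the parameterless update_cb() invocation; the numbers are arbitrary.]

#include <cstdio>

#include "util/progress.h"

CCL_NAMESPACE_BEGIN

static void progress_demo()
{
  Progress progress;
  progress.set_update_callback([&progress]() {
    string status, substatus;
    progress.get_status(status, substatus);
    std::printf("%s | %s (%.1f%%)\n",
                status.c_str(),
                substatus.c_str(),
                (double)progress.get_progress() * 100.0);
  });

  progress.set_total_pixel_samples(1920 * 1080 * 128ull);
  progress.set_status("Rendering", "Sample 1/128");
  progress.add_samples_update(1920 * 1080, 1);

  if (progress.get_cancel()) {
    std::printf("cancelled: %s\n", progress.get_cancel_message().c_str());
  }
}

CCL_NAMESPACE_END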
*/ + int current_tile_sample; + /* Stores the number of tiles that's already finished. + * Used to determine whether all but the last tile are finished rendering, + * in which case the current_tile_sample is displayed. */ + int rendered_tiles, denoised_tiles; + + double start_time, render_start_time; + /* End time written when render is done, so it doesn't keep increasing on redraws. */ + double end_time; + + string status; + string substatus; + + string sync_status; + string sync_substatus; + + volatile bool cancel; + string cancel_message; + + volatile bool error; + string error_message; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_PROGRESS_H__ */ diff --git a/intern/cycles/util/projection.h b/intern/cycles/util/projection.h new file mode 100644 index 00000000000..8d822a3777d --- /dev/null +++ b/intern/cycles/util/projection.h @@ -0,0 +1,217 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_PROJECTION_H__ +#define __UTIL_PROJECTION_H__ + +#include "util/transform.h" + +CCL_NAMESPACE_BEGIN + +/* 4x4 projection matrix, perspective or orthographic. */ + +typedef struct ProjectionTransform { + float4 x, y, z, w; /* rows */ + +#ifndef __KERNEL_GPU__ + ProjectionTransform() + { + } + + explicit ProjectionTransform(const Transform &tfm) + : x(tfm.x), y(tfm.y), z(tfm.z), w(make_float4(0.0f, 0.0f, 0.0f, 1.0f)) + { + } +#endif +} ProjectionTransform; + +typedef struct PerspectiveMotionTransform { + ProjectionTransform pre; + ProjectionTransform post; +} PerspectiveMotionTransform; + +/* Functions */ + +ccl_device_inline float3 transform_perspective(ccl_private const ProjectionTransform *t, + const float3 a) +{ + float4 b = make_float4(a.x, a.y, a.z, 1.0f); + float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b)); + float w = dot(t->w, b); + + return (w != 0.0f) ? 
c / w : zero_float3(); +} + +ccl_device_inline float3 transform_perspective_direction(ccl_private const ProjectionTransform *t, + const float3 a) +{ + float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, + a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, + a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); + + return c; +} + +#ifndef __KERNEL_GPU__ + +ccl_device_inline Transform projection_to_transform(const ProjectionTransform &a) +{ + Transform tfm = {a.x, a.y, a.z}; + return tfm; +} + +ccl_device_inline ProjectionTransform projection_transpose(const ProjectionTransform &a) +{ + ProjectionTransform t; + + t.x.x = a.x.x; + t.x.y = a.y.x; + t.x.z = a.z.x; + t.x.w = a.w.x; + t.y.x = a.x.y; + t.y.y = a.y.y; + t.y.z = a.z.y; + t.y.w = a.w.y; + t.z.x = a.x.z; + t.z.y = a.y.z; + t.z.z = a.z.z; + t.z.w = a.w.z; + t.w.x = a.x.w; + t.w.y = a.y.w; + t.w.z = a.z.w; + t.w.w = a.w.w; + + return t; +} + +ProjectionTransform projection_inverse(const ProjectionTransform &a); + +ccl_device_inline ProjectionTransform make_projection(float a, + float b, + float c, + float d, + float e, + float f, + float g, + float h, + float i, + float j, + float k, + float l, + float m, + float n, + float o, + float p) +{ + ProjectionTransform t; + + t.x.x = a; + t.x.y = b; + t.x.z = c; + t.x.w = d; + t.y.x = e; + t.y.y = f; + t.y.z = g; + t.y.w = h; + t.z.x = i; + t.z.y = j; + t.z.z = k; + t.z.w = l; + t.w.x = m; + t.w.y = n; + t.w.z = o; + t.w.w = p; + + return t; +} +ccl_device_inline ProjectionTransform projection_identity() +{ + return make_projection(1.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 1.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 1.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 1.0f); +} + +ccl_device_inline ProjectionTransform operator*(const ProjectionTransform &a, + const ProjectionTransform &b) +{ + ProjectionTransform c = projection_transpose(b); + ProjectionTransform t; + + t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w)); + t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w)); + t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w)); + t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w)); + + return t; +} + +ccl_device_inline ProjectionTransform operator*(const ProjectionTransform &a, const Transform &b) +{ + return a * ProjectionTransform(b); +} + +ccl_device_inline ProjectionTransform operator*(const Transform &a, const ProjectionTransform &b) +{ + return ProjectionTransform(a) * b; +} + +ccl_device_inline void print_projection(const char *label, const ProjectionTransform &t) +{ + print_float4(label, t.x); + print_float4(label, t.y); + print_float4(label, t.z); + print_float4(label, t.w); + printf("\n"); +} + +ccl_device_inline ProjectionTransform projection_perspective(float fov, float n, float f) +{ + ProjectionTransform persp = make_projection( + 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, f / (f - n), -f * n / (f - n), 0, 0, 1, 0); + + float inv_angle = 1.0f / tanf(0.5f * fov); + + Transform scale = transform_scale(inv_angle, inv_angle, 1); + + return scale * persp; +} + +ccl_device_inline ProjectionTransform projection_orthographic(float znear, float zfar) +{ + Transform t = transform_scale(1.0f, 1.0f, 1.0f / (zfar - znear)) * + transform_translate(0.0f, 0.0f, -znear); + + return ProjectionTransform(t); +} + +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_PROJECTION_H__ */ diff --git a/intern/cycles/util/queue.h b/intern/cycles/util/queue.h new file mode 100644 index 00000000000..622f4fe3e47 --- 
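[Editor's sketch, not part of the patch: a minimal host-side use of the projection helpers above, build a perspective projection and push a camera-space point through it. transform_perspective() divides by w, so the result is in normalized-device-style coordinates. The function name and constants are illustrative.]

#include "util/projection.h"

CCL_NAMESPACE_BEGIN

static float3 project_point_demo(const float3 p_camera)
{
  const float fov = 0.8f; /* field of view in radians */
  const float znear = 0.1f;
  const float zfar = 100.0f;

  ProjectionTransform persp = projection_perspective(fov, znear, zfar);
  return transform_perspective(&persp, p_camera);
}

CCL_NAMESPACE_END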
/dev/null +++ b/intern/cycles/util/queue.h @@ -0,0 +1,28 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_QUEUE_H__ +#define __UTIL_QUEUE_H__ + +#include + +CCL_NAMESPACE_BEGIN + +using std::queue; + +CCL_NAMESPACE_END + +#endif /* __UTIL_LIST_H__ */ diff --git a/intern/cycles/util/rect.h b/intern/cycles/util/rect.h new file mode 100644 index 00000000000..79d64b917b7 --- /dev/null +++ b/intern/cycles/util/rect.h @@ -0,0 +1,75 @@ +/* + * Copyright 2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_RECT_H__ +#define __UTIL_RECT_H__ + +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* Rectangles are represented as a int4 containing the coordinates of the lower-left and + * upper-right corners in the order (x0, y0, x1, y1). */ + +ccl_device_inline int4 rect_from_shape(int x0, int y0, int w, int h) +{ + return make_int4(x0, y0, x0 + w, y0 + h); +} + +ccl_device_inline int4 rect_expand(int4 rect, int d) +{ + return make_int4(rect.x - d, rect.y - d, rect.z + d, rect.w + d); +} + +/* Returns the intersection of two rects. */ +ccl_device_inline int4 rect_clip(int4 a, int4 b) +{ + return make_int4(max(a.x, b.x), max(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); +} + +ccl_device_inline bool rect_is_valid(int4 rect) +{ + return (rect.z > rect.x) && (rect.w > rect.y); +} + +/* Returns the local row-major index of the pixel inside the rect. */ +ccl_device_inline int coord_to_local_index(int4 rect, int x, int y) +{ + int w = rect.z - rect.x; + return (y - rect.y) * w + (x - rect.x); +} + +/* Finds the coordinates of a pixel given by its row-major index in the rect, + * and returns whether the pixel is inside it. */ +ccl_device_inline bool local_index_to_coord(int4 rect, + int idx, + ccl_private int *x, + ccl_private int *y) +{ + int w = rect.z - rect.x; + *x = (idx % w) + rect.x; + *y = (idx / w) + rect.y; + return (*y < rect.w); +} + +ccl_device_inline int rect_size(int4 rect) +{ + return (rect.z - rect.x) * (rect.w - rect.y); +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_RECT_H__ */ diff --git a/intern/cycles/util/semaphore.h b/intern/cycles/util/semaphore.h new file mode 100644 index 00000000000..8da8a232ba2 --- /dev/null +++ b/intern/cycles/util/semaphore.h @@ -0,0 +1,61 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
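[Editor's sketch, not part of the patch: an illustrative use of the rect helpers from util/rect.h above, clip a 16x16 tile against the image bounds and walk its pixels through the local row-major indexing helpers. It assumes util/math.h supplies the integer min/max used by rect_clip; the function name is hypothetical.]

#include "util/math.h"
#include "util/rect.h"

CCL_NAMESPACE_BEGIN

static int count_clipped_pixels_demo()
{
  const int4 image = rect_from_shape(0, 0, 1920, 1080);
  const int4 tile = rect_from_shape(1912, 1072, 16, 16);
  const int4 clipped = rect_clip(tile, image);

  if (!rect_is_valid(clipped)) {
    return 0;
  }

  int count = 0;
  for (int idx = 0; idx < rect_size(clipped); idx++) {
    int x, y;
    if (local_index_to_coord(clipped, idx, &x, &y)) {
      count++; /* (x, y) is an absolute pixel coordinate inside `clipped` */
    }
  }
  return count; /* 8 * 8 = 64 for the numbers above */
}

CCL_NAMESPACE_END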
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SEMAPHORE_H__ +#define __UTIL_SEMAPHORE_H__ + +#include "util/thread.h" + +CCL_NAMESPACE_BEGIN + +/* Counting Semaphore + * + * To restrict concurrent access to a resource to a specified number + * of threads. Similar to std::counting_semaphore from C++20. */ + +class thread_counting_semaphore { + public: + explicit thread_counting_semaphore(const int count) : count(count) + { + } + + thread_counting_semaphore(const thread_counting_semaphore &) = delete; + + void acquire() + { + thread_scoped_lock lock(mutex); + while (count == 0) { + condition.wait(lock); + } + count--; + } + + void release() + { + thread_scoped_lock lock(mutex); + count++; + condition.notify_one(); + } + + protected: + thread_mutex mutex; + thread_condition_variable condition; + int count; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_SEMAPHORE_H__ */ diff --git a/intern/cycles/util/set.h b/intern/cycles/util/set.h new file mode 100644 index 00000000000..298e1f7729a --- /dev/null +++ b/intern/cycles/util/set.h @@ -0,0 +1,34 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SET_H__ +#define __UTIL_SET_H__ + +#include +#include + +#if defined(_MSC_VER) && (_MSC_VER >= 1900) +# include +#endif + +CCL_NAMESPACE_BEGIN + +using std::set; +using std::unordered_set; + +CCL_NAMESPACE_END + +#endif /* __UTIL_SET_H__ */ diff --git a/intern/cycles/util/simd.cpp b/intern/cycles/util/simd.cpp new file mode 100644 index 00000000000..089444bb6cc --- /dev/null +++ b/intern/cycles/util/simd.cpp @@ -0,0 +1,44 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
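[Editor's sketch, not part of the patch: the intended use of thread_counting_semaphore above, bound the number of threads inside a section. Extra callers block in acquire() until a slot is released. The names are illustrative.]

#include "util/semaphore.h"

CCL_NAMESPACE_BEGIN

/* Allow at most two concurrent "uploads". */
static thread_counting_semaphore upload_slots_demo(2);

static void upload_demo()
{
  upload_slots_demo.acquire();
  /* ... at most two threads execute this section at any one time ... */
  upload_slots_demo.release();
}

CCL_NAMESPACE_END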
+ */ + +#if (defined(WITH_KERNEL_SSE2)) || (defined(WITH_KERNEL_NATIVE) && defined(__SSE2__)) + +# define __KERNEL_SSE2__ +# include "util/simd.h" + +CCL_NAMESPACE_BEGIN + +const __m128 _mm_lookupmask_ps[16] = {_mm_castsi128_ps(_mm_set_epi32(0, 0, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, 0, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, -1, -1))}; + +CCL_NAMESPACE_END + +#endif // WITH_KERNEL_SSE2 diff --git a/intern/cycles/util/simd.h b/intern/cycles/util/simd.h new file mode 100644 index 00000000000..cc4950891d0 --- /dev/null +++ b/intern/cycles/util/simd.h @@ -0,0 +1,572 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SIMD_TYPES_H__ +#define __UTIL_SIMD_TYPES_H__ + +#include +#include + +#include "util/defines.h" + +/* SSE Intrinsics includes + * + * We assume __KERNEL_SSEX__ flags to have been defined at this point. + * + * MinGW64 has conflicting declarations for these SSE headers in . + * Since we can't avoid including , better only include that */ +#if defined(FREE_WINDOWS64) +# include "util/windows.h" +#elif defined(_MSC_VER) +# include +#elif (defined(__x86_64__) || defined(__i386__)) +# include +#elif defined(__KERNEL_NEON__) +# define SSE2NEON_PRECISE_MINMAX 1 +# include +#endif + +/* Floating Point Control, for Embree. */ +#if defined(__x86_64__) || defined(_M_X64) +# define SIMD_SET_FLUSH_TO_ZERO \ + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#else +# define SIMD_SET_FLUSH_TO_ZERO +#endif + +CCL_NAMESPACE_BEGIN + +/* Data structures used by SSE classes. 
*/ +#ifdef __KERNEL_SSE2__ + +extern const __m128 _mm_lookupmask_ps[16]; + +static struct TrueTy { + __forceinline operator bool() const + { + return true; + } +} True ccl_attr_maybe_unused; + +static struct FalseTy { + __forceinline operator bool() const + { + return false; + } +} False ccl_attr_maybe_unused; + +static struct ZeroTy { + __forceinline operator float() const + { + return 0; + } + __forceinline operator int() const + { + return 0; + } +} zero ccl_attr_maybe_unused; + +static struct OneTy { + __forceinline operator float() const + { + return 1; + } + __forceinline operator int() const + { + return 1; + } +} one ccl_attr_maybe_unused; + +static struct NegInfTy { + __forceinline operator float() const + { + return -std::numeric_limits::infinity(); + } + __forceinline operator int() const + { + return std::numeric_limits::min(); + } +} neg_inf ccl_attr_maybe_unused; + +static struct PosInfTy { + __forceinline operator float() const + { + return std::numeric_limits::infinity(); + } + __forceinline operator int() const + { + return std::numeric_limits::max(); + } +} inf ccl_attr_maybe_unused, pos_inf ccl_attr_maybe_unused; + +static struct StepTy { +} step ccl_attr_maybe_unused; + +#endif + +/* Utilities used by Neon */ +#if defined(__KERNEL_NEON__) +template type shuffle_neon(const type &a) +{ + if (i0 == i1 && i0 == i2 && i0 == i3) { + return type(vdupq_laneq_s32(int32x4_t(a), i0)); + } + static const uint8_t tbl[16] = {(i0 * 4) + 0, + (i0 * 4) + 1, + (i0 * 4) + 2, + (i0 * 4) + 3, + (i1 * 4) + 0, + (i1 * 4) + 1, + (i1 * 4) + 2, + (i1 * 4) + 3, + (i2 * 4) + 0, + (i2 * 4) + 1, + (i2 * 4) + 2, + (i2 * 4) + 3, + (i3 * 4) + 0, + (i3 * 4) + 1, + (i3 * 4) + 2, + (i3 * 4) + 3}; + + return type(vqtbl1q_s8(int8x16_t(a), *(uint8x16_t *)tbl)); +} + +template +type shuffle_neon(const type &a, const type &b) +{ + if (&a == &b) { + static const uint8_t tbl[16] = {(i0 * 4) + 0, + (i0 * 4) + 1, + (i0 * 4) + 2, + (i0 * 4) + 3, + (i1 * 4) + 0, + (i1 * 4) + 1, + (i1 * 4) + 2, + (i1 * 4) + 3, + (i2 * 4) + 0, + (i2 * 4) + 1, + (i2 * 4) + 2, + (i2 * 4) + 3, + (i3 * 4) + 0, + (i3 * 4) + 1, + (i3 * 4) + 2, + (i3 * 4) + 3}; + + return type(vqtbl1q_s8(int8x16_t(b), *(uint8x16_t *)tbl)); + } + else { + + static const uint8_t tbl[16] = {(i0 * 4) + 0, + (i0 * 4) + 1, + (i0 * 4) + 2, + (i0 * 4) + 3, + (i1 * 4) + 0, + (i1 * 4) + 1, + (i1 * 4) + 2, + (i1 * 4) + 3, + (i2 * 4) + 0 + 16, + (i2 * 4) + 1 + 16, + (i2 * 4) + 2 + 16, + (i2 * 4) + 3 + 16, + (i3 * 4) + 0 + 16, + (i3 * 4) + 1 + 16, + (i3 * 4) + 2 + 16, + (i3 * 4) + 3 + 16}; + + return type(vqtbl2q_s8((int8x16x2_t){int8x16_t(a), int8x16_t(b)}, *(uint8x16_t *)tbl)); + } +} +#endif /* __KERNEL_NEON */ + +/* Intrinsics Functions + * + * For fast bit operations. */ + +#if defined(__BMI__) && defined(__GNUC__) +# ifndef _tzcnt_u32 +# define _tzcnt_u32 __tzcnt_u32 +# endif +# ifndef _tzcnt_u64 +# define _tzcnt_u64 __tzcnt_u64 +# endif +#endif + +#if defined(__LZCNT__) +# define _lzcnt_u32 __lzcnt32 +# define _lzcnt_u64 __lzcnt64 +#endif + +#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__clang__) +/* Intrinsic functions on Windows. 
*/ +__forceinline uint32_t __bsf(uint32_t v) +{ +# if defined(__KERNEL_AVX2__) + return _tzcnt_u32(v); +# else + unsigned long r = 0; + _BitScanForward(&r, v); + return r; +# endif +} + +__forceinline uint32_t __bsr(uint32_t v) +{ + unsigned long r = 0; + _BitScanReverse(&r, v); + return r; +} + +__forceinline uint32_t __btc(uint32_t v, uint32_t i) +{ + long r = v; + _bittestandcomplement(&r, i); + return r; +} + +__forceinline uint32_t bitscan(uint32_t v) +{ +# if defined(__KERNEL_AVX2__) + return _tzcnt_u32(v); +# else + return __bsf(v); +# endif +} + +# if defined(__KERNEL_64_BIT__) + +__forceinline uint64_t __bsf(uint64_t v) +{ +# if defined(__KERNEL_AVX2__) + return _tzcnt_u64(v); +# else + unsigned long r = 0; + _BitScanForward64(&r, v); + return r; +# endif +} + +__forceinline uint64_t __bsr(uint64_t v) +{ + unsigned long r = 0; + _BitScanReverse64(&r, v); + return r; +} + +__forceinline uint64_t __btc(uint64_t v, uint64_t i) +{ + uint64_t r = v; + _bittestandcomplement64((__int64 *)&r, i); + return r; +} + +__forceinline uint64_t bitscan(uint64_t v) +{ +# if defined(__KERNEL_AVX2__) +# if defined(__KERNEL_64_BIT__) + return _tzcnt_u64(v); +# else + return _tzcnt_u32(v); +# endif +# else + return __bsf(v); +# endif +} + +# endif /* __KERNEL_64_BIT__ */ + +#elif (defined(__x86_64__) || defined(__i386__)) && defined(__KERNEL_SSE2__) +/* Intrinsic functions with x86 SSE. */ + +__forceinline uint32_t __bsf(const uint32_t v) +{ + uint32_t r = 0; + asm("bsf %1,%0" : "=r"(r) : "r"(v)); + return r; +} + +__forceinline uint32_t __bsr(const uint32_t v) +{ + uint32_t r = 0; + asm("bsr %1,%0" : "=r"(r) : "r"(v)); + return r; +} + +__forceinline uint32_t __btc(const uint32_t v, uint32_t i) +{ + uint32_t r = 0; + asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); + return r; +} + +# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \ + !(defined(__ILP32__) && defined(__x86_64__)) +__forceinline uint64_t __bsf(const uint64_t v) +{ + uint64_t r = 0; + asm("bsf %1,%0" : "=r"(r) : "r"(v)); + return r; +} +# endif + +__forceinline uint64_t __bsr(const uint64_t v) +{ + uint64_t r = 0; + asm("bsr %1,%0" : "=r"(r) : "r"(v)); + return r; +} + +__forceinline uint64_t __btc(const uint64_t v, const uint64_t i) +{ + uint64_t r = 0; + asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); + return r; +} + +__forceinline uint32_t bitscan(uint32_t v) +{ +# if defined(__KERNEL_AVX2__) + return _tzcnt_u32(v); +# else + return __bsf(v); +# endif +} + +# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \ + !(defined(__ILP32__) && defined(__x86_64__)) +__forceinline uint64_t bitscan(uint64_t v) +{ +# if defined(__KERNEL_AVX2__) +# if defined(__KERNEL_64_BIT__) + return _tzcnt_u64(v); +# else + return _tzcnt_u32(v); +# endif +# else + return __bsf(v); +# endif +} +# endif + +#else +/* Intrinsic functions fallback for arbitrary processor. 
*/ +__forceinline uint32_t __bsf(const uint32_t x) +{ + for (int i = 0; i < 32; i++) { + if (x & (1U << i)) + return i; + } + return 32; +} + +__forceinline uint32_t __bsr(const uint32_t x) +{ + for (int i = 0; i < 32; i++) { + if (x & (1U << (31 - i))) + return (31 - i); + } + return 32; +} + +__forceinline uint32_t __btc(const uint32_t x, const uint32_t bit) +{ + uint32_t mask = 1U << bit; + return x & (~mask); +} + +__forceinline uint32_t __bsf(const uint64_t x) +{ + for (int i = 0; i < 64; i++) { + if (x & (1UL << i)) + return i; + } + return 64; +} + +__forceinline uint32_t __bsr(const uint64_t x) +{ + for (int i = 0; i < 64; i++) { + if (x & (1UL << (63 - i))) + return (63 - i); + } + return 64; +} + +__forceinline uint64_t __btc(const uint64_t x, const uint32_t bit) +{ + uint64_t mask = 1UL << bit; + return x & (~mask); +} + +__forceinline uint32_t bitscan(uint32_t value) +{ + assert(value != 0); + uint32_t bit = 0; + while ((value & (1 << bit)) == 0) { + ++bit; + } + return bit; +} + +__forceinline uint64_t bitscan(uint64_t value) +{ + assert(value != 0); + uint64_t bit = 0; + while ((value & (1 << bit)) == 0) { + ++bit; + } + return bit; +} + +#endif /* Intrinsics */ + +/* SSE compatibility. + * + * Various utilities to smooth over differences between SSE versions and + * implementations. */ +#ifdef __KERNEL_SSE2__ + +/* Test __KERNEL_SSE41__ for MSVC which does not define __SSE4_1__, and test + * __SSE4_1__ to avoid OpenImageIO conflicts with our emulation macros on other + * platforms when compiling code outside the kernel. */ +# if !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) + +/* Emulation of SSE4 functions with SSE2 */ + +# define _MM_FROUND_TO_NEAREST_INT 0x00 +# define _MM_FROUND_TO_NEG_INF 0x01 +# define _MM_FROUND_TO_POS_INF 0x02 +# define _MM_FROUND_TO_ZERO 0x03 +# define _MM_FROUND_CUR_DIRECTION 0x04 + +# undef _mm_blendv_ps +# define _mm_blendv_ps _mm_blendv_ps_emu +__forceinline __m128 _mm_blendv_ps_emu(__m128 value, __m128 input, __m128 mask) +{ + __m128i isignmask = _mm_set1_epi32(0x80000000); + __m128 signmask = _mm_castsi128_ps(isignmask); + __m128i iandsign = _mm_castps_si128(_mm_and_ps(mask, signmask)); + __m128i icmpmask = _mm_cmpeq_epi32(iandsign, isignmask); + __m128 cmpmask = _mm_castsi128_ps(icmpmask); + return _mm_or_ps(_mm_and_ps(cmpmask, input), _mm_andnot_ps(cmpmask, value)); +} + +# undef _mm_blend_ps +# define _mm_blend_ps _mm_blend_ps_emu +__forceinline __m128 _mm_blend_ps_emu(__m128 value, __m128 input, const int mask) +{ + assert(mask < 0x10); + return _mm_blendv_ps(value, input, _mm_lookupmask_ps[mask]); +} + +# undef _mm_blendv_epi8 +# define _mm_blendv_epi8 _mm_blendv_epi8_emu +__forceinline __m128i _mm_blendv_epi8_emu(__m128i value, __m128i input, __m128i mask) +{ + return _mm_or_si128(_mm_and_si128(mask, input), _mm_andnot_si128(mask, value)); +} + +# undef _mm_min_epi32 +# define _mm_min_epi32 _mm_min_epi32_emu +__forceinline __m128i _mm_min_epi32_emu(__m128i value, __m128i input) +{ + return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input)); +} + +# undef _mm_max_epi32 +# define _mm_max_epi32 _mm_max_epi32_emu +__forceinline __m128i _mm_max_epi32_emu(__m128i value, __m128i input) +{ + return _mm_blendv_epi8(value, input, _mm_cmplt_epi32(value, input)); +} + +# ifndef __KERNEL_NEON__ +# undef _mm_extract_epi32 +# define _mm_extract_epi32 _mm_extract_epi32_emu +__forceinline int _mm_extract_epi32_emu(__m128i input, const int index) +{ + switch (index) { + case 0: + return _mm_cvtsi128_si32(input); + 
case 1: + return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(1, 1, 1, 1))); + case 2: + return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(2, 2, 2, 2))); + case 3: + return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(3, 3, 3, 3))); + default: + assert(false); + return 0; + } +} +# endif + +# undef _mm_insert_epi32 +# define _mm_insert_epi32 _mm_insert_epi32_emu +__forceinline __m128i _mm_insert_epi32_emu(__m128i value, int input, const int index) +{ + assert(index >= 0 && index < 4); + ((int *)&value)[index] = input; + return value; +} + +# undef _mm_insert_ps +# define _mm_insert_ps _mm_insert_ps_emu +__forceinline __m128 _mm_insert_ps_emu(__m128 value, __m128 input, const int index) +{ + assert(index < 0x100); + ((float *)&value)[(index >> 4) & 0x3] = ((float *)&input)[index >> 6]; + return _mm_andnot_ps(_mm_lookupmask_ps[index & 0xf], value); +} + +# undef _mm_round_ps +# define _mm_round_ps _mm_round_ps_emu +__forceinline __m128 _mm_round_ps_emu(__m128 value, const int flags) +{ + switch (flags) { + case _MM_FROUND_TO_NEAREST_INT: + return _mm_cvtepi32_ps(_mm_cvtps_epi32(value)); + case _MM_FROUND_TO_NEG_INF: + return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(-0.5f)))); + case _MM_FROUND_TO_POS_INF: + return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(0.5f)))); + case _MM_FROUND_TO_ZERO: + return _mm_cvtepi32_ps(_mm_cvttps_epi32(value)); + } + return value; +} + +# endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */ + +/* Older GCC versions do not have _mm256_cvtss_f32 yet, so define it ourselves. + * _mm256_castps256_ps128 generates no instructions so this is just as efficient. */ +# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) +# undef _mm256_cvtss_f32 +# define _mm256_cvtss_f32(a) (_mm_cvtss_f32(_mm256_castps256_ps128(a))) +# endif + +#endif /* __KERNEL_SSE2__ */ + +/* quiet unused define warnings */ +#if defined(__KERNEL_SSE2__) || defined(__KERNEL_SSE3__) || defined(__KERNEL_SSSE3__) || \ + defined(__KERNEL_SSE41__) || defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) +/* do nothing */ +#endif + +CCL_NAMESPACE_END + +#endif /* __UTIL_SIMD_TYPES_H__ */ diff --git a/intern/cycles/util/sseb.h b/intern/cycles/util/sseb.h new file mode 100644 index 00000000000..6afce4f8909 --- /dev/null +++ b/intern/cycles/util/sseb.h @@ -0,0 +1,358 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SSEB_H__ +#define __UTIL_SSEB_H__ + +CCL_NAMESPACE_BEGIN + +#ifdef __KERNEL_SSE2__ + +struct ssei; +struct ssef; + +/*! 4-wide SSE bool type. 
*/ +struct sseb { + typedef sseb Mask; // mask type + typedef ssei Int; // int type + typedef ssef Float; // float type + + enum { size = 4 }; // number of SIMD elements + union { + __m128 m128; + int32_t v[4]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline sseb() + { + } + __forceinline sseb(const sseb &other) + { + m128 = other.m128; + } + __forceinline sseb &operator=(const sseb &other) + { + m128 = other.m128; + return *this; + } + + __forceinline sseb(const __m128 input) : m128(input) + { + } + __forceinline operator const __m128 &(void) const + { + return m128; + } + __forceinline operator const __m128i(void) const + { + return _mm_castps_si128(m128); + } + __forceinline operator const __m128d(void) const + { + return _mm_castps_pd(m128); + } + + __forceinline sseb(bool a) + : m128(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) + { + } + __forceinline sseb(bool a, bool b) + : m128(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) + { + } + __forceinline sseb(bool a, bool b, bool c, bool d) + : m128(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) + { + } + __forceinline sseb(int mask) + { + assert(mask >= 0 && mask < 16); + m128 = _mm_lookupmask_ps[mask]; + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline sseb(FalseTy) : m128(_mm_setzero_ps()) + { + } + __forceinline sseb(TrueTy) + : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator[](const size_t i) const + { + assert(i < 4); + return (_mm_movemask_ps(m128) >> i) & 1; + } + __forceinline int32_t &operator[](const size_t i) + { + assert(i < 4); + return v[i]; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +/// Unary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator!(const sseb &a) +{ + return _mm_xor_ps(a, sseb(True)); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator&(const sseb &a, const sseb &b) +{ + return _mm_and_ps(a, b); +} +__forceinline const sseb operator|(const sseb &a, const sseb &b) +{ + return _mm_or_ps(a, b); +} +__forceinline const sseb operator^(const sseb &a, const sseb &b) +{ + return _mm_xor_ps(a, b); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Assignment Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator&=(sseb &a, const sseb &b) +{ + return a = a & b; +} +__forceinline const sseb operator|=(sseb &a, const sseb &b) +{ + return a = a | b; +} +__forceinline const sseb operator^=(sseb &a, const sseb &b) +{ + return a = a ^ b; +} + 
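For context, and not part of the patch itself: sseb is a thin wrapper over the usual SSE compare/mask idiom, where each lane holds either all ones or all zeros so that masks compose with and/andnot/or into branch-free selects. Below is a minimal standalone sketch of that idiom using only plain SSE2 intrinsics; the variable names and input values are purely illustrative.

```cpp
// Standalone sketch (not part of the patch): the compare/movemask/blend idiom
// that sseb wraps. SSE2 only; values are illustrative.
#include <emmintrin.h>
#include <cstdio>

int main()
{
  const __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  // lanes: 1, 2, 3, 4
  const __m128 b = _mm_set_ps(1.0f, 5.0f, 1.0f, 5.0f);  // lanes: 5, 1, 5, 1

  // Comparison yields an all-ones/all-zeros lane mask, exactly what sseb stores.
  const __m128 mask = _mm_cmplt_ps(a, b);

  // select(mask, a, b) without SSE4.1 blendv: (mask & a) | (~mask & b).
  const __m128 sel = _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b));

  // movemask packs the lane sign bits: bit i is set if lane i passed the test.
  const int bits = _mm_movemask_ps(mask);

  float out[4];
  _mm_storeu_ps(out, sel);
  printf("mask bits: 0x%x, per-lane min(a,b): %g %g %g %g\n",
         bits, out[0], out[1], out[2], out[3]);
  return 0;
}
```

The 16-entry _mm_lookupmask_ps table declared earlier serves the same purpose when a mask has to be built from an integer bit pattern rather than from a comparison, which is what the sseb(bool, ...) and sseb(int mask) constructors above rely on.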
+//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator!=(const sseb &a, const sseb &b) +{ + return _mm_xor_ps(a, b); +} +__forceinline const sseb operator==(const sseb &a, const sseb &b) +{ + return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b)); +} + +__forceinline const sseb select(const sseb &m, const sseb &t, const sseb &f) +{ +# if defined(__KERNEL_SSE41__) + return _mm_blendv_ps(f, t, m); +# else + return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Movement/Shifting/Shuffling Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb unpacklo(const sseb &a, const sseb &b) +{ + return _mm_unpacklo_ps(a, b); +} +__forceinline const sseb unpackhi(const sseb &a, const sseb &b) +{ + return _mm_unpackhi_ps(a, b); +} + +template +__forceinline const sseb shuffle(const sseb &a) +{ +# ifdef __KERNEL_NEON__ + return shuffle_neon(a); +# else + return _mm_castsi128_ps(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0))); +# endif +} + +# ifndef __KERNEL_NEON__ +template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a) +{ + return _mm_movelh_ps(a, a); +} + +template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a) +{ + return _mm_movehl_ps(a, a); +} +# endif + +template +__forceinline const sseb shuffle(const sseb &a, const sseb &b) +{ +# ifdef __KERNEL_NEON__ + return shuffle_neon(a, b); +# else + return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); +# endif +} + +# ifndef __KERNEL_NEON__ +template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a, const sseb &b) +{ + return _mm_movelh_ps(a, b); +} + +template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a, const sseb &b) +{ + return _mm_movehl_ps(b, a); +} +# endif + +# if defined(__KERNEL_SSE3__) && !defined(__KERNEL_NEON__) +template<> __forceinline const sseb shuffle<0, 0, 2, 2>(const sseb &a) +{ + return _mm_moveldup_ps(a); +} +template<> __forceinline const sseb shuffle<1, 1, 3, 3>(const sseb &a) +{ + return _mm_movehdup_ps(a); +} +# endif + +# if defined(__KERNEL_SSE41__) +template +__forceinline const sseb insert(const sseb &a, const sseb &b) +{ +# ifdef __KERNEL_NEON__ + sseb res = a; + if (clr) + res[dst] = 0; + else + res[dst] = b[src]; + return res; +# else + return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); +# endif +} +template __forceinline const sseb insert(const sseb &a, const sseb &b) +{ + return insert(a, b); +} +template __forceinline const sseb insert(const sseb &a, const bool b) +{ + return insert(a, sseb(b)); +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Reduction Operations +//////////////////////////////////////////////////////////////////////////////// + +# if defined(__KERNEL_SSE41__) +__forceinline uint32_t popcnt(const sseb &a) +{ +# if defined(__KERNEL_NEON__) + const int32x4_t mask = {1, 1, 1, 1}; + int32x4_t t = vandq_s32(vreinterpretq_s32_m128(a.m128), mask); + return vaddvq_s32(t); +# else + return _mm_popcnt_u32(_mm_movemask_ps(a)); +# endif +} +# else +__forceinline uint32_t popcnt(const sseb &a) +{ + return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]); +} +# endif + +__forceinline bool reduce_and(const sseb &a) +{ +# if defined(__KERNEL_NEON__) + return 
vaddvq_s32(vreinterpretq_s32_m128(a.m128)) == -4; +# else + return _mm_movemask_ps(a) == 0xf; +# endif +} +__forceinline bool reduce_or(const sseb &a) +{ +# if defined(__KERNEL_NEON__) + return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) != 0x0; +# else + return _mm_movemask_ps(a) != 0x0; +# endif +} +__forceinline bool all(const sseb &b) +{ +# if defined(__KERNEL_NEON__) + return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == -4; +# else + return _mm_movemask_ps(b) == 0xf; +# endif +} +__forceinline bool any(const sseb &b) +{ +# if defined(__KERNEL_NEON__) + return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) != 0x0; +# else + return _mm_movemask_ps(b) != 0x0; +# endif +} +__forceinline bool none(const sseb &b) +{ +# if defined(__KERNEL_NEON__) + return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == 0x0; +# else + return _mm_movemask_ps(b) == 0x0; +# endif +} + +__forceinline uint32_t movemask(const sseb &a) +{ + return _mm_movemask_ps(a); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Debug Functions +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_sseb(const char *label, const sseb &a) +{ + printf("%s: %d %d %d %d\n", label, a[0], a[1], a[2], a[3]); +} + +#endif + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/ssef.h b/intern/cycles/util/ssef.h new file mode 100644 index 00000000000..ea5e78b54d2 --- /dev/null +++ b/intern/cycles/util/ssef.h @@ -0,0 +1,1104 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SSEF_H__ +#define __UTIL_SSEF_H__ + +#include "util/ssei.h" + +CCL_NAMESPACE_BEGIN + +#ifdef __KERNEL_SSE2__ + +struct sseb; +struct ssef; + +/*! 4-wide SSE float type. 
*/ +struct ssef { + typedef sseb Mask; // mask type + typedef ssei Int; // int type + typedef ssef Float; // float type + + enum { size = 4 }; // number of SIMD elements + union { + __m128 m128; + float f[4]; + int i[4]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline ssef() + { + } + __forceinline ssef(const ssef &other) + { + m128 = other.m128; + } + __forceinline ssef &operator=(const ssef &other) + { + m128 = other.m128; + return *this; + } + + __forceinline ssef(const __m128 a) : m128(a) + { + } + __forceinline operator const __m128 &() const + { + return m128; + } + __forceinline operator __m128 &() + { + return m128; + } + + __forceinline ssef(float a) : m128(_mm_set1_ps(a)) + { + } + __forceinline ssef(float a, float b, float c, float d) : m128(_mm_setr_ps(a, b, c, d)) + { + } + + __forceinline explicit ssef(const __m128i a) : m128(_mm_cvtepi32_ps(a)) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Loads and Stores + //////////////////////////////////////////////////////////////////////////////// + +# if defined(__KERNEL_AVX__) + static __forceinline ssef broadcast(const void *const a) + { + return _mm_broadcast_ss((float *)a); + } +# else + static __forceinline ssef broadcast(const void *const a) + { + return _mm_set1_ps(*(float *)a); + } +# endif + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const float &operator[](const size_t i) const + { + assert(i < 4); + return f[i]; + } + __forceinline float &operator[](const size_t i) + { + assert(i < 4); + return f[i]; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +/// Unary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssef cast(const __m128i &a) +{ + return _mm_castsi128_ps(a); +} +__forceinline const ssef operator+(const ssef &a) +{ + return a; +} +__forceinline const ssef operator-(const ssef &a) +{ + return _mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); +} +__forceinline const ssef abs(const ssef &a) +{ + return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))); +} +# if defined(__KERNEL_SSE41__) +__forceinline const ssef sign(const ssef &a) +{ + return _mm_blendv_ps(ssef(1.0f), -ssef(1.0f), _mm_cmplt_ps(a, ssef(0.0f))); +} +# endif +__forceinline const ssef signmsk(const ssef &a) +{ + return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); +} + +__forceinline const ssef rcp(const ssef &a) +{ + const ssef r = _mm_rcp_ps(a.m128); + return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a)); +} +__forceinline const ssef sqr(const ssef &a) +{ + return _mm_mul_ps(a, a); +} +__forceinline const ssef mm_sqrt(const ssef &a) +{ + return _mm_sqrt_ps(a.m128); +} +__forceinline const ssef rsqrt(const ssef &a) +{ + const ssef r = _mm_rsqrt_ps(a.m128); + return _mm_add_ps( + _mm_mul_ps(_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f), r), + _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set_ps(-0.5f, -0.5f, -0.5f, -0.5f)), r), + _mm_mul_ps(r, r))); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators 
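As an aside, not part of the patch: rcp() and rsqrt() above do not return the raw _mm_rcp_ps / _mm_rsqrt_ps estimates, which are only accurate to roughly 12 bits; each applies one Newton-Raphson step to refine the estimate. A standalone sketch of the same refinement for the reciprocal, with an illustrative helper name and input values:

```cpp
// Standalone sketch (not part of the patch): one Newton-Raphson step on the
// SSE reciprocal estimate, r' = r * (2 - a*r), written as (r + r) - (r*r)*a
// to mirror rcp() in the header above.
#include <xmmintrin.h>
#include <cstdio>

static __m128 rcp_refined(__m128 a)
{
  const __m128 r = _mm_rcp_ps(a);  // ~12-bit approximation
  return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a));
}

int main()
{
  const __m128 a = _mm_set_ps(8.0f, 4.0f, 3.0f, 2.0f);
  float approx[4], exact[4];
  _mm_storeu_ps(approx, rcp_refined(a));
  _mm_storeu_ps(exact, _mm_div_ps(_mm_set_ps1(1.0f), a));
  for (int i = 0; i < 4; i++)
    printf("1/x: refined %.8f vs exact %.8f\n", approx[i], exact[i]);
  return 0;
}
```

One iteration is usually enough to get close to full single precision while remaining cheaper than _mm_div_ps on many microarchitectures, which is why the header refines rather than divides.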
+//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssef operator+(const ssef &a, const ssef &b) +{ + return _mm_add_ps(a.m128, b.m128); +} +__forceinline const ssef operator+(const ssef &a, const float &b) +{ + return a + ssef(b); +} +__forceinline const ssef operator+(const float &a, const ssef &b) +{ + return ssef(a) + b; +} + +__forceinline const ssef operator-(const ssef &a, const ssef &b) +{ + return _mm_sub_ps(a.m128, b.m128); +} +__forceinline const ssef operator-(const ssef &a, const float &b) +{ + return a - ssef(b); +} +__forceinline const ssef operator-(const float &a, const ssef &b) +{ + return ssef(a) - b; +} + +__forceinline const ssef operator*(const ssef &a, const ssef &b) +{ + return _mm_mul_ps(a.m128, b.m128); +} +__forceinline const ssef operator*(const ssef &a, const float &b) +{ + return a * ssef(b); +} +__forceinline const ssef operator*(const float &a, const ssef &b) +{ + return ssef(a) * b; +} + +__forceinline const ssef operator/(const ssef &a, const ssef &b) +{ + return _mm_div_ps(a.m128, b.m128); +} +__forceinline const ssef operator/(const ssef &a, const float &b) +{ + return a / ssef(b); +} +__forceinline const ssef operator/(const float &a, const ssef &b) +{ + return ssef(a) / b; +} + +__forceinline const ssef operator^(const ssef &a, const ssef &b) +{ + return _mm_xor_ps(a.m128, b.m128); +} +__forceinline const ssef operator^(const ssef &a, const ssei &b) +{ + return _mm_xor_ps(a.m128, _mm_castsi128_ps(b.m128)); +} + +__forceinline const ssef operator&(const ssef &a, const ssef &b) +{ + return _mm_and_ps(a.m128, b.m128); +} +__forceinline const ssef operator&(const ssef &a, const ssei &b) +{ + return _mm_and_ps(a.m128, _mm_castsi128_ps(b.m128)); +} + +__forceinline const ssef operator|(const ssef &a, const ssef &b) +{ + return _mm_or_ps(a.m128, b.m128); +} +__forceinline const ssef operator|(const ssef &a, const ssei &b) +{ + return _mm_or_ps(a.m128, _mm_castsi128_ps(b.m128)); +} + +__forceinline const ssef andnot(const ssef &a, const ssef &b) +{ + return _mm_andnot_ps(a.m128, b.m128); +} + +__forceinline const ssef min(const ssef &a, const ssef &b) +{ + return _mm_min_ps(a.m128, b.m128); +} +__forceinline const ssef min(const ssef &a, const float &b) +{ + return _mm_min_ps(a.m128, ssef(b)); +} +__forceinline const ssef min(const float &a, const ssef &b) +{ + return _mm_min_ps(ssef(a), b.m128); +} + +__forceinline const ssef max(const ssef &a, const ssef &b) +{ + return _mm_max_ps(a.m128, b.m128); +} +__forceinline const ssef max(const ssef &a, const float &b) +{ + return _mm_max_ps(a.m128, ssef(b)); +} +__forceinline const ssef max(const float &a, const ssef &b) +{ + return _mm_max_ps(ssef(a), b.m128); +} + +# if defined(__KERNEL_SSE41__) +__forceinline ssef mini(const ssef &a, const ssef &b) +{ + const ssei ai = _mm_castps_si128(a); + const ssei bi = _mm_castps_si128(b); + const ssei ci = _mm_min_epi32(ai, bi); + return _mm_castsi128_ps(ci); +} +# endif + +# if defined(__KERNEL_SSE41__) +__forceinline ssef maxi(const ssef &a, const ssef &b) +{ + const ssei ai = _mm_castps_si128(a); + const ssei bi = _mm_castps_si128(b); + const ssei ci = _mm_max_epi32(ai, bi); + return _mm_castsi128_ps(ci); +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Ternary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssef madd(const ssef &a, const ssef &b, const ssef &c) +{ +# if defined(__KERNEL_NEON__) + 
return vfmaq_f32(c, a, b); +# elif defined(__KERNEL_AVX2__) + return _mm_fmadd_ps(a, b, c); +# else + return a * b + c; +# endif +} +__forceinline const ssef msub(const ssef &a, const ssef &b, const ssef &c) +{ +# if defined(__KERNEL_NEON__) + return vfmaq_f32(vnegq_f32(c), a, b); +# elif defined(__KERNEL_AVX2__) + return _mm_fmsub_ps(a, b, c); +# else + return a * b - c; +# endif +} +__forceinline const ssef nmadd(const ssef &a, const ssef &b, const ssef &c) +{ +# if defined(__KERNEL_NEON__) + return vfmsq_f32(c, a, b); +# elif defined(__KERNEL_AVX2__) + return _mm_fnmadd_ps(a, b, c); +# else + return c - a * b; +# endif +} +__forceinline const ssef nmsub(const ssef &a, const ssef &b, const ssef &c) +{ +# if defined(__KERNEL_NEON__) + return vfmsq_f32(vnegq_f32(c), a, b); +# elif defined(__KERNEL_AVX2__) + return _mm_fnmsub_ps(a, b, c); +# else + return -a * b - c; +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Assignment Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssef &operator+=(ssef &a, const ssef &b) +{ + return a = a + b; +} +__forceinline ssef &operator+=(ssef &a, const float &b) +{ + return a = a + b; +} + +__forceinline ssef &operator-=(ssef &a, const ssef &b) +{ + return a = a - b; +} +__forceinline ssef &operator-=(ssef &a, const float &b) +{ + return a = a - b; +} + +__forceinline ssef &operator*=(ssef &a, const ssef &b) +{ + return a = a * b; +} +__forceinline ssef &operator*=(ssef &a, const float &b) +{ + return a = a * b; +} + +__forceinline ssef &operator/=(ssef &a, const ssef &b) +{ + return a = a / b; +} +__forceinline ssef &operator/=(ssef &a, const float &b) +{ + return a = a / b; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator==(const ssef &a, const ssef &b) +{ + return _mm_cmpeq_ps(a.m128, b.m128); +} +__forceinline const sseb operator==(const ssef &a, const float &b) +{ + return a == ssef(b); +} +__forceinline const sseb operator==(const float &a, const ssef &b) +{ + return ssef(a) == b; +} + +__forceinline const sseb operator!=(const ssef &a, const ssef &b) +{ + return _mm_cmpneq_ps(a.m128, b.m128); +} +__forceinline const sseb operator!=(const ssef &a, const float &b) +{ + return a != ssef(b); +} +__forceinline const sseb operator!=(const float &a, const ssef &b) +{ + return ssef(a) != b; +} + +__forceinline const sseb operator<(const ssef &a, const ssef &b) +{ + return _mm_cmplt_ps(a.m128, b.m128); +} +__forceinline const sseb operator<(const ssef &a, const float &b) +{ + return a < ssef(b); +} +__forceinline const sseb operator<(const float &a, const ssef &b) +{ + return ssef(a) < b; +} + +__forceinline const sseb operator>=(const ssef &a, const ssef &b) +{ + return _mm_cmpnlt_ps(a.m128, b.m128); +} +__forceinline const sseb operator>=(const ssef &a, const float &b) +{ + return a >= ssef(b); +} +__forceinline const sseb operator>=(const float &a, const ssef &b) +{ + return ssef(a) >= b; +} + +__forceinline const sseb operator>(const ssef &a, const ssef &b) +{ + return _mm_cmpnle_ps(a.m128, b.m128); +} +__forceinline const sseb operator>(const ssef &a, const float &b) +{ + return a > ssef(b); +} +__forceinline const sseb operator>(const float &a, const ssef &b) +{ + return ssef(a) > b; +} + +__forceinline const sseb operator<=(const ssef &a, const ssef &b) 
+{ + return _mm_cmple_ps(a.m128, b.m128); +} +__forceinline const sseb operator<=(const ssef &a, const float &b) +{ + return a <= ssef(b); +} +__forceinline const sseb operator<=(const float &a, const ssef &b) +{ + return ssef(a) <= b; +} + +__forceinline const ssef select(const sseb &m, const ssef &t, const ssef &f) +{ +# ifdef __KERNEL_SSE41__ + return _mm_blendv_ps(f, t, m); +# else + return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); +# endif +} + +__forceinline const ssef select(const ssef &m, const ssef &t, const ssef &f) +{ +# ifdef __KERNEL_SSE41__ + return _mm_blendv_ps(f, t, m); +# else + return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); +# endif +} + +__forceinline const ssef select(const int mask, const ssef &t, const ssef &f) +{ +# if defined(__KERNEL_SSE41__) && \ + ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) + return _mm_blend_ps(f, t, mask); +# else + return select(sseb(mask), t, f); +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Rounding Functions +//////////////////////////////////////////////////////////////////////////////// + +# if defined(__KERNEL_SSE41__) +__forceinline const ssef round_even(const ssef &a) +{ +# ifdef __KERNEL_NEON__ + return vrndnq_f32(a); +# else + return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); +# endif +} +__forceinline const ssef round_down(const ssef &a) +{ +# ifdef __KERNEL_NEON__ + return vrndmq_f32(a); +# else + return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF); +# endif +} +__forceinline const ssef round_up(const ssef &a) +{ +# ifdef __KERNEL_NEON__ + return vrndpq_f32(a); +# else + return _mm_round_ps(a, _MM_FROUND_TO_POS_INF); +# endif +} +__forceinline const ssef round_zero(const ssef &a) +{ +# ifdef __KERNEL_NEON__ + return vrndq_f32(a); +# else + return _mm_round_ps(a, _MM_FROUND_TO_ZERO); +# endif +} +__forceinline const ssef floor(const ssef &a) +{ +# ifdef __KERNEL_NEON__ + return vrndnq_f32(a); +# else + return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF); +# endif +} +__forceinline const ssef ceil(const ssef &a) +{ +# ifdef __KERNEL_NEON__ + return vrndpq_f32(a); +# else + return _mm_round_ps(a, _MM_FROUND_TO_POS_INF); +# endif +} +# endif + +__forceinline ssei truncatei(const ssef &a) +{ + return _mm_cvttps_epi32(a.m128); +} + +/* This is about 25% faster than straightforward floor to integer conversion + * due to better pipelining. + * + * Unsaturated add 0xffffffff (a < 0) is the same as subtract -1. 
+ */ +__forceinline ssei floori(const ssef &a) +{ + return truncatei(a) + cast((a < 0.0f).m128); +} + +__forceinline ssef floorfrac(const ssef &x, ssei *i) +{ + *i = floori(x); + return x - ssef(*i); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Common Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssef mix(const ssef &a, const ssef &b, const ssef &t) +{ + return madd(t, b, (ssef(1.0f) - t) * a); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Movement/Shifting/Shuffling Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssef unpacklo(const ssef &a, const ssef &b) +{ + return _mm_unpacklo_ps(a.m128, b.m128); +} +__forceinline ssef unpackhi(const ssef &a, const ssef &b) +{ + return _mm_unpackhi_ps(a.m128, b.m128); +} + +template +__forceinline const ssef shuffle(const ssef &b) +{ +# ifdef __KERNEL_NEON__ + return shuffle_neon(b.m128); +# else + return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0))); +# endif +} + +template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a) +{ + return _mm_movelh_ps(a, a); +} + +template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a) +{ + return _mm_movehl_ps(a, a); +} + +template +__forceinline const ssef shuffle(const ssef &a, const ssef &b) +{ +# ifdef __KERNEL_NEON__ + return shuffle_neon(a, b); +# else + return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); +# endif +} + +template __forceinline const ssef shuffle(const ssef &a, const ssef &b) +{ +# ifdef __KERNEL_NEON__ + return shuffle_neon(a, b); +# else + return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0)); +# endif +} + +# ifndef __KERNEL_NEON__ +template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a, const ssef &b) +{ + return _mm_movelh_ps(a, b); +} + +template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a, const ssef &b) +{ + return _mm_movehl_ps(b, a); +} +# endif + +# if defined(__KERNEL_SSSE3__) +__forceinline const ssef shuffle8(const ssef &a, const ssei &shuf) +{ + return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf)); +} +# endif + +# if defined(__KERNEL_SSE3__) +template<> __forceinline const ssef shuffle<0, 0, 2, 2>(const ssef &b) +{ + return _mm_moveldup_ps(b); +} +template<> __forceinline const ssef shuffle<1, 1, 3, 3>(const ssef &b) +{ + return _mm_movehdup_ps(b); +} +# endif + +template __forceinline const ssef shuffle(const ssef &b) +{ + return shuffle(b); +} + +# if defined(__KERNEL_AVX__) +__forceinline const ssef shuffle(const ssef &a, const ssei &shuf) +{ + return _mm_permutevar_ps(a, shuf); +} +# endif + +template __forceinline float extract(const ssef &a) +{ + return _mm_cvtss_f32(shuffle(a)); +} +template<> __forceinline float extract<0>(const ssef &a) +{ + return _mm_cvtss_f32(a); +} + +# if defined(__KERNEL_SSE41__) +template +__forceinline const ssef insert(const ssef &a, const ssef &b) +{ +# ifdef __KERNEL_NEON__ + ssef res = a; + if (clr) + res[dst] = 0; + else + res[dst] = b[src]; + return res; +# else + return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); +# endif +} +template __forceinline const ssef insert(const ssef &a, const ssef &b) +{ + return insert(a, b); +} +template __forceinline const ssef insert(const ssef &a, const float b) +{ + return insert(a, _mm_set_ss(b)); +} +# else +template __forceinline const ssef insert(const ssef &a, 
const float b) +{ + ssef c = a; + c[dst] = b; + return c; +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Transpose +//////////////////////////////////////////////////////////////////////////////// + +__forceinline void transpose(const ssef &r0, + const ssef &r1, + const ssef &r2, + const ssef &r3, + ssef &c0, + ssef &c1, + ssef &c2, + ssef &c3) +{ + ssef l02 = unpacklo(r0, r2); + ssef h02 = unpackhi(r0, r2); + ssef l13 = unpacklo(r1, r3); + ssef h13 = unpackhi(r1, r3); + c0 = unpacklo(l02, l13); + c1 = unpackhi(l02, l13); + c2 = unpacklo(h02, h13); + c3 = unpackhi(h02, h13); +} + +__forceinline void transpose( + const ssef &r0, const ssef &r1, const ssef &r2, const ssef &r3, ssef &c0, ssef &c1, ssef &c2) +{ + ssef l02 = unpacklo(r0, r2); + ssef h02 = unpackhi(r0, r2); + ssef l13 = unpacklo(r1, r3); + ssef h13 = unpackhi(r1, r3); + c0 = unpacklo(l02, l13); + c1 = unpackhi(l02, l13); + c2 = unpacklo(h02, h13); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Reductions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssef vreduce_min(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vdupq_n_f32(vminvq_f32(v)); +# else + ssef h = min(shuffle<1, 0, 3, 2>(v), v); + return min(shuffle<2, 3, 0, 1>(h), h); +# endif +} +__forceinline const ssef vreduce_max(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vdupq_n_f32(vmaxvq_f32(v)); +# else + ssef h = max(shuffle<1, 0, 3, 2>(v), v); + return max(shuffle<2, 3, 0, 1>(h), h); +# endif +} +__forceinline const ssef vreduce_add(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vdupq_n_f32(vaddvq_f32(v)); +# else + ssef h = shuffle<1, 0, 3, 2>(v) + v; + return shuffle<2, 3, 0, 1>(h) + h; +# endif +} + +__forceinline float reduce_min(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vminvq_f32(v); +# else + return _mm_cvtss_f32(vreduce_min(v)); +# endif +} +__forceinline float reduce_max(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vmaxvq_f32(v); +# else + return _mm_cvtss_f32(vreduce_max(v)); +# endif +} +__forceinline float reduce_add(const ssef &v) +{ +# ifdef __KERNEL_NEON__ + return vaddvq_f32(v); +# else + return _mm_cvtss_f32(vreduce_add(v)); +# endif +} + +__forceinline uint32_t select_min(const ssef &v) +{ + return __bsf(movemask(v == vreduce_min(v))); +} +__forceinline uint32_t select_max(const ssef &v) +{ + return __bsf(movemask(v == vreduce_max(v))); +} + +__forceinline uint32_t select_min(const sseb &valid, const ssef &v) +{ + const ssef a = select(valid, v, ssef(pos_inf)); + return __bsf(movemask(valid & (a == vreduce_min(a)))); +} +__forceinline uint32_t select_max(const sseb &valid, const ssef &v) +{ + const ssef a = select(valid, v, ssef(neg_inf)); + return __bsf(movemask(valid & (a == vreduce_max(a)))); +} + +__forceinline uint32_t movemask(const ssef &a) +{ + return _mm_movemask_ps(a); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Memory load and store operations +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssef load4f(const float4 &a) +{ +# ifdef __KERNEL_WITH_SSE_ALIGN__ + return _mm_load_ps(&a.x); +# else + return _mm_loadu_ps(&a.x); +# endif +} + +__forceinline ssef load4f(const float3 &a) +{ +# ifdef __KERNEL_WITH_SSE_ALIGN__ + return _mm_load_ps(&a.x); +# else + return _mm_loadu_ps(&a.x); +# endif +} + +__forceinline ssef load4f(const void *const a) +{ + return 
_mm_load_ps((float *)a); +} + +__forceinline ssef load1f_first(const float a) +{ + return _mm_set_ss(a); +} + +__forceinline void store4f(void *ptr, const ssef &v) +{ + _mm_store_ps((float *)ptr, v); +} + +__forceinline ssef loadu4f(const void *const a) +{ + return _mm_loadu_ps((float *)a); +} + +__forceinline void storeu4f(void *ptr, const ssef &v) +{ + _mm_storeu_ps((float *)ptr, v); +} + +__forceinline void store4f(const sseb &mask, void *ptr, const ssef &f) +{ +# if defined(__KERNEL_AVX__) + _mm_maskstore_ps((float *)ptr, (__m128i)mask, f); +# else + *(ssef *)ptr = select(mask, f, *(ssef *)ptr); +# endif +} + +__forceinline ssef load4f_nt(void *ptr) +{ +# if defined(__KERNEL_SSE41__) + return _mm_castsi128_ps(_mm_stream_load_si128((__m128i *)ptr)); +# else + return _mm_load_ps((float *)ptr); +# endif +} + +__forceinline void store4f_nt(void *ptr, const ssef &v) +{ +# if defined(__KERNEL_SSE41__) + _mm_stream_ps((float *)ptr, v); +# else + _mm_store_ps((float *)ptr, v); +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Euclidian Space Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline float dot(const ssef &a, const ssef &b) +{ + return reduce_add(a * b); +} + +/* calculate shuffled cross product, useful when order of components does not matter */ +__forceinline ssef cross_zxy(const ssef &a, const ssef &b) +{ + const ssef a0 = a; + const ssef b0 = shuffle<1, 2, 0, 3>(b); + const ssef a1 = shuffle<1, 2, 0, 3>(a); + const ssef b1 = b; + return msub(a0, b0, a1 * b1); +} + +__forceinline ssef cross(const ssef &a, const ssef &b) +{ + return shuffle<1, 2, 0, 3>(cross_zxy(a, b)); +} + +ccl_device_inline const ssef dot3_splat(const ssef &a, const ssef &b) +{ +# ifdef __KERNEL_SSE41__ + return _mm_dp_ps(a.m128, b.m128, 0x7f); +# else + ssef t = a * b; + return ssef(((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]); +# endif +} + +/* squared length taking only specified axes into account */ +template ccl_device_inline float len_squared(const ssef &a) +{ +# ifndef __KERNEL_SSE41__ + float4 &t = (float4 &)a; + return (X ? t.x * t.x : 0.0f) + (Y ? t.y * t.y : 0.0f) + (Z ? t.z * t.z : 0.0f) + + (W ? 
t.w * t.w : 0.0f); +# else + return extract<0>( + ssef(_mm_dp_ps(a.m128, a.m128, (X << 4) | (Y << 5) | (Z << 6) | (W << 7) | 0xf))); +# endif +} + +ccl_device_inline float dot3(const ssef &a, const ssef &b) +{ +# ifdef __KERNEL_SSE41__ + return extract<0>(ssef(_mm_dp_ps(a.m128, b.m128, 0x7f))); +# else + ssef t = a * b; + return ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]; +# endif +} + +ccl_device_inline const ssef len3_squared_splat(const ssef &a) +{ + return dot3_splat(a, a); +} + +ccl_device_inline float len3_squared(const ssef &a) +{ + return dot3(a, a); +} + +ccl_device_inline float len3(const ssef &a) +{ + return extract<0>(mm_sqrt(dot3_splat(a, a))); +} + +/* SSE shuffle utility functions */ + +# ifdef __KERNEL_SSSE3__ + +/* faster version for SSSE3 */ +typedef ssei shuffle_swap_t; + +ccl_device_inline shuffle_swap_t shuffle_swap_identity() +{ + return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); +} + +ccl_device_inline shuffle_swap_t shuffle_swap_swap() +{ + return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); +} + +ccl_device_inline const ssef shuffle_swap(const ssef &a, const shuffle_swap_t &shuf) +{ + return cast(_mm_shuffle_epi8(cast(a), shuf)); +} + +# else + +/* somewhat slower version for SSE2 */ +typedef int shuffle_swap_t; + +ccl_device_inline shuffle_swap_t shuffle_swap_identity() +{ + return 0; +} + +ccl_device_inline shuffle_swap_t shuffle_swap_swap() +{ + return 1; +} + +ccl_device_inline const ssef shuffle_swap(const ssef &a, shuffle_swap_t shuf) +{ + /* shuffle value must be a constant, so we need to branch */ + if (shuf) + return shuffle<1, 0, 3, 2>(a); + else + return shuffle<3, 2, 1, 0>(a); +} + +# endif + +# if defined(__KERNEL_SSE41__) && !defined(__KERNEL_NEON__) + +ccl_device_inline void gen_idirsplat_swap(const ssef &pn, + const shuffle_swap_t &shuf_identity, + const shuffle_swap_t &shuf_swap, + const float3 &idir, + ssef idirsplat[3], + shuffle_swap_t shufflexyz[3]) +{ + const __m128 idirsplat_raw[] = {_mm_set_ps1(idir.x), _mm_set_ps1(idir.y), _mm_set_ps1(idir.z)}; + idirsplat[0] = _mm_xor_ps(idirsplat_raw[0], pn); + idirsplat[1] = _mm_xor_ps(idirsplat_raw[1], pn); + idirsplat[2] = _mm_xor_ps(idirsplat_raw[2], pn); + + const ssef signmask = cast(ssei(0x80000000)); + const ssef shuf_identity_f = cast(shuf_identity); + const ssef shuf_swap_f = cast(shuf_swap); + + shufflexyz[0] = _mm_castps_si128( + _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[0], signmask))); + shufflexyz[1] = _mm_castps_si128( + _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[1], signmask))); + shufflexyz[2] = _mm_castps_si128( + _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[2], signmask))); +} + +# else + +ccl_device_inline void gen_idirsplat_swap(const ssef &pn, + const shuffle_swap_t &shuf_identity, + const shuffle_swap_t &shuf_swap, + const float3 &idir, + ssef idirsplat[3], + shuffle_swap_t shufflexyz[3]) +{ + idirsplat[0] = ssef(idir.x) ^ pn; + idirsplat[1] = ssef(idir.y) ^ pn; + idirsplat[2] = ssef(idir.z) ^ pn; + + shufflexyz[0] = (idir.x >= 0) ? shuf_identity : shuf_swap; + shufflexyz[1] = (idir.y >= 0) ? shuf_identity : shuf_swap; + shufflexyz[2] = (idir.z >= 0) ? 
shuf_identity : shuf_swap; +} + +# endif + +ccl_device_inline const ssef uint32_to_float(const ssei &in) +{ + ssei a = _mm_srli_epi32(in, 16); + ssei b = _mm_and_si128(in, _mm_set1_epi32(0x0000ffff)); + ssei c = _mm_or_si128(a, _mm_set1_epi32(0x53000000)); + ssef d = _mm_cvtepi32_ps(b); + ssef e = _mm_sub_ps(_mm_castsi128_ps(c), _mm_castsi128_ps(_mm_set1_epi32(0x53000000))); + return _mm_add_ps(e, d); +} + +template +ccl_device_inline const ssef set_sign_bit(const ssef &a) +{ + return cast(cast(a) ^ ssei(S1 << 31, S2 << 31, S3 << 31, S4 << 31)); +} + +//////////////////////////////////////////////////////////////////////////////// +/// Debug Functions +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_ssef(const char *label, const ssef &a) +{ + printf( + "%s: %.8f %.8f %.8f %.8f\n", label, (double)a[0], (double)a[1], (double)a[2], (double)a[3]); +} + +#endif + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/ssei.h b/intern/cycles/util/ssei.h new file mode 100644 index 00000000000..94412fb77e7 --- /dev/null +++ b/intern/cycles/util/ssei.h @@ -0,0 +1,646 @@ +/* + * Copyright 2011-2013 Intel Corporation + * Modifications Copyright 2014, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SSEI_H__ +#define __UTIL_SSEI_H__ + +CCL_NAMESPACE_BEGIN + +#ifdef __KERNEL_SSE2__ + +struct sseb; +struct ssef; + +/*! 4-wide SSE integer type. 
*/ +struct ssei { + typedef sseb Mask; // mask type + typedef ssei Int; // int type + typedef ssef Float; // float type + + enum { size = 4 }; // number of SIMD elements + union { + __m128i m128; + int32_t i[4]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline ssei() + { + } + __forceinline ssei(const ssei &a) + { + m128 = a.m128; + } + __forceinline ssei &operator=(const ssei &a) + { + m128 = a.m128; + return *this; + } + + __forceinline ssei(const __m128i a) : m128(a) + { + } + __forceinline operator const __m128i &(void) const + { + return m128; + } + __forceinline operator __m128i &(void) + { + return m128; + } + + __forceinline ssei(const int a) : m128(_mm_set1_epi32(a)) + { + } + __forceinline ssei(int a, int b, int c, int d) : m128(_mm_setr_epi32(a, b, c, d)) + { + } + + __forceinline explicit ssei(const __m128 a) : m128(_mm_cvtps_epi32(a)) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const int32_t &operator[](const size_t index) const + { + assert(index < 4); + return i[index]; + } + __forceinline int32_t &operator[](const size_t index) + { + assert(index < 4); + return i[index]; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +/// Unary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssei cast(const __m128 &a) +{ + return _mm_castps_si128(a); +} +__forceinline const ssei operator+(const ssei &a) +{ + return a; +} +__forceinline const ssei operator-(const ssei &a) +{ + return _mm_sub_epi32(_mm_setzero_si128(), a.m128); +} +# if defined(__KERNEL_SSSE3__) +__forceinline const ssei abs(const ssei &a) +{ + return _mm_abs_epi32(a.m128); +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Binary Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline const ssei operator+(const ssei &a, const ssei &b) +{ + return _mm_add_epi32(a.m128, b.m128); +} +__forceinline const ssei operator+(const ssei &a, const int32_t &b) +{ + return a + ssei(b); +} +__forceinline const ssei operator+(const int32_t &a, const ssei &b) +{ + return ssei(a) + b; +} + +__forceinline const ssei operator-(const ssei &a, const ssei &b) +{ + return _mm_sub_epi32(a.m128, b.m128); +} +__forceinline const ssei operator-(const ssei &a, const int32_t &b) +{ + return a - ssei(b); +} +__forceinline const ssei operator-(const int32_t &a, const ssei &b) +{ + return ssei(a) - b; +} + +# if defined(__KERNEL_SSE41__) +__forceinline const ssei operator*(const ssei &a, const ssei &b) +{ + return _mm_mullo_epi32(a.m128, b.m128); +} +__forceinline const ssei operator*(const ssei &a, const int32_t &b) +{ + return a * ssei(b); +} +__forceinline const ssei operator*(const int32_t &a, const ssei &b) +{ + return ssei(a) * b; +} +# endif + +__forceinline const ssei operator&(const ssei &a, const ssei &b) +{ + return _mm_and_si128(a.m128, b.m128); +} +__forceinline const ssei operator&(const ssei &a, const int32_t &b) +{ + return a & ssei(b); +} +__forceinline const ssei operator&(const int32_t &a, const ssei &b) +{ + return ssei(a) & b; +} + +__forceinline const ssei 
operator|(const ssei &a, const ssei &b) +{ + return _mm_or_si128(a.m128, b.m128); +} +__forceinline const ssei operator|(const ssei &a, const int32_t &b) +{ + return a | ssei(b); +} +__forceinline const ssei operator|(const int32_t &a, const ssei &b) +{ + return ssei(a) | b; +} + +__forceinline const ssei operator^(const ssei &a, const ssei &b) +{ + return _mm_xor_si128(a.m128, b.m128); +} +__forceinline const ssei operator^(const ssei &a, const int32_t &b) +{ + return a ^ ssei(b); +} +__forceinline const ssei operator^(const int32_t &a, const ssei &b) +{ + return ssei(a) ^ b; +} + +__forceinline const ssei operator<<(const ssei &a, const int32_t &n) +{ + return _mm_slli_epi32(a.m128, n); +} +__forceinline const ssei operator>>(const ssei &a, const int32_t &n) +{ + return _mm_srai_epi32(a.m128, n); +} + +__forceinline const ssei andnot(const ssei &a, const ssei &b) +{ + return _mm_andnot_si128(a.m128, b.m128); +} +__forceinline const ssei andnot(const sseb &a, const ssei &b) +{ + return _mm_andnot_si128(cast(a.m128), b.m128); +} +__forceinline const ssei andnot(const ssei &a, const sseb &b) +{ + return _mm_andnot_si128(a.m128, cast(b.m128)); +} + +__forceinline const ssei sra(const ssei &a, const int32_t &b) +{ + return _mm_srai_epi32(a.m128, b); +} +__forceinline const ssei srl(const ssei &a, const int32_t &b) +{ + return _mm_srli_epi32(a.m128, b); +} + +# if defined(__KERNEL_SSE41__) +__forceinline const ssei min(const ssei &a, const ssei &b) +{ + return _mm_min_epi32(a.m128, b.m128); +} +__forceinline const ssei min(const ssei &a, const int32_t &b) +{ + return min(a, ssei(b)); +} +__forceinline const ssei min(const int32_t &a, const ssei &b) +{ + return min(ssei(a), b); +} + +__forceinline const ssei max(const ssei &a, const ssei &b) +{ + return _mm_max_epi32(a.m128, b.m128); +} +__forceinline const ssei max(const ssei &a, const int32_t &b) +{ + return max(a, ssei(b)); +} +__forceinline const ssei max(const int32_t &a, const ssei &b) +{ + return max(ssei(a), b); +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Assignment Operators +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssei &operator+=(ssei &a, const ssei &b) +{ + return a = a + b; +} +__forceinline ssei &operator+=(ssei &a, const int32_t &b) +{ + return a = a + b; +} + +__forceinline ssei &operator-=(ssei &a, const ssei &b) +{ + return a = a - b; +} +__forceinline ssei &operator-=(ssei &a, const int32_t &b) +{ + return a = a - b; +} + +# if defined(__KERNEL_SSE41__) +__forceinline ssei &operator*=(ssei &a, const ssei &b) +{ + return a = a * b; +} +__forceinline ssei &operator*=(ssei &a, const int32_t &b) +{ + return a = a * b; +} +# endif + +__forceinline ssei &operator&=(ssei &a, const ssei &b) +{ + return a = a & b; +} +__forceinline ssei &operator&=(ssei &a, const int32_t &b) +{ + return a = a & b; +} + +__forceinline ssei &operator|=(ssei &a, const ssei &b) +{ + return a = a | b; +} +__forceinline ssei &operator|=(ssei &a, const int32_t &b) +{ + return a = a | b; +} + +__forceinline ssei &operator^=(ssei &a, const ssei &b) +{ + return a = a ^ b; +} +__forceinline ssei &operator^=(ssei &a, const int32_t &b) +{ + return a = a ^ b; +} + +__forceinline ssei &operator<<=(ssei &a, const int32_t &b) +{ + return a = a << b; +} +__forceinline ssei &operator>>=(ssei &a, const int32_t &b) +{ + return a = a >> b; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Comparison Operators + Select 
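A side note, not part of the patch: sra() and srl() above expose the two flavors of right shift, and the distinction only matters for negative lanes. A standalone sketch with illustrative values:

```cpp
// Standalone sketch (not part of the patch): arithmetic vs. logical right
// shift, as wrapped by sra()/srl() in the header above. SSE2 only.
#include <emmintrin.h>
#include <cstdio>

int main()
{
  const __m128i v = _mm_setr_epi32(-8, -1, 8, 1);

  const __m128i a = _mm_srai_epi32(v, 1);  // arithmetic: sign bit replicated, -8 -> -4
  const __m128i l = _mm_srli_epi32(v, 1);  // logical: zero fill, 0xFFFFFFF8 -> 0x7FFFFFFC

  int ai[4], li[4];
  _mm_storeu_si128((__m128i *)ai, a);
  _mm_storeu_si128((__m128i *)li, l);
  for (int i = 0; i < 4; i++)
    printf("sra: %d  srl: %u\n", ai[i], (unsigned)li[i]);
  return 0;
}
```

Note that operator>> on ssei maps to the arithmetic form (_mm_srai_epi32), matching what >> does for signed scalars on typical compilers.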
+//////////////////////////////////////////////////////////////////////////////// + +__forceinline const sseb operator==(const ssei &a, const ssei &b) +{ + return _mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128)); +} +__forceinline const sseb operator==(const ssei &a, const int32_t &b) +{ + return a == ssei(b); +} +__forceinline const sseb operator==(const int32_t &a, const ssei &b) +{ + return ssei(a) == b; +} + +__forceinline const sseb operator!=(const ssei &a, const ssei &b) +{ + return !(a == b); +} +__forceinline const sseb operator!=(const ssei &a, const int32_t &b) +{ + return a != ssei(b); +} +__forceinline const sseb operator!=(const int32_t &a, const ssei &b) +{ + return ssei(a) != b; +} + +__forceinline const sseb operator<(const ssei &a, const ssei &b) +{ + return _mm_castsi128_ps(_mm_cmplt_epi32(a.m128, b.m128)); +} +__forceinline const sseb operator<(const ssei &a, const int32_t &b) +{ + return a < ssei(b); +} +__forceinline const sseb operator<(const int32_t &a, const ssei &b) +{ + return ssei(a) < b; +} + +__forceinline const sseb operator>=(const ssei &a, const ssei &b) +{ + return !(a < b); +} +__forceinline const sseb operator>=(const ssei &a, const int32_t &b) +{ + return a >= ssei(b); +} +__forceinline const sseb operator>=(const int32_t &a, const ssei &b) +{ + return ssei(a) >= b; +} + +__forceinline const sseb operator>(const ssei &a, const ssei &b) +{ + return _mm_castsi128_ps(_mm_cmpgt_epi32(a.m128, b.m128)); +} +__forceinline const sseb operator>(const ssei &a, const int32_t &b) +{ + return a > ssei(b); +} +__forceinline const sseb operator>(const int32_t &a, const ssei &b) +{ + return ssei(a) > b; +} + +__forceinline const sseb operator<=(const ssei &a, const ssei &b) +{ + return !(a > b); +} +__forceinline const sseb operator<=(const ssei &a, const int32_t &b) +{ + return a <= ssei(b); +} +__forceinline const sseb operator<=(const int32_t &a, const ssei &b) +{ + return ssei(a) <= b; +} + +__forceinline const ssei select(const sseb &m, const ssei &t, const ssei &f) +{ +# ifdef __KERNEL_SSE41__ + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); +# else + return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f)); +# endif +} + +__forceinline const ssei select(const int mask, const ssei &t, const ssei &f) +{ +# if defined(__KERNEL_SSE41__) && \ + ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) + return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), mask)); +# else + return select(sseb(mask), t, f); +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +// Movement/Shifting/Shuffling Functions +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssei unpacklo(const ssei &a, const ssei &b) +{ + return _mm_unpacklo_epi32(a, b); +} +__forceinline ssei unpackhi(const ssei &a, const ssei &b) +{ + return _mm_unpackhi_epi32(a, b); +} + +template +__forceinline const ssei shuffle(const ssei &a) +{ +# ifdef __KERNEL_NEON__ + int32x4_t result = shuffle_neon(vreinterpretq_s32_m128i(a)); + return vreinterpretq_m128i_s32(result); +# else + return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)); +# endif +} + +template +__forceinline const ssei shuffle(const ssei &a, const ssei &b) +{ +# ifdef __KERNEL_NEON__ + int32x4_t result = shuffle_neon(vreinterpretq_s32_m128i(a), + vreinterpretq_s32_m128i(b)); + return vreinterpretq_m128i_s32(result); +# else + return _mm_castps_si128( + 
_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); +# endif +} + +template __forceinline const ssei shuffle(const ssei &b) +{ + return shuffle(b); +} + +# if defined(__KERNEL_SSE41__) +template __forceinline int extract(const ssei &b) +{ + return _mm_extract_epi32(b, src); +} +template __forceinline const ssei insert(const ssei &a, const int32_t b) +{ + return _mm_insert_epi32(a, b, dst); +} +# else +template __forceinline int extract(const ssei &b) +{ + return b[src]; +} +template __forceinline const ssei insert(const ssei &a, const int32_t b) +{ + ssei c = a; + c[dst] = b; + return c; +} +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Reductions +//////////////////////////////////////////////////////////////////////////////// + +# if defined(__KERNEL_SSE41__) +__forceinline const ssei vreduce_min(const ssei &v) +{ + ssei h = min(shuffle<1, 0, 3, 2>(v), v); + return min(shuffle<2, 3, 0, 1>(h), h); +} +__forceinline const ssei vreduce_max(const ssei &v) +{ + ssei h = max(shuffle<1, 0, 3, 2>(v), v); + return max(shuffle<2, 3, 0, 1>(h), h); +} +__forceinline const ssei vreduce_add(const ssei &v) +{ + ssei h = shuffle<1, 0, 3, 2>(v) + v; + return shuffle<2, 3, 0, 1>(h) + h; +} + +__forceinline int reduce_min(const ssei &v) +{ +# ifdef __KERNEL_NEON__ + return vminvq_s32(vreinterpretq_s32_m128i(v)); +# else + return extract<0>(vreduce_min(v)); +# endif +} +__forceinline int reduce_max(const ssei &v) +{ +# ifdef __KERNEL_NEON__ + return vmaxvq_s32(vreinterpretq_s32_m128i(v)); +# else + return extract<0>(vreduce_max(v)); +# endif +} +__forceinline int reduce_add(const ssei &v) +{ +# ifdef __KERNEL_NEON__ + return vaddvq_s32(vreinterpretq_s32_m128i(v)); +# else + return extract<0>(vreduce_add(v)); +# endif +} + +__forceinline uint32_t select_min(const ssei &v) +{ + return __bsf(movemask(v == vreduce_min(v))); +} +__forceinline uint32_t select_max(const ssei &v) +{ + return __bsf(movemask(v == vreduce_max(v))); +} + +__forceinline uint32_t select_min(const sseb &valid, const ssei &v) +{ + const ssei a = select(valid, v, ssei((int)pos_inf)); + return __bsf(movemask(valid & (a == vreduce_min(a)))); +} +__forceinline uint32_t select_max(const sseb &valid, const ssei &v) +{ + const ssei a = select(valid, v, ssei((int)neg_inf)); + return __bsf(movemask(valid & (a == vreduce_max(a)))); +} + +# else + +__forceinline int ssei_min(int a, int b) +{ + return (a < b) ? a : b; +} +__forceinline int ssei_max(int a, int b) +{ + return (a > b) ? 
a : b; +} +__forceinline int reduce_min(const ssei &v) +{ + return ssei_min(ssei_min(v[0], v[1]), ssei_min(v[2], v[3])); +} +__forceinline int reduce_max(const ssei &v) +{ + return ssei_max(ssei_max(v[0], v[1]), ssei_max(v[2], v[3])); +} +__forceinline int reduce_add(const ssei &v) +{ + return v[0] + v[1] + v[2] + v[3]; +} + +# endif + +//////////////////////////////////////////////////////////////////////////////// +/// Memory load and store operations +//////////////////////////////////////////////////////////////////////////////// + +__forceinline ssei load4i(const void *const a) +{ + return _mm_load_si128((__m128i *)a); +} + +__forceinline void store4i(void *ptr, const ssei &v) +{ + _mm_store_si128((__m128i *)ptr, v); +} + +__forceinline void storeu4i(void *ptr, const ssei &v) +{ + _mm_storeu_si128((__m128i *)ptr, v); +} + +__forceinline void store4i(const sseb &mask, void *ptr, const ssei &i) +{ +# if defined(__KERNEL_AVX__) + _mm_maskstore_ps((float *)ptr, (__m128i)mask, _mm_castsi128_ps(i)); +# else + *(ssei *)ptr = select(mask, i, *(ssei *)ptr); +# endif +} + +__forceinline ssei load4i_nt(void *ptr) +{ +# if defined(__KERNEL_SSE41__) + return _mm_stream_load_si128((__m128i *)ptr); +# else + return _mm_load_si128((__m128i *)ptr); +# endif +} + +__forceinline void store4i_nt(void *ptr, const ssei &v) +{ +# if defined(__KERNEL_SSE41__) + _mm_stream_ps((float *)ptr, _mm_castsi128_ps(v)); +# else + _mm_store_si128((__m128i *)ptr, v); +# endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// Debug Functions +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_ssei(const char *label, const ssei &a) +{ + printf("%s: %df %df %df %d\n", label, a[0], a[1], a[2], a[3]); +} + +#endif + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/stack_allocator.h b/intern/cycles/util/stack_allocator.h new file mode 100644 index 00000000000..ef31c0fe5e2 --- /dev/null +++ b/intern/cycles/util/stack_allocator.h @@ -0,0 +1,165 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_STACK_ALLOCATOR_H__ +#define __UTIL_STACK_ALLOCATOR_H__ + +#include +#include + +CCL_NAMESPACE_BEGIN + +/* Stack allocator for the use with STL. */ +template class ccl_try_align(16) StackAllocator +{ + public: + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T *pointer; + typedef const T *const_pointer; + typedef T &reference; + typedef const T &const_reference; + typedef T value_type; + + /* Allocator construction/destruction. */ + + StackAllocator() : pointer_(0), use_stack_(true) + { + } + + StackAllocator(const StackAllocator &) : pointer_(0), use_stack_(true) + { + } + + template + StackAllocator(const StackAllocator &) : pointer_(0), use_stack_(false) + { + } + + /* Memory allocation/deallocation. 
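
Usage sketch for the ssei/sseb helpers above (illustrative, not part of this patch): it assumes the header is included as "util/ssei.h" after the rename and that the kernel is built with __KERNEL_SSE41__ so the packed min/max overloads exist; function names are made up for the example.

#include "util/ssei.h" /* assumed include path after the rename */

CCL_NAMESPACE_BEGIN

/* Clamp each of the four int32 lanes to [lo, hi] and return their sum. */
static inline int sum_clamped(const ssei &v, int lo, int hi)
{
  const ssei clamped = min(max(v, ssei(lo)), ssei(hi)); /* SSE4.1 packed min/max */
  return reduce_add(clamped);                           /* horizontal add of all four lanes */
}

/* Zero out negative lanes using a comparison mask and select(). */
static inline ssei max_with_zero(const ssei &v)
{
  const sseb positive = v > ssei(0); /* per-lane mask */
  return select(positive, v, ssei(0));
}

CCL_NAMESPACE_END
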
*/ + + T *allocate(size_t n, const void *hint = 0) + { + (void)hint; + if (n == 0) { + return NULL; + } + if (pointer_ + n >= SIZE || use_stack_ == false) { + size_t size = n * sizeof(T); + util_guarded_mem_alloc(size); + T *mem; +#ifdef WITH_BLENDER_GUARDEDALLOC + mem = (T *)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); +#else + mem = (T *)malloc(size); +#endif + if (mem == NULL) { + throw std::bad_alloc(); + } + return mem; + } + T *mem = &data_[pointer_]; + pointer_ += n; + return mem; + } + + void deallocate(T * p, size_t n) + { + if (p == NULL) { + return; + } + if (p < data_ || p >= data_ + SIZE) { + util_guarded_mem_free(n * sizeof(T)); +#ifdef WITH_BLENDER_GUARDEDALLOC + MEM_freeN(p); +#else + free(p); +#endif + return; + } + /* We don't support memory free for the stack allocator. */ + } + + /* Address of an reference. */ + + T *address(T & x) const + { + return &x; + } + + const T *address(const T &x) const + { + return &x; + } + + /* Object construction/destruction. */ + + void construct(T * p, const T &val) + { + if (p != NULL) { + new ((T *)p) T(val); + } + } + + void destroy(T * p) + { + p->~T(); + } + + /* Maximum allocation size. */ + + size_t max_size() const + { + return size_t(-1); + } + + /* Rebind to other type of allocator. */ + + template struct rebind { + typedef StackAllocator other; + }; + + /* Operators */ + + template inline StackAllocator &operator=(const StackAllocator &) + { + return *this; + } + + StackAllocator &operator=(const StackAllocator &) + { + return *this; + } + + inline bool operator==(StackAllocator const & /*other*/) const + { + return true; + } + + inline bool operator!=(StackAllocator const &other) const + { + return !operator==(other); + } + + private: + int pointer_; + bool use_stack_; + T data_[SIZE]; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_STACK_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/static_assert.h b/intern/cycles/util/static_assert.h new file mode 100644 index 00000000000..7df52d462b7 --- /dev/null +++ b/intern/cycles/util/static_assert.h @@ -0,0 +1,36 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* clang-format off */ + +/* #define static_assert triggers a bug in some clang-format versions, disable + * format for entire file to keep results consistent. 
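
A minimal usage sketch for the StackAllocator above (illustrative, assuming the usual <SIZE, T> template parameter order): it is an STL-compatible allocator that serves the first SIZE elements from a fixed in-object buffer and silently falls back to guarded heap allocation beyond that, which makes it suitable only for short-lived, bounded containers.

#include <vector>

#include "util/stack_allocator.h" /* the header above, path assumed */

CCL_NAMESPACE_BEGIN

static void stack_allocator_example()
{
  /* Up to 64 ints come from the allocator's embedded buffer; growing past that
   * transparently falls back to MEM_mallocN_aligned()/malloc(). */
  typedef std::vector<int, StackAllocator<64, int>> SmallIntVector;

  SmallIntVector values;
  for (int i = 0; i < 16; i++) {
    values.push_back(i);
  }
  /* Note: stack space handed out is never reused (deallocate is a no-op for it). */
}

CCL_NAMESPACE_END
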
*/ + +#ifndef __UTIL_STATIC_ASSERT_H__ +#define __UTIL_STATIC_ASSERT_H__ + +CCL_NAMESPACE_BEGIN + +#if defined(CYCLES_CUBIN_CC) +# define static_assert(statement, message) +#endif + +#define static_assert_align(st, align) \ + static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT + +CCL_NAMESPACE_END + +#endif /* __UTIL_STATIC_ASSERT_H__ */ diff --git a/intern/cycles/util/stats.h b/intern/cycles/util/stats.h new file mode 100644 index 00000000000..590973f1cbc --- /dev/null +++ b/intern/cycles/util/stats.h @@ -0,0 +1,54 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_STATS_H__ +#define __UTIL_STATS_H__ + +#include "util/atomic.h" +#include "util/profiling.h" + +CCL_NAMESPACE_BEGIN + +class Stats { + public: + enum static_init_t { static_init = 0 }; + + Stats() : mem_used(0), mem_peak(0) + { + } + explicit Stats(static_init_t) + { + } + + void mem_alloc(size_t size) + { + atomic_add_and_fetch_z(&mem_used, size); + atomic_fetch_and_update_max_z(&mem_peak, mem_used); + } + + void mem_free(size_t size) + { + assert(mem_used >= size); + atomic_sub_and_fetch_z(&mem_used, size); + } + + size_t mem_used; + size_t mem_peak; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_STATS_H__ */ diff --git a/intern/cycles/util/string.cpp b/intern/cycles/util/string.cpp new file mode 100644 index 00000000000..b98272f7759 --- /dev/null +++ b/intern/cycles/util/string.cpp @@ -0,0 +1,268 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include + +#include "util/foreach.h" +#include "util/string.h" +#include "util/windows.h" + +#ifdef _WIN32 +# ifndef vsnprintf +# define vsnprintf _vsnprintf +# endif +#endif /* _WIN32 */ + +CCL_NAMESPACE_BEGIN + +string string_printf(const char *format, ...) 
+{ + vector str(128, 0); + + while (1) { + va_list args; + int result; + + va_start(args, format); + result = vsnprintf(&str[0], str.size(), format, args); + va_end(args); + + if (result == -1) { + /* not enough space or formatting error */ + if (str.size() > 65536) { + assert(0); + return string(""); + } + + str.resize(str.size() * 2, 0); + continue; + } + else if (result >= (int)str.size()) { + /* not enough space */ + str.resize(result + 1, 0); + continue; + } + + return string(&str[0]); + } +} + +bool string_iequals(const string &a, const string &b) +{ + if (a.size() == b.size()) { + for (size_t i = 0; i < a.size(); i++) + if (toupper(a[i]) != toupper(b[i])) + return false; + + return true; + } + + return false; +} + +void string_split(vector &tokens, + const string &str, + const string &separators, + bool skip_empty_tokens) +{ + size_t token_start = 0, token_length = 0; + for (size_t i = 0; i < str.size(); ++i) { + const char ch = str[i]; + if (separators.find(ch) == string::npos) { + /* Current character is not a separator, + * append it to token by increasing token length. + */ + ++token_length; + } + else { + /* Current character is a separator, + * append current token to the list. + */ + if (!skip_empty_tokens || token_length > 0) { + string token = str.substr(token_start, token_length); + tokens.push_back(token); + } + token_start = i + 1; + token_length = 0; + } + } + /* Append token from the tail of the string if exists. */ + if (token_length) { + string token = str.substr(token_start, token_length); + tokens.push_back(token); + } +} + +bool string_startswith(const string_view s, const string_view start) +{ + const size_t len = start.size(); + + if (len > s.size()) { + return false; + } + + return strncmp(s.c_str(), start.data(), len) == 0; +} + +bool string_endswith(const string_view s, const string_view end) +{ + const size_t len = end.size(); + + if (len > s.size()) { + return false; + } + + return strncmp(s.c_str() + s.size() - len, end.data(), len) == 0; +} + +string string_strip(const string &s) +{ + string result = s; + result.erase(0, result.find_first_not_of(' ')); + result.erase(result.find_last_not_of(' ') + 1); + return result; +} + +void string_replace(string &haystack, const string &needle, const string &other) +{ + size_t i = 0, index; + while ((index = haystack.find(needle, i)) != string::npos) { + haystack.replace(index, needle.size(), other); + i = index + other.size(); + } +} + +string string_remove_trademark(const string &s) +{ + string result = s; + + /* Special case, so we don't leave sequential spaces behind. */ + /* TODO(sergey): Consider using regex perhaps? */ + string_replace(result, " (TM)", ""); + string_replace(result, " (R)", ""); + + string_replace(result, "(TM)", ""); + string_replace(result, "(R)", ""); + + return string_strip(result); +} + +string string_from_bool(bool var) +{ + if (var) + return "True"; + else + return "False"; +} + +string to_string(const char *str) +{ + return string(str); +} + +string string_to_lower(const string &s) +{ + string r = s; + std::transform(r.begin(), r.end(), r.begin(), [](char c) { return std::tolower(c); }); + return r; +} + +/* Wide char strings helpers for Windows. 
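
A brief usage sketch for the string helpers above (illustrative; the values in the comments follow directly from the implementations shown):

#include <cassert>

#include "util/string.h"

CCL_NAMESPACE_BEGIN

static void string_helpers_example()
{
  vector<string> tokens;
  string_split(tokens, "a, b,, c", ", " /* separators */, true /* skip_empty_tokens */);
  /* tokens == {"a", "b", "c"} */

  string label = string_printf("%d tokens", (int)tokens.size()); /* "3 tokens" */
  string_replace(label, "tokens", "items");                      /* "3 items" */

  assert(string_startswith(label, "3"));
  assert(string_iequals(string_to_lower("MixedCase"), "mixedcase"));
  assert(string_strip("  spaced  ") == "spaced");
}

CCL_NAMESPACE_END
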
*/ + +#ifdef _WIN32 + +wstring string_to_wstring(const string &str) +{ + const int length_wc = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0); + wstring str_wc(length_wc, 0); + MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &str_wc[0], length_wc); + return str_wc; +} + +string string_from_wstring(const wstring &str) +{ + int length_mb = WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.size(), NULL, 0, NULL, NULL); + string str_mb(length_mb, 0); + WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.size(), &str_mb[0], length_mb, NULL, NULL); + return str_mb; +} + +string string_to_ansi(const string &str) +{ + const int length_wc = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0); + wstring str_wc(length_wc, 0); + MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &str_wc[0], length_wc); + + int length_mb = WideCharToMultiByte( + CP_ACP, 0, str_wc.c_str(), str_wc.size(), NULL, 0, NULL, NULL); + + string str_mb(length_mb, 0); + WideCharToMultiByte(CP_ACP, 0, str_wc.c_str(), str_wc.size(), &str_mb[0], length_mb, NULL, NULL); + + return str_mb; +} + +#endif /* _WIN32 */ + +string string_human_readable_size(size_t size) +{ + static const char suffixes[] = "BKMGTPEZY"; + + const char *suffix = suffixes; + size_t r = 0; + + while (size >= 1024) { + r = size % 1024; + size /= 1024; + suffix++; + } + + if (*suffix != 'B') + return string_printf("%.2f%c", double(size * 1024 + r) / 1024.0, *suffix); + else + return string_printf("%zu", size); +} + +string string_human_readable_number(size_t num) +{ + if (num == 0) { + return "0"; + } + + /* Add thousands separators. */ + char buf[32]; + + char *p = buf + 31; + *p = '\0'; + + int i = -1; + while (num) { + if (++i && i % 3 == 0) + *(--p) = ','; + + *(--p) = '0' + (num % 10); + + num /= 10; + } + + return p; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/string.h b/intern/cycles/util/string.h new file mode 100644 index 00000000000..cc20a6df120 --- /dev/null +++ b/intern/cycles/util/string.h @@ -0,0 +1,81 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +/* Use string view implementation from OIIO. + * Ideally, need to switch to `std::string_view`, but this first requires getting rid of using + * namespace OIIO as it causes symbol collision. */ +#include + +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +using std::istringstream; +using std::ostringstream; +using std::string; +using std::stringstream; +using std::to_string; + +using OIIO::string_view; + +#ifdef __GNUC__ +# define PRINTF_ATTRIBUTE __attribute__((format(printf, 1, 2))) +#else +# define PRINTF_ATTRIBUTE +#endif + +string string_printf(const char *format, ...) 
PRINTF_ATTRIBUTE; + +bool string_iequals(const string &a, const string &b); +void string_split(vector &tokens, + const string &str, + const string &separators = "\t ", + bool skip_empty_tokens = true); +void string_replace(string &haystack, const string &needle, const string &other); +bool string_startswith(string_view s, string_view start); +bool string_endswith(string_view s, string_view end); +string string_strip(const string &s); +string string_remove_trademark(const string &s); +string string_from_bool(const bool var); +string to_string(const char *str); +string string_to_lower(const string &s); + +/* Wide char strings are only used on Windows to deal with non-ASCII + * characters in file names and such. No reason to use such strings + * for something else at this moment. + * + * Please note that strings are expected to be in UTF-8 codepage, and + * if ANSI is needed then explicit conversion required. + */ +#ifdef _WIN32 +using std::wstring; +wstring string_to_wstring(const string &path); +string string_from_wstring(const wstring &path); +string string_to_ansi(const string &str); +#endif + +/* Make a string from a size in bytes in human readable form. */ +string string_human_readable_size(size_t size); +/* Make a string from a unit-less quantity in human readable form. */ +string string_human_readable_number(size_t num); + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/system.cpp b/intern/cycles/util/system.cpp new file mode 100644 index 00000000000..f12e15e756f --- /dev/null +++ b/intern/cycles/util/system.cpp @@ -0,0 +1,415 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/system.h" + +#include "util/log.h" +#include "util/string.h" +#include "util/types.h" + +#include + +#include +OIIO_NAMESPACE_USING + +#ifdef _WIN32 +# if (!defined(FREE_WINDOWS)) +# include +# endif +# include "util/windows.h" +#elif defined(__APPLE__) +# include +# include +# include +#else +# include +# include +#endif + +CCL_NAMESPACE_BEGIN + +bool system_cpu_ensure_initialized() +{ + static bool is_initialized = false; + static bool result = false; + if (is_initialized) { + return result; + } + is_initialized = true; + const NUMAAPI_Result numa_result = numaAPI_Initialize(); + result = (numa_result == NUMAAPI_SUCCESS); + return result; +} + +/* Fallback solution, which doesn't use NUMA/CPU groups. 
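
For the human-readable formatting helpers defined and declared above, a small sketch of the expected outputs (derived from the code, not from running it):

#include "util/string.h"

CCL_NAMESPACE_BEGIN

static void human_readable_example()
{
  /* Sizes are reduced by powers of 1024 and printed with the matching suffix. */
  string s1 = string_human_readable_size(1536);     /* "1.50K" */
  string s2 = string_human_readable_size(3 << 20);  /* "3.00M" */
  string s3 = string_human_readable_size(512);      /* "512" (plain byte count) */

  /* Unit-less quantities only get thousands separators. */
  string n = string_human_readable_number(1234567); /* "1,234,567" */

  (void)s1; (void)s2; (void)s3; (void)n;
}

CCL_NAMESPACE_END
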
*/ +static int system_cpu_thread_count_fallback() +{ +#ifdef _WIN32 + SYSTEM_INFO info; + GetSystemInfo(&info); + return info.dwNumberOfProcessors; +#elif defined(__APPLE__) + int count; + size_t len = sizeof(count); + int mib[2] = {CTL_HW, HW_NCPU}; + sysctl(mib, 2, &count, &len, NULL, 0); + return count; +#else + return sysconf(_SC_NPROCESSORS_ONLN); +#endif +} + +int system_cpu_thread_count() +{ + const int num_nodes = system_cpu_num_numa_nodes(); + int num_threads = 0; + for (int node = 0; node < num_nodes; ++node) { + if (!system_cpu_is_numa_node_available(node)) { + continue; + } + num_threads += system_cpu_num_numa_node_processors(node); + } + return num_threads; +} + +int system_cpu_num_numa_nodes() +{ + if (!system_cpu_ensure_initialized()) { + /* Fallback to a single node with all the threads. */ + return 1; + } + return numaAPI_GetNumNodes(); +} + +bool system_cpu_is_numa_node_available(int node) +{ + if (!system_cpu_ensure_initialized()) { + return true; + } + return numaAPI_IsNodeAvailable(node); +} + +int system_cpu_num_numa_node_processors(int node) +{ + if (!system_cpu_ensure_initialized()) { + return system_cpu_thread_count_fallback(); + } + return numaAPI_GetNumNodeProcessors(node); +} + +bool system_cpu_run_thread_on_node(int node) +{ + if (!system_cpu_ensure_initialized()) { + return true; + } + return numaAPI_RunThreadOnNode(node); +} + +int system_console_width() +{ + int columns = 0; + +#ifdef _WIN32 + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) { + columns = csbi.dwSize.X; + } +#else + struct winsize w; + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { + columns = w.ws_col; + } +#endif + + return (columns > 0) ? columns : 80; +} + +int system_cpu_num_active_group_processors() +{ + if (!system_cpu_ensure_initialized()) { + return system_cpu_thread_count_fallback(); + } + return numaAPI_GetNumCurrentNodesProcessors(); +} + +/* Equivalent of Windows __cpuid for x86 processors on other platforms. */ +#if (!defined(_WIN32) || defined(FREE_WINDOWS)) && (defined(__x86_64__) || defined(__i386__)) +static void __cpuid(int data[4], int selector) +{ +# if defined(__x86_64__) + asm("cpuid" : "=a"(data[0]), "=b"(data[1]), "=c"(data[2]), "=d"(data[3]) : "a"(selector)); +# elif defined(__i386__) + asm("pushl %%ebx \n\t" + "cpuid \n\t" + "movl %%ebx, %1 \n\t" + "popl %%ebx \n\t" + : "=a"(data[0]), "=r"(data[1]), "=c"(data[2]), "=d"(data[3]) + : "a"(selector) + : "ebx"); +# else + data[0] = data[1] = data[2] = data[3] = 0; +# endif +} +#endif + +string system_cpu_brand_string() +{ +#if defined(__APPLE__) + /* Get from system on macOS. */ + char modelname[512] = ""; + size_t bufferlen = 512; + if (sysctlbyname("machdep.cpu.brand_string", &modelname, &bufferlen, NULL, 0) == 0) { + return modelname; + } +#elif defined(WIN32) || defined(__x86_64__) || defined(__i386__) + /* Get from intrinsics on Windows and x86. */ + char buf[49] = {0}; + int result[4] = {0}; + + __cpuid(result, 0x80000000); + + if (result[0] != 0 && result[0] >= (int)0x80000004) { + __cpuid((int *)(buf + 0), 0x80000002); + __cpuid((int *)(buf + 16), 0x80000003); + __cpuid((int *)(buf + 32), 0x80000004); + + string brand = buf; + + /* Make it a bit more presentable. */ + brand = string_remove_trademark(brand); + + return brand; + } +#else + /* Get from /proc/cpuinfo on Unix systems. 
*/ + FILE *cpuinfo = fopen("/proc/cpuinfo", "r"); + if (cpuinfo != nullptr) { + char cpuinfo_buf[513] = ""; + fread(cpuinfo_buf, sizeof(cpuinfo_buf) - 1, 1, cpuinfo); + fclose(cpuinfo); + + char *modelname = strstr(cpuinfo_buf, "model name"); + if (modelname != nullptr) { + modelname = strchr(modelname, ':'); + if (modelname != nullptr) { + modelname += 2; + char *modelname_end = strchr(modelname, '\n'); + if (modelname_end != nullptr) { + *modelname_end = '\0'; + return modelname; + } + } + } + } +#endif + return "Unknown CPU"; +} + +int system_cpu_bits() +{ + return (sizeof(void *) * 8); +} + +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) + +struct CPUCapabilities { + bool x64; + bool mmx; + bool sse; + bool sse2; + bool sse3; + bool ssse3; + bool sse41; + bool sse42; + bool sse4a; + bool avx; + bool f16c; + bool avx2; + bool xop; + bool fma3; + bool fma4; + bool bmi1; + bool bmi2; +}; + +static CPUCapabilities &system_cpu_capabilities() +{ + static CPUCapabilities caps; + static bool caps_init = false; + + if (!caps_init) { + int result[4], num; + + memset(&caps, 0, sizeof(caps)); + + __cpuid(result, 0); + num = result[0]; + + if (num >= 1) { + __cpuid(result, 0x00000001); + caps.mmx = (result[3] & ((int)1 << 23)) != 0; + caps.sse = (result[3] & ((int)1 << 25)) != 0; + caps.sse2 = (result[3] & ((int)1 << 26)) != 0; + caps.sse3 = (result[2] & ((int)1 << 0)) != 0; + + caps.ssse3 = (result[2] & ((int)1 << 9)) != 0; + caps.sse41 = (result[2] & ((int)1 << 19)) != 0; + caps.sse42 = (result[2] & ((int)1 << 20)) != 0; + + caps.fma3 = (result[2] & ((int)1 << 12)) != 0; + caps.avx = false; + bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0; + bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0; + + if (os_uses_xsave_xrestore && cpu_avx_support) { + // Check if the OS will save the YMM registers + uint32_t xcr_feature_mask; +# if defined(__GNUC__) + int edx; /* not used */ + /* actual opcode for xgetbv */ + __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr_feature_mask), "=d"(edx) : "c"(0)); +# elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + /* Minimum VS2010 SP1 compiler is required. 
*/ + xcr_feature_mask = (uint32_t)_xgetbv(_XCR_XFEATURE_ENABLED_MASK); +# else + xcr_feature_mask = 0; +# endif + caps.avx = (xcr_feature_mask & 0x6) == 0x6; + } + + caps.f16c = (result[2] & ((int)1 << 29)) != 0; + + __cpuid(result, 0x00000007); + caps.bmi1 = (result[1] & ((int)1 << 3)) != 0; + caps.bmi2 = (result[1] & ((int)1 << 8)) != 0; + caps.avx2 = (result[1] & ((int)1 << 5)) != 0; + } + + caps_init = true; + } + + return caps; +} + +bool system_cpu_support_sse2() +{ + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2; +} + +bool system_cpu_support_sse3() +{ + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3; +} + +bool system_cpu_support_sse41() +{ + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41; +} + +bool system_cpu_support_avx() +{ + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx; +} + +bool system_cpu_support_avx2() +{ + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c && + caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2; +} +#else + +bool system_cpu_support_sse2() +{ + return false; +} + +bool system_cpu_support_sse3() +{ + return false; +} + +bool system_cpu_support_sse41() +{ + return false; +} + +bool system_cpu_support_avx() +{ + return false; +} +bool system_cpu_support_avx2() +{ + return false; +} + +#endif + +bool system_call_self(const vector &args) +{ + /* Escape program and arguments in case they contain spaces. */ + string cmd = "\"" + Sysutil::this_program_path() + "\""; + + for (int i = 0; i < args.size(); i++) { + cmd += " \"" + args[i] + "\""; + } + +#ifdef _WIN32 + /* Use cmd /S to avoid issues with spaces in arguments. */ + cmd = "cmd /S /C \"" + cmd + " > nul \""; +#else + /* Quiet output. */ + cmd += " > /dev/null"; +#endif + + return (system(cmd.c_str()) == 0); +} + +size_t system_physical_ram() +{ +#ifdef _WIN32 + MEMORYSTATUSEX ram; + ram.dwLength = sizeof(ram); + GlobalMemoryStatusEx(&ram); + return ram.ullTotalPhys; +#elif defined(__APPLE__) + uint64_t ram = 0; + size_t len = sizeof(ram); + if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) { + return ram; + } + return 0; +#else + size_t ps = sysconf(_SC_PAGESIZE); + size_t pn = sysconf(_SC_PHYS_PAGES); + return ps * pn; +#endif +} + +uint64_t system_self_process_id() +{ +#ifdef _WIN32 + return GetCurrentProcessId(); +#else + return getpid(); +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/system.h b/intern/cycles/util/system.h new file mode 100644 index 00000000000..425c7255cbe --- /dev/null +++ b/intern/cycles/util/system.h @@ -0,0 +1,73 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
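
A sketch of how the queries above are typically consumed when reporting the platform and picking a CPU kernel level (the log strings here are illustrative, not actual Cycles output):

#include <cstdio>

#include "util/string.h"
#include "util/system.h"

CCL_NAMESPACE_BEGIN

static void report_cpu()
{
  printf("CPU: %s (%d bit), %d threads, %s RAM\n",
         system_cpu_brand_string().c_str(),
         system_cpu_bits(),
         system_cpu_thread_count(),
         string_human_readable_size(system_physical_ram()).c_str());

  /* The feature checks are cumulative (AVX2 implies SSE4.1 and so on, as encoded
   * in the functions above), so testing from highest to lowest is sufficient. */
  if (system_cpu_support_avx2())
    printf("Using AVX2 kernels\n");
  else if (system_cpu_support_sse41())
    printf("Using SSE4.1 kernels\n");
  else
    printf("Using generic kernels\n");
}

CCL_NAMESPACE_END
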
+ */ + +#ifndef __UTIL_SYSTEM_H__ +#define __UTIL_SYSTEM_H__ + +#include "util/string.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +/* Make sure CPU groups / NUMA API is initialized. */ +bool system_cpu_ensure_initialized(); + +/* Get total number of threads in all NUMA nodes / CPU groups. */ +int system_cpu_thread_count(); + +/* Get width in characters of the current console output. */ +int system_console_width(); + +/* Get number of available nodes. + * + * This is in fact an index of last node plus one and it's not guaranteed + * that all nodes up to this one are available. */ +int system_cpu_num_numa_nodes(); + +/* Returns truth if the given node is available for compute. */ +bool system_cpu_is_numa_node_available(int node); + +/* Get number of available processors on a given node. */ +int system_cpu_num_numa_node_processors(int node); + +/* Runs the current thread and its children on a specific node. + * + * Returns truth if affinity has successfully changed. */ +bool system_cpu_run_thread_on_node(int node); + +/* Number of processors within the current CPU group (or within active thread + * thread affinity). */ +int system_cpu_num_active_group_processors(); + +string system_cpu_brand_string(); +int system_cpu_bits(); +bool system_cpu_support_sse2(); +bool system_cpu_support_sse3(); +bool system_cpu_support_sse41(); +bool system_cpu_support_avx(); +bool system_cpu_support_avx2(); + +size_t system_physical_ram(); + +/* Start a new process of the current application with the given arguments. */ +bool system_call_self(const vector &args); + +/* Get identifier of the currently running process. */ +uint64_t system_self_process_id(); + +CCL_NAMESPACE_END + +#endif /* __UTIL_SYSTEM_H__ */ diff --git a/intern/cycles/util/task.cpp b/intern/cycles/util/task.cpp new file mode 100644 index 00000000000..ce61bf8d6c4 --- /dev/null +++ b/intern/cycles/util/task.cpp @@ -0,0 +1,251 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
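
The NUMA helpers declared above are meant to be used together: enumerate nodes, skip unavailable ones, and pin per-node workers. A hedged sketch using std::thread (the Cycles `thread` wrapper that appears later in this patch does the pinning itself through its `node` argument):

#include <thread>
#include <vector>

#include "util/system.h"

CCL_NAMESPACE_BEGIN

static void spawn_numa_workers(void (*work)())
{
  std::vector<std::thread> workers;
  const int num_nodes = system_cpu_num_numa_nodes();
  for (int node = 0; node < num_nodes; node++) {
    if (!system_cpu_is_numa_node_available(node)) {
      continue; /* the node index range may contain holes, as documented above */
    }
    const int num_procs = system_cpu_num_numa_node_processors(node);
    for (int i = 0; i < num_procs; i++) {
      workers.emplace_back([node, work]() {
        system_cpu_run_thread_on_node(node); /* bind this worker to its node */
        work();
      });
    }
  }
  for (std::thread &t : workers) {
    t.join();
  }
}

CCL_NAMESPACE_END
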
+ */ + +#include "util/task.h" +#include "util/foreach.h" +#include "util/log.h" +#include "util/system.h" +#include "util/time.h" + +CCL_NAMESPACE_BEGIN + +/* Task Pool */ + +TaskPool::TaskPool() : start_time(time_dt()), num_tasks_pushed(0) +{ +} + +TaskPool::~TaskPool() +{ + cancel(); +} + +void TaskPool::push(TaskRunFunction &&task) +{ + tbb_group.run(std::move(task)); + num_tasks_pushed++; +} + +void TaskPool::wait_work(Summary *stats) +{ + tbb_group.wait(); + + if (stats != NULL) { + stats->time_total = time_dt() - start_time; + stats->num_tasks_handled = num_tasks_pushed; + } + + num_tasks_pushed = 0; +} + +void TaskPool::cancel() +{ + if (num_tasks_pushed > 0) { + tbb_group.cancel(); + tbb_group.wait(); + num_tasks_pushed = 0; + } +} + +bool TaskPool::canceled() +{ + return tbb::is_current_task_group_canceling(); +} + +/* Task Scheduler */ + +thread_mutex TaskScheduler::mutex; +int TaskScheduler::users = 0; +int TaskScheduler::active_num_threads = 0; +tbb::global_control *TaskScheduler::global_control = nullptr; + +void TaskScheduler::init(int num_threads) +{ + thread_scoped_lock lock(mutex); + /* Multiple cycles instances can use this task scheduler, sharing the same + * threads, so we keep track of the number of users. */ + ++users; + if (users != 1) { + return; + } + if (num_threads > 0) { + /* Automatic number of threads. */ + VLOG(1) << "Overriding number of TBB threads to " << num_threads << "."; + global_control = new tbb::global_control(tbb::global_control::max_allowed_parallelism, + num_threads); + active_num_threads = num_threads; + } + else { + active_num_threads = system_cpu_thread_count(); + } +} + +void TaskScheduler::exit() +{ + thread_scoped_lock lock(mutex); + users--; + if (users == 0) { + delete global_control; + global_control = nullptr; + active_num_threads = 0; + } +} + +void TaskScheduler::free_memory() +{ + assert(users == 0); +} + +int TaskScheduler::num_threads() +{ + return active_num_threads; +} + +/* Dedicated Task Pool */ + +DedicatedTaskPool::DedicatedTaskPool() +{ + do_cancel = false; + do_exit = false; + num = 0; + + worker_thread = new thread(function_bind(&DedicatedTaskPool::thread_run, this)); +} + +DedicatedTaskPool::~DedicatedTaskPool() +{ + wait(); + + do_exit = true; + queue_cond.notify_all(); + + worker_thread->join(); + delete worker_thread; +} + +void DedicatedTaskPool::push(TaskRunFunction &&task, bool front) +{ + num_increase(); + + /* add task to queue */ + queue_mutex.lock(); + if (front) + queue.emplace_front(std::move(task)); + else + queue.emplace_back(std::move(task)); + + queue_cond.notify_one(); + queue_mutex.unlock(); +} + +void DedicatedTaskPool::wait() +{ + thread_scoped_lock num_lock(num_mutex); + + while (num) + num_cond.wait(num_lock); +} + +void DedicatedTaskPool::cancel() +{ + do_cancel = true; + + clear(); + wait(); + + do_cancel = false; +} + +bool DedicatedTaskPool::canceled() +{ + return do_cancel; +} + +void DedicatedTaskPool::num_decrease(int done) +{ + thread_scoped_lock num_lock(num_mutex); + num -= done; + + assert(num >= 0); + if (num == 0) + num_cond.notify_all(); +} + +void DedicatedTaskPool::num_increase() +{ + thread_scoped_lock num_lock(num_mutex); + num++; + num_cond.notify_all(); +} + +bool DedicatedTaskPool::thread_wait_pop(TaskRunFunction &task) +{ + thread_scoped_lock queue_lock(queue_mutex); + + while (queue.empty() && !do_exit) + queue_cond.wait(queue_lock); + + if (queue.empty()) { + assert(do_exit); + return false; + } + + task = queue.front(); + queue.pop_front(); + + return true; +} + +void 
DedicatedTaskPool::thread_run() +{ + TaskRunFunction task; + + /* keep popping off tasks */ + while (thread_wait_pop(task)) { + /* run task */ + task(); + + /* delete task */ + task = nullptr; + + /* notify task was done */ + num_decrease(1); + } +} + +void DedicatedTaskPool::clear() +{ + thread_scoped_lock queue_lock(queue_mutex); + + /* erase all tasks from the queue */ + int done = queue.size(); + queue.clear(); + + queue_lock.unlock(); + + /* notify done */ + num_decrease(done); +} + +string TaskPool::Summary::full_report() const +{ + string report = ""; + report += string_printf("Total time: %f\n", time_total); + report += string_printf("Tasks handled: %d\n", num_tasks_handled); + return report; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/task.h b/intern/cycles/util/task.h new file mode 100644 index 00000000000..1a8f512b83a --- /dev/null +++ b/intern/cycles/util/task.h @@ -0,0 +1,148 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TASK_H__ +#define __UTIL_TASK_H__ + +#include "util/list.h" +#include "util/string.h" +#include "util/tbb.h" +#include "util/thread.h" +#include "util/vector.h" + +CCL_NAMESPACE_BEGIN + +class TaskPool; +class TaskScheduler; + +typedef function TaskRunFunction; + +/* Task Pool + * + * Pool of tasks that will be executed by the central TaskScheduler. For each + * pool, we can wait for all tasks to be done, or cancel them before they are + * done. + * + * TaskRunFunction may be created with std::bind or lambda expressions. */ + +class TaskPool { + public: + struct Summary { + /* Time spent to handle all tasks. */ + double time_total; + + /* Number of all tasks handled by this pool. */ + int num_tasks_handled; + + /* A full multi-line description of the state of the pool after + * all work is done. + */ + string full_report() const; + }; + + TaskPool(); + ~TaskPool(); + + void push(TaskRunFunction &&task); + + void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */ + void cancel(); /* cancel all tasks and wait until they are no longer executing */ + + static bool canceled(); /* For worker threads, test if current task pool canceled. */ + + protected: + tbb::task_group tbb_group; + + /* ** Statistics ** */ + + /* Time stamp of first task pushed. */ + double start_time; + + /* Number of all tasks pushed to the pool. Cleared after wait_work() and cancel(). */ + int num_tasks_pushed; +}; + +/* Task Scheduler + * + * Central scheduler that holds running threads ready to execute tasks. A single + * queue holds the task from all pools. */ + +class TaskScheduler { + public: + static void init(int num_threads = 0); + static void exit(); + static void free_memory(); + + /* Approximate number of threads that will work on task, which may be lower + * or higher than the actual number of threads. Use as little as possible and + * leave splitting up tasks to the scheduler. 
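
A usage sketch of the TaskPool API above (illustrative): initialize the scheduler once per process, push lambdas, then wait; the Summary report is optional.

#include <cstdio>

#include "util/task.h"

CCL_NAMESPACE_BEGIN

static void task_pool_example()
{
  TaskScheduler::init(); /* default argument: automatic thread count */

  TaskPool pool;
  for (int i = 0; i < 100; i++) {
    pool.push([i] {
      if (TaskPool::canceled()) {
        return; /* cooperative cancellation check inside long-running tasks */
      }
      /* ... do the work for chunk i ... */
      (void)i;
    });
  }

  TaskPool::Summary stats;
  pool.wait_work(&stats); /* blocks until all pushed tasks have finished */
  printf("%s", stats.full_report().c_str());

  TaskScheduler::exit();
}

CCL_NAMESPACE_END
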
*/ + static int num_threads(); + + protected: + static thread_mutex mutex; + static int users; + static int active_num_threads; + +#ifdef WITH_TBB_GLOBAL_CONTROL + static tbb::global_control *global_control; +#endif +}; + +/* Dedicated Task Pool + * + * Like a TaskPool, but will launch one dedicated thread to execute all tasks. + * + * The run callback that actually executes the task may be created like this: + * function_bind(&MyClass::task_execute, this, _1, _2) */ + +class DedicatedTaskPool { + public: + DedicatedTaskPool(); + ~DedicatedTaskPool(); + + void push(TaskRunFunction &&run, bool front = false); + + void wait(); /* wait until all tasks are done */ + void cancel(); /* cancel all tasks, keep worker thread running */ + + bool canceled(); /* for worker thread, test if canceled */ + + protected: + void num_decrease(int done); + void num_increase(); + + void thread_run(); + bool thread_wait_pop(TaskRunFunction &task); + + void clear(); + + thread_mutex num_mutex; + thread_condition_variable num_cond; + + list queue; + thread_mutex queue_mutex; + thread_condition_variable queue_cond; + + int num; + bool do_cancel; + bool do_exit; + + thread *worker_thread; +}; + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/tbb.h b/intern/cycles/util/tbb.h new file mode 100644 index 00000000000..6fc3b8daad3 --- /dev/null +++ b/intern/cycles/util/tbb.h @@ -0,0 +1,55 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TBB_H__ +#define __UTIL_TBB_H__ + +/* TBB includes , do it ourselves first so we are sure + * WIN32_LEAN_AND_MEAN and similar are defined beforehand. */ +#include "util/windows.h" + +#include +#include +#include +#include +#include + +#if TBB_INTERFACE_VERSION_MAJOR >= 10 +# define WITH_TBB_GLOBAL_CONTROL +# include +#endif + +CCL_NAMESPACE_BEGIN + +using tbb::blocked_range; +using tbb::enumerable_thread_specific; +using tbb::parallel_for; + +static inline void parallel_for_cancel() +{ +#if TBB_INTERFACE_VERSION_MAJOR >= 12 + tbb::task_group_context *ctx = tbb::task::current_context(); + if (ctx) { + ctx->cancel_group_execution(); + } +#else + tbb::task::self().cancel_group_execution(); +#endif +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_TBB_H__ */ diff --git a/intern/cycles/util/texture.h b/intern/cycles/util/texture.h new file mode 100644 index 00000000000..5e37b79e340 --- /dev/null +++ b/intern/cycles/util/texture.h @@ -0,0 +1,99 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
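
A sketch of the thin TBB layer above: parallel_for and enumerable_thread_specific come straight from TBB, and parallel_for_cancel() lets a worker abort the enclosing loop (for example when rendering is cancelled). The callback name is an assumption for the example.

#include "util/tbb.h"

CCL_NAMESPACE_BEGIN

static void parallel_sum_example(const int *data, int n, bool (*is_canceled)())
{
  enumerable_thread_specific<long long> partial_sums(0);

  parallel_for(blocked_range<int>(0, n), [&](const blocked_range<int> &r) {
    if (is_canceled()) {
      parallel_for_cancel(); /* cancels the surrounding task group */
      return;
    }
    long long &sum = partial_sums.local();
    for (int i = r.begin(); i != r.end(); i++) {
      sum += data[i];
    }
  });

  long long total = 0;
  for (const long long &sum : partial_sums) {
    total += sum;
  }
  (void)total;
}

CCL_NAMESPACE_END
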
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TEXTURE_H__ +#define __UTIL_TEXTURE_H__ + +#include "util/transform.h" + +CCL_NAMESPACE_BEGIN + +/* Color to use when textures are not found. */ +#define TEX_IMAGE_MISSING_R 1 +#define TEX_IMAGE_MISSING_G 0 +#define TEX_IMAGE_MISSING_B 1 +#define TEX_IMAGE_MISSING_A 1 + +/* Interpolation types for textures + * cuda also use texture space to store other objects */ +typedef enum InterpolationType { + INTERPOLATION_NONE = -1, + INTERPOLATION_LINEAR = 0, + INTERPOLATION_CLOSEST = 1, + INTERPOLATION_CUBIC = 2, + INTERPOLATION_SMART = 3, + + INTERPOLATION_NUM_TYPES, +} InterpolationType; + +typedef enum ImageDataType { + IMAGE_DATA_TYPE_FLOAT4 = 0, + IMAGE_DATA_TYPE_BYTE4 = 1, + IMAGE_DATA_TYPE_HALF4 = 2, + IMAGE_DATA_TYPE_FLOAT = 3, + IMAGE_DATA_TYPE_BYTE = 4, + IMAGE_DATA_TYPE_HALF = 5, + IMAGE_DATA_TYPE_USHORT4 = 6, + IMAGE_DATA_TYPE_USHORT = 7, + IMAGE_DATA_TYPE_NANOVDB_FLOAT = 8, + IMAGE_DATA_TYPE_NANOVDB_FLOAT3 = 9, + + IMAGE_DATA_NUM_TYPES +} ImageDataType; + +/* Alpha types + * How to treat alpha in images. */ +typedef enum ImageAlphaType { + IMAGE_ALPHA_UNASSOCIATED = 0, + IMAGE_ALPHA_ASSOCIATED = 1, + IMAGE_ALPHA_CHANNEL_PACKED = 2, + IMAGE_ALPHA_IGNORE = 3, + IMAGE_ALPHA_AUTO = 4, + + IMAGE_ALPHA_NUM_TYPES, +} ImageAlphaType; + +/* Extension types for textures. + * + * Defines how the image is extrapolated past its original bounds. */ +typedef enum ExtensionType { + /* Cause the image to repeat horizontally and vertically. */ + EXTENSION_REPEAT = 0, + /* Extend by repeating edge pixels of the image. */ + EXTENSION_EXTEND = 1, + /* Clip to image size and set exterior pixels as transparent. */ + EXTENSION_CLIP = 2, + + EXTENSION_NUM_TYPES, +} ExtensionType; + +typedef struct TextureInfo { + /* Pointer, offset or texture depending on device. */ + uint64_t data; + /* Data Type */ + uint data_type; + /* Interpolation and extension type. */ + uint interpolation, extension; + /* Dimensions. */ + uint width, height, depth; + /* Transform for 3D textures. */ + uint use_transform_3d; + Transform transform_3d; +} TextureInfo; + +CCL_NAMESPACE_END + +#endif /* __UTIL_TEXTURE_H__ */ diff --git a/intern/cycles/util/thread.cpp b/intern/cycles/util/thread.cpp new file mode 100644 index 00000000000..24a0600425d --- /dev/null +++ b/intern/cycles/util/thread.cpp @@ -0,0 +1,72 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/thread.h" + +#include "util/system.h" +#include "util/windows.h" + +CCL_NAMESPACE_BEGIN + +thread::thread(function run_cb, int node) : run_cb_(run_cb), joined_(false), node_(node) +{ +#ifdef __APPLE__ + /* Set the stack size to 2MB to match Linux. The default 512KB on macOS is + * too small for Embree, and consistent stack size also makes things more + * predictable in general. 
*/ + pthread_attr_t attribute; + pthread_attr_init(&attribute); + pthread_attr_setstacksize(&attribute, 1024 * 1024 * 2); + pthread_create(&pthread_id, &attribute, run, (void *)this); +#else + std_thread = std::thread(&thread::run, this); +#endif +} + +thread::~thread() +{ + if (!joined_) { + join(); + } +} + +void *thread::run(void *arg) +{ + thread *self = (thread *)(arg); + if (self->node_ != -1) { + system_cpu_run_thread_on_node(self->node_); + } + self->run_cb_(); + return NULL; +} + +bool thread::join() +{ + joined_ = true; +#ifdef __APPLE__ + return pthread_join(pthread_id, NULL) == 0; +#else + try { + std_thread.join(); + return true; + } + catch (const std::system_error &) { + return false; + } +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/thread.h b/intern/cycles/util/thread.h new file mode 100644 index 00000000000..09686e4b23f --- /dev/null +++ b/intern/cycles/util/thread.h @@ -0,0 +1,90 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_THREAD_H__ +#define __UTIL_THREAD_H__ + +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# include "util/windows.h" +#else +# include +#endif + +/* NOTE: Use tbb/spin_mutex.h instead of util_tbb.h because some of the TBB + * functionality requires RTTI, which is disabled for OSL kernel. */ +#include + +#include "util/function.h" + +CCL_NAMESPACE_BEGIN + +typedef std::mutex thread_mutex; +typedef std::unique_lock thread_scoped_lock; +typedef std::condition_variable thread_condition_variable; + +/* Own thread implementation similar to std::thread, so we can set a + * custom stack size on macOS. */ + +class thread { + public: + /* NOTE: Node index of -1 means that affinity will be inherited from the + * parent thread and no override on top of that will happen. */ + thread(function run_cb, int node = -1); + ~thread(); + + static void *run(void *arg); + bool join(); + + protected: + function run_cb_; +#ifdef __APPLE__ + pthread_t pthread_id; +#else + std::thread std_thread; +#endif + bool joined_; + int node_; +}; + +using thread_spin_lock = tbb::spin_mutex; + +class thread_scoped_spin_lock { + public: + explicit thread_scoped_spin_lock(thread_spin_lock &lock) : lock_(lock) + { + lock_.lock(); + } + + ~thread_scoped_spin_lock() + { + lock_.unlock(); + } + + /* TODO(sergey): Implement manual control over lock/unlock. */ + + protected: + thread_spin_lock &lock_; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_THREAD_H__ */ diff --git a/intern/cycles/util/time.cpp b/intern/cycles/util/time.cpp new file mode 100644 index 00000000000..62d14b063be --- /dev/null +++ b/intern/cycles/util/time.cpp @@ -0,0 +1,139 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
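
A brief sketch of the custom thread wrapper and spin lock above; a lambda is one valid way to build the function<void()> callback it expects.

#include "util/thread.h"

CCL_NAMESPACE_BEGIN

static void thread_example()
{
  thread_spin_lock lock;
  int counter = 0;

  /* The worker inherits the parent's affinity (node = -1 by default). */
  thread worker([&]() {
    for (int i = 0; i < 1000; i++) {
      thread_scoped_spin_lock guard(lock);
      counter++;
    }
  });

  worker.join(); /* also invoked by ~thread() if omitted */
  (void)counter;
}

CCL_NAMESPACE_END
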
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util/time.h" + +#include + +#if !defined(_WIN32) +# include +# include +#endif + +#include "util/math.h" +#include "util/string.h" +#include "util/windows.h" + +CCL_NAMESPACE_BEGIN + +#ifdef _WIN32 +double time_dt() +{ + __int64 frequency, counter; + + QueryPerformanceFrequency((LARGE_INTEGER *)&frequency); + QueryPerformanceCounter((LARGE_INTEGER *)&counter); + + return (double)counter / (double)frequency; +} + +void time_sleep(double t) +{ + Sleep((int)(t * 1000)); +} +#else +double time_dt() +{ + struct timeval now; + gettimeofday(&now, NULL); + + return now.tv_sec + now.tv_usec * 1e-6; +} + +/* sleep t seconds */ +void time_sleep(double t) +{ + /* get whole seconds */ + int s = (int)t; + + if (s >= 1) { + sleep(s); + + /* adjust parameter to remove whole seconds */ + t -= s; + } + + /* get microseconds */ + int us = (int)(t * 1e6); + if (us > 0) + usleep(us); +} +#endif + +/* Time in format "hours:minutes:seconds.hundreds" */ + +string time_human_readable_from_seconds(const double seconds) +{ + const int h = (((int)seconds) / (60 * 60)); + const int m = (((int)seconds) / 60) % 60; + const int s = (((int)seconds) % 60); + const int r = (((int)(seconds * 100)) % 100); + + if (h > 0) { + return string_printf("%.2d:%.2d:%.2d.%.2d", h, m, s, r); + } + else { + return string_printf("%.2d:%.2d.%.2d", m, s, r); + } +} + +double time_human_readable_to_seconds(const string &time_string) +{ + /* Those are multiplies of a corresponding token surrounded by : in the + * time string, which denotes how to convert value to seconds. + * Effectively: seconds, minutes, hours, days in seconds. */ + const int multipliers[] = {1, 60, 60 * 60, 24 * 60 * 60}; + const int num_multiplies = sizeof(multipliers) / sizeof(*multipliers); + if (time_string.empty()) { + return 0.0; + } + double result = 0.0; + /* Split fractions of a second from the encoded time. */ + vector fraction_tokens; + string_split(fraction_tokens, time_string, ".", false); + const int num_fraction_tokens = fraction_tokens.size(); + if (num_fraction_tokens == 0) { + /* Time string is malformed. */ + return 0.0; + } + else if (fraction_tokens.size() == 1) { + /* There is no fraction of a second specified, the rest of the code + * handles this normally. */ + } + else if (fraction_tokens.size() == 2) { + result = atof(fraction_tokens[1].c_str()); + result *= pow(0.1, fraction_tokens[1].length()); + } + else { + /* This is not a valid string, the result can not be reliable. */ + return 0.0; + } + /* Split hours, minutes and seconds. + * Hours part is optional. */ + vector tokens; + string_split(tokens, fraction_tokens[0], ":", false); + const int num_tokens = tokens.size(); + if (num_tokens > num_multiplies) { + /* Can not reliably represent the value. 
*/ + return 0.0; + } + for (int i = 0; i < num_tokens; ++i) { + result += atoi(tokens[num_tokens - i - 1].c_str()) * multipliers[i]; + } + return result; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/time.h b/intern/cycles/util/time.h new file mode 100644 index 00000000000..380921664e8 --- /dev/null +++ b/intern/cycles/util/time.h @@ -0,0 +1,91 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TIME_H__ +#define __UTIL_TIME_H__ + +#include "util/function.h" +#include "util/string.h" + +CCL_NAMESPACE_BEGIN + +/* Give current time in seconds in double precision, with good accuracy. */ + +double time_dt(); + +/* Sleep for the specified number of seconds. */ + +void time_sleep(double t); + +/* Scoped timer. */ + +class scoped_timer { + public: + explicit scoped_timer(double *value = NULL) : value_(value) + { + time_start_ = time_dt(); + } + + ~scoped_timer() + { + if (value_ != NULL) { + *value_ = get_time(); + } + } + + double get_start() const + { + return time_start_; + } + + double get_time() const + { + return time_dt() - time_start_; + } + + protected: + double *value_; + double time_start_; +}; + +class scoped_callback_timer { + public: + using callback_type = function; + + explicit scoped_callback_timer(callback_type cb) : cb(cb) + { + } + + ~scoped_callback_timer() + { + if (cb) { + cb(timer.get_time()); + } + } + + protected: + scoped_timer timer; + callback_type cb; +}; + +/* Make human readable string from time, compatible with Blender metadata. */ + +string time_human_readable_from_seconds(const double seconds); +double time_human_readable_to_seconds(const string &str); + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/util/transform.cpp b/intern/cycles/util/transform.cpp new file mode 100644 index 00000000000..bd990cb0f79 --- /dev/null +++ b/intern/cycles/util/transform.cpp @@ -0,0 +1,345 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Adapted from code with license: + * + * Copyright (c) 2002, Industrial Light & Magic, a division of Lucas + * Digital Ltd. LLC. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
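
A sketch of the timing utilities above: scoped_timer writes the elapsed time into a double on destruction, and the human-readable conversions round-trip through the "hh:mm:ss.cc" format (hours part optional).

#include <cstdio>

#include "util/time.h"

CCL_NAMESPACE_BEGIN

static void timing_example()
{
  double elapsed = 0.0;
  {
    scoped_timer timer(&elapsed);
    time_sleep(0.25); /* stand-in for real work */
  } /* ~scoped_timer() stores time_dt() - start into `elapsed` */

  printf("took %s\n", time_human_readable_from_seconds(elapsed).c_str());

  /* "01:05.25" parses back to 65.25 seconds. */
  const double parsed = time_human_readable_to_seconds("01:05.25");
  (void)parsed;
}

CCL_NAMESPACE_END
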
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Industrial Light & Magic nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "util/transform.h" +#include "util/projection.h" + +#include "util/boundbox.h" +#include "util/math.h" + +CCL_NAMESPACE_BEGIN + +/* Transform Inverse */ + +static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) +{ + /* forward elimination */ + for (int i = 0; i < 4; i++) { + int pivot = i; + float pivotsize = M[i][i]; + + if (pivotsize < 0) + pivotsize = -pivotsize; + + for (int j = i + 1; j < 4; j++) { + float tmp = M[j][i]; + + if (tmp < 0) + tmp = -tmp; + + if (tmp > pivotsize) { + pivot = j; + pivotsize = tmp; + } + } + + if (UNLIKELY(pivotsize == 0.0f)) + return false; + + if (pivot != i) { + for (int j = 0; j < 4; j++) { + float tmp; + + tmp = M[i][j]; + M[i][j] = M[pivot][j]; + M[pivot][j] = tmp; + + tmp = R[i][j]; + R[i][j] = R[pivot][j]; + R[pivot][j] = tmp; + } + } + + for (int j = i + 1; j < 4; j++) { + float f = M[j][i] / M[i][i]; + + for (int k = 0; k < 4; k++) { + M[j][k] -= f * M[i][k]; + R[j][k] -= f * R[i][k]; + } + } + } + + /* backward substitution */ + for (int i = 3; i >= 0; --i) { + float f; + + if (UNLIKELY((f = M[i][i]) == 0.0f)) + return false; + + for (int j = 0; j < 4; j++) { + M[i][j] /= f; + R[i][j] /= f; + } + + for (int j = 0; j < i; j++) { + f = M[j][i]; + + for (int k = 0; k < 4; k++) { + M[j][k] -= f * M[i][k]; + R[j][k] -= f * R[i][k]; + } + } + } + + return true; +} + +ProjectionTransform projection_inverse(const ProjectionTransform &tfm) +{ + ProjectionTransform tfmR = projection_identity(); + float M[4][4], R[4][4]; + + memcpy(R, &tfmR, sizeof(R)); + memcpy(M, &tfm, sizeof(M)); + + if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { + /* matrix is degenerate (e.g. 
0 scale on some axis), ideally we should + * never be in this situation, but try to invert it anyway with tweak */ + M[0][0] += 1e-8f; + M[1][1] += 1e-8f; + M[2][2] += 1e-8f; + + if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { + return projection_identity(); + } + } + + memcpy(&tfmR, R, sizeof(R)); + + return tfmR; +} + +Transform transform_inverse(const Transform &tfm) +{ + ProjectionTransform projection(tfm); + return projection_to_transform(projection_inverse(projection)); +} + +Transform transform_transposed_inverse(const Transform &tfm) +{ + ProjectionTransform projection(tfm); + ProjectionTransform iprojection = projection_inverse(projection); + return projection_to_transform(projection_transpose(iprojection)); +} + +/* Motion Transform */ + +float4 transform_to_quat(const Transform &tfm) +{ + double trace = (double)(tfm[0][0] + tfm[1][1] + tfm[2][2]); + float4 qt; + + if (trace > 0.0) { + double s = sqrt(trace + 1.0); + + qt.w = (float)(s / 2.0); + s = 0.5 / s; + + qt.x = (float)((double)(tfm[2][1] - tfm[1][2]) * s); + qt.y = (float)((double)(tfm[0][2] - tfm[2][0]) * s); + qt.z = (float)((double)(tfm[1][0] - tfm[0][1]) * s); + } + else { + int i = 0; + + if (tfm[1][1] > tfm[i][i]) + i = 1; + if (tfm[2][2] > tfm[i][i]) + i = 2; + + int j = (i + 1) % 3; + int k = (j + 1) % 3; + + double s = sqrt((double)(tfm[i][i] - (tfm[j][j] + tfm[k][k])) + 1.0); + + double q[3]; + q[i] = s * 0.5; + if (s != 0.0) + s = 0.5 / s; + + double w = (double)(tfm[k][j] - tfm[j][k]) * s; + q[j] = (double)(tfm[j][i] + tfm[i][j]) * s; + q[k] = (double)(tfm[k][i] + tfm[i][k]) * s; + + qt.x = (float)q[0]; + qt.y = (float)q[1]; + qt.z = (float)q[2]; + qt.w = (float)w; + } + + return qt; +} + +static void transform_decompose(DecomposedTransform *decomp, const Transform *tfm) +{ + /* extract translation */ + decomp->y = make_float4(tfm->x.w, tfm->y.w, tfm->z.w, 0.0f); + + /* extract rotation */ + Transform M = *tfm; + M.x.w = 0.0f; + M.y.w = 0.0f; + M.z.w = 0.0f; + +#if 0 + Transform R = M; + float norm; + int iteration = 0; + + do { + Transform Rnext; + Transform Rit = transform_transposed_inverse(R); + + for (int i = 0; i < 3; i++) + for (int j = 0; j < 4; j++) + Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]); + + norm = 0.0f; + for (int i = 0; i < 3; i++) { + norm = max(norm, + fabsf(R[i][0] - Rnext[i][0]) + fabsf(R[i][1] - Rnext[i][1]) + + fabsf(R[i][2] - Rnext[i][2])); + } + + R = Rnext; + iteration++; + } while (iteration < 100 && norm > 1e-4f); + + if (transform_negative_scale(R)) + R = R * transform_scale(-1.0f, -1.0f, -1.0f); + + decomp->x = transform_to_quat(R); + + /* extract scale and pack it */ + Transform scale = transform_inverse(R) * M; + decomp->y.w = scale.x.x; + decomp->z = make_float4(scale.x.y, scale.x.z, scale.y.x, scale.y.y); + decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z); +#else + float3 colx = transform_get_column(&M, 0); + float3 coly = transform_get_column(&M, 1); + float3 colz = transform_get_column(&M, 2); + + /* extract scale and shear first */ + float3 scale, shear; + scale.x = len(colx); + colx = safe_divide_float3_float(colx, scale.x); + shear.z = dot(colx, coly); + coly -= shear.z * colx; + scale.y = len(coly); + coly = safe_divide_float3_float(coly, scale.y); + shear.y = dot(colx, colz); + colz -= shear.y * colx; + shear.x = dot(coly, colz); + colz -= shear.x * coly; + scale.z = len(colz); + colz = safe_divide_float3_float(colz, scale.z); + + transform_set_column(&M, 0, colx); + transform_set_column(&M, 1, coly); + transform_set_column(&M, 2, colz); + + if 
(transform_negative_scale(M)) { + scale *= -1.0f; + M = M * transform_scale(-1.0f, -1.0f, -1.0f); + } + + decomp->x = transform_to_quat(M); + + decomp->y.w = scale.x; + decomp->z = make_float4(shear.z, shear.y, 0.0f, scale.y); + decomp->w = make_float4(shear.x, 0.0f, 0.0f, scale.z); +#endif +} + +void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size) +{ + /* Decompose and correct rotation. */ + for (size_t i = 0; i < size; i++) { + transform_decompose(decomp + i, motion + i); + + if (i > 0) { + /* Ensure rotation around shortest angle, negated quaternions are the same + * but this means we don't have to do the check in quat_interpolate */ + if (dot(decomp[i - 1].x, decomp[i].x) < 0.0f) + decomp[i].x = -decomp[i].x; + } + } + + /* Copy rotation to decomposed transform where scale is degenerate. This avoids weird object + * rotation interpolation when the scale goes to 0 for a time step. + * + * Note that this is very simple and naive implementation, which only deals with degenerated + * scale happening only on one frame. It is possible to improve it further by interpolating + * rotation into s degenerated range using rotation from time-steps from adjacent non-degenerated + * time steps. */ + for (size_t i = 0; i < size; i++) { + const float3 scale = make_float3(decomp[i].y.w, decomp[i].z.w, decomp[i].w.w); + if (!is_zero(scale)) { + continue; + } + + if (i > 0) { + decomp[i].x = decomp[i - 1].x; + } + else if (i < size - 1) { + decomp[i].x = decomp[i + 1].x; + } + } +} + +Transform transform_from_viewplane(BoundBox2D &viewplane) +{ + return transform_scale(1.0f / (viewplane.right - viewplane.left), + 1.0f / (viewplane.top - viewplane.bottom), + 1.0f) * + transform_translate(-viewplane.left, -viewplane.bottom, 0.0f); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h new file mode 100644 index 00000000000..7bfe747fcfb --- /dev/null +++ b/intern/cycles/util/transform.h @@ -0,0 +1,512 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TRANSFORM_H__ +#define __UTIL_TRANSFORM_H__ + +#ifndef __KERNEL_GPU__ +# include +#endif + +#include "util/math.h" +#include "util/types.h" + +CCL_NAMESPACE_BEGIN + +/* Affine transformation, stored as 4x3 matrix. */ + +typedef struct Transform { + float4 x, y, z; + +#ifndef __KERNEL_GPU__ + float4 operator[](int i) const + { + return *(&x + i); + } + float4 &operator[](int i) + { + return *(&x + i); + } +#endif +} Transform; + +/* Transform decomposed in rotation/translation/scale. we use the same data + * structure as Transform, and tightly pack decomposition into it. first the + * rotation (4), then translation (3), then 3x3 scale matrix (9). 
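For reference, the packing described above is what transform_compose() further down consumes. A minimal sketch of where each quantity lives; the accessor names below are hypothetical and only spell out the layout:

/* Hypothetical accessors, for illustration of the packed layout only. */
ccl_device_inline float4 decomposed_rotation_quat(const DecomposedTransform &d)
{
  return d.x; /* quaternion: vector part in x/y/z, scalar part in w */
}

ccl_device_inline float3 decomposed_translation(const DecomposedTransform &d)
{
  return make_float3(d.y.x, d.y.y, d.y.z);
}

/* The remaining nine floats hold the 3x3 scale/shear matrix, read column by
 * column as (y.w, z.z, w.y), (z.x, z.w, w.z) and (z.y, w.x, w.w), matching
 * how transform_compose() rebuilds the matrix. */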
*/ + +typedef struct DecomposedTransform { + float4 x, y, z, w; +} DecomposedTransform; + +/* Functions */ + +ccl_device_inline float3 transform_point(ccl_private const Transform *t, const float3 a) +{ + /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) + ssef x, y, z, w, aa; + aa = a.m128; + + x = _mm_loadu_ps(&t->x.x); + y = _mm_loadu_ps(&t->y.x); + z = _mm_loadu_ps(&t->z.x); + w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); + + _MM_TRANSPOSE4_PS(x, y, z, w); + + ssef tmp = shuffle<0>(aa) * x; + tmp = madd(shuffle<1>(aa), y, tmp); + tmp = madd(shuffle<2>(aa), z, tmp); + tmp += w; + + return float3(tmp.m128); +#else + float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w, + a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w, + a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w); + + return c; +#endif +} + +ccl_device_inline float3 transform_direction(ccl_private const Transform *t, const float3 a) +{ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) + ssef x, y, z, w, aa; + aa = a.m128; + x = _mm_loadu_ps(&t->x.x); + y = _mm_loadu_ps(&t->y.x); + z = _mm_loadu_ps(&t->z.x); + w = _mm_setzero_ps(); + + _MM_TRANSPOSE4_PS(x, y, z, w); + + ssef tmp = shuffle<0>(aa) * x; + tmp = madd(shuffle<1>(aa), y, tmp); + tmp = madd(shuffle<2>(aa), z, tmp); + + return float3(tmp.m128); +#else + float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, + a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, + a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); + + return c; +#endif +} + +ccl_device_inline float3 transform_direction_transposed(ccl_private const Transform *t, + const float3 a) +{ + float3 x = make_float3(t->x.x, t->y.x, t->z.x); + float3 y = make_float3(t->x.y, t->y.y, t->z.y); + float3 z = make_float3(t->x.z, t->y.z, t->z.z); + + return make_float3(dot(x, a), dot(y, a), dot(z, a)); +} + +ccl_device_inline Transform make_transform(float a, + float b, + float c, + float d, + float e, + float f, + float g, + float h, + float i, + float j, + float k, + float l) +{ + Transform t; + + t.x.x = a; + t.x.y = b; + t.x.z = c; + t.x.w = d; + t.y.x = e; + t.y.y = f; + t.y.z = g; + t.y.w = h; + t.z.x = i; + t.z.y = j; + t.z.z = k; + t.z.w = l; + + return t; +} + +ccl_device_inline Transform euler_to_transform(const float3 euler) +{ + float cx = cosf(euler.x); + float cy = cosf(euler.y); + float cz = cosf(euler.z); + float sx = sinf(euler.x); + float sy = sinf(euler.y); + float sz = sinf(euler.z); + + Transform t; + t.x.x = cy * cz; + t.y.x = cy * sz; + t.z.x = -sy; + + t.x.y = sy * sx * cz - cx * sz; + t.y.y = sy * sx * sz + cx * cz; + t.z.y = cy * sx; + + t.x.z = sy * cx * cz + sx * sz; + t.y.z = sy * cx * sz - sx * cz; + t.z.z = cy * cx; + + t.x.w = t.y.w = t.z.w = 0.0f; + return t; +} + +/* Constructs a coordinate frame from a normalized normal. */ +ccl_device_inline Transform make_transform_frame(float3 N) +{ + const float3 dx0 = cross(make_float3(1.0f, 0.0f, 0.0f), N); + const float3 dx1 = cross(make_float3(0.0f, 1.0f, 0.0f), N); + const float3 dx = normalize((dot(dx0, dx0) > dot(dx1, dx1)) ? 
dx0 : dx1); + const float3 dy = normalize(cross(N, dx)); + return make_transform(dx.x, dx.y, dx.z, 0.0f, dy.x, dy.y, dy.z, 0.0f, N.x, N.y, N.z, 0.0f); +} + +#ifndef __KERNEL_GPU__ + +ccl_device_inline Transform transform_zero() +{ + Transform zero = {zero_float4(), zero_float4(), zero_float4()}; + return zero; +} + +ccl_device_inline Transform operator*(const Transform a, const Transform b) +{ + float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f); + float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f); + float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f); + float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f); + + Transform t; + t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w)); + t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w)); + t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w)); + + return t; +} + +ccl_device_inline void print_transform(const char *label, const Transform &t) +{ + print_float4(label, t.x); + print_float4(label, t.y); + print_float4(label, t.z); + printf("\n"); +} + +ccl_device_inline Transform transform_translate(float3 t) +{ + return make_transform(1, 0, 0, t.x, 0, 1, 0, t.y, 0, 0, 1, t.z); +} + +ccl_device_inline Transform transform_translate(float x, float y, float z) +{ + return transform_translate(make_float3(x, y, z)); +} + +ccl_device_inline Transform transform_scale(float3 s) +{ + return make_transform(s.x, 0, 0, 0, 0, s.y, 0, 0, 0, 0, s.z, 0); +} + +ccl_device_inline Transform transform_scale(float x, float y, float z) +{ + return transform_scale(make_float3(x, y, z)); +} + +ccl_device_inline Transform transform_rotate(float angle, float3 axis) +{ + float s = sinf(angle); + float c = cosf(angle); + float t = 1.0f - c; + + axis = normalize(axis); + + return make_transform(axis.x * axis.x * t + c, + axis.x * axis.y * t - s * axis.z, + axis.x * axis.z * t + s * axis.y, + 0.0f, + + axis.y * axis.x * t + s * axis.z, + axis.y * axis.y * t + c, + axis.y * axis.z * t - s * axis.x, + 0.0f, + + axis.z * axis.x * t - s * axis.y, + axis.z * axis.y * t + s * axis.x, + axis.z * axis.z * t + c, + 0.0f); +} + +/* Euler is assumed to be in XYZ order. 
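As a worked illustration of how the helpers above compose (the function name transform_example() is hypothetical): composition is right to left, so the right-most factor is applied to the point first.

ccl_device_inline float3 transform_example()
{
  /* Illustration only: scale, then rotate 90 degrees about Z, then translate. */
  const Transform tfm = transform_translate(1.0f, 2.0f, 3.0f) *
                        transform_rotate(M_PI_F * 0.5f, make_float3(0.0f, 0.0f, 1.0f)) *
                        transform_scale(2.0f, 2.0f, 2.0f);

  /* (1,0,0) -> scale -> (2,0,0) -> rotate -> (0,2,0) -> translate -> (1,4,3). */
  return transform_point(&tfm, make_float3(1.0f, 0.0f, 0.0f));
}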
*/ +ccl_device_inline Transform transform_euler(float3 euler) +{ + return transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) * + transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) * + transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f)); +} + +ccl_device_inline Transform transform_identity() +{ + return transform_scale(1.0f, 1.0f, 1.0f); +} + +ccl_device_inline bool operator==(const Transform &A, const Transform &B) +{ + return memcmp(&A, &B, sizeof(Transform)) == 0; +} + +ccl_device_inline bool operator!=(const Transform &A, const Transform &B) +{ + return !(A == B); +} + +ccl_device_inline float3 transform_get_column(const Transform *t, int column) +{ + return make_float3(t->x[column], t->y[column], t->z[column]); +} + +ccl_device_inline void transform_set_column(Transform *t, int column, float3 value) +{ + t->x[column] = value.x; + t->y[column] = value.y; + t->z[column] = value.z; +} + +Transform transform_inverse(const Transform &a); +Transform transform_transposed_inverse(const Transform &a); + +ccl_device_inline bool transform_uniform_scale(const Transform &tfm, float &scale) +{ + /* the epsilon here is quite arbitrary, but this function is only used for + * surface area and bump, where we expect it to not be so sensitive */ + float eps = 1e-6f; + + float sx = len_squared(float4_to_float3(tfm.x)); + float sy = len_squared(float4_to_float3(tfm.y)); + float sz = len_squared(float4_to_float3(tfm.z)); + float stx = len_squared(transform_get_column(&tfm, 0)); + float sty = len_squared(transform_get_column(&tfm, 1)); + float stz = len_squared(transform_get_column(&tfm, 2)); + + if (fabsf(sx - sy) < eps && fabsf(sx - sz) < eps && fabsf(sx - stx) < eps && + fabsf(sx - sty) < eps && fabsf(sx - stz) < eps) { + scale = sx; + return true; + } + + return false; +} + +ccl_device_inline bool transform_negative_scale(const Transform &tfm) +{ + float3 c0 = transform_get_column(&tfm, 0); + float3 c1 = transform_get_column(&tfm, 1); + float3 c2 = transform_get_column(&tfm, 2); + + return (dot(cross(c0, c1), c2) < 0.0f); +} + +ccl_device_inline Transform transform_clear_scale(const Transform &tfm) +{ + Transform ntfm = tfm; + + transform_set_column(&ntfm, 0, normalize(transform_get_column(&ntfm, 0))); + transform_set_column(&ntfm, 1, normalize(transform_get_column(&ntfm, 1))); + transform_set_column(&ntfm, 2, normalize(transform_get_column(&ntfm, 2))); + + return ntfm; +} + +ccl_device_inline Transform transform_empty() +{ + return make_transform(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +#endif + +/* Motion Transform */ + +ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) +{ + /* Optix is using lerp to interpolate motion transformations. 
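A common use of the transposed inverse declared above is transforming normals: under non-uniform scale a normal must be multiplied by the inverse transpose of the matrix to stay perpendicular to the surface. A minimal sketch, assuming tfm maps object space to world space; the helper name is hypothetical:

ccl_device_inline float3 normal_object_to_world_example(const Transform &tfm, const float3 N_object)
{
  const Transform itfm = transform_inverse(tfm);
  /* transform_direction_transposed() applies the transpose, so the combination
   * is the inverse transpose applied to the normal direction. */
  return normalize(transform_direction_transposed(&itfm, N_object));
}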
*/ +#ifdef __KERNEL_OPTIX__ + return normalize((1.0f - t) * q1 + t * q2); +#else /* __KERNEL_OPTIX__ */ + /* note: this does not ensure rotation around shortest angle, q1 and q2 + * are assumed to be matched already in transform_motion_decompose */ + float costheta = dot(q1, q2); + + /* possible optimization: it might be possible to precompute theta/qperp */ + + if (costheta > 0.9995f) { + /* linear interpolation in degenerate case */ + return normalize((1.0f - t) * q1 + t * q2); + } + else { + /* slerp */ + float theta = acosf(clamp(costheta, -1.0f, 1.0f)); + float4 qperp = normalize(q2 - q1 * costheta); + float thetap = theta * t; + return q1 * cosf(thetap) + qperp * sinf(thetap); + } +#endif /* __KERNEL_OPTIX__ */ +} + +ccl_device_inline Transform transform_quick_inverse(Transform M) +{ + /* possible optimization: can we avoid doing this altogether and construct + * the inverse matrix directly from negated translation, transposed rotation, + * scale can be inverted but what about shearing? */ + Transform R; + float det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) + + M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z); + if (det == 0.0f) { + M.x.x += 1e-8f; + M.y.y += 1e-8f; + M.z.z += 1e-8f; + det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) + + M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z); + } + det = (det != 0.0f) ? 1.0f / det : 0.0f; + + float3 Rx = det * make_float3(M.z.z * M.y.y - M.z.y * M.y.z, + M.z.y * M.x.z - M.z.z * M.x.y, + M.y.z * M.x.y - M.y.y * M.x.z); + float3 Ry = det * make_float3(M.z.x * M.y.z - M.z.z * M.y.x, + M.z.z * M.x.x - M.z.x * M.x.z, + M.y.x * M.x.z - M.y.z * M.x.x); + float3 Rz = det * make_float3(M.z.y * M.y.x - M.z.x * M.y.y, + M.z.x * M.x.y - M.z.y * M.x.x, + M.y.y * M.x.x - M.y.x * M.x.y); + float3 T = -make_float3(M.x.w, M.y.w, M.z.w); + + R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T)); + R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T)); + R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T)); + + return R; +} + +ccl_device_inline void transform_compose(ccl_private Transform *tfm, + ccl_private const DecomposedTransform *decomp) +{ + /* rotation */ + float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc; + + q0 = M_SQRT2_F * decomp->x.w; + q1 = M_SQRT2_F * decomp->x.x; + q2 = M_SQRT2_F * decomp->x.y; + q3 = M_SQRT2_F * decomp->x.z; + + qda = q0 * q1; + qdb = q0 * q2; + qdc = q0 * q3; + qaa = q1 * q1; + qab = q1 * q2; + qac = q1 * q3; + qbb = q2 * q2; + qbc = q2 * q3; + qcc = q3 * q3; + + float3 rotation_x = make_float3(1.0f - qbb - qcc, -qdc + qab, qdb + qac); + float3 rotation_y = make_float3(qdc + qab, 1.0f - qaa - qcc, -qda + qbc); + float3 rotation_z = make_float3(-qdb + qac, qda + qbc, 1.0f - qaa - qbb); + + /* scale */ + float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y); + float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z); + float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w); + + /* compose with translation */ + tfm->x = make_float4( + dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x); + tfm->y = make_float4( + dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y); + tfm->z = make_float4( + dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z); +} + +/* Interpolate from array of decomposed transforms. 
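A quick sanity check for quat_interpolate() above (quaternions here keep the vector part in x/y/z and the scalar part in w; the wrapper name is hypothetical): halfway between the identity and a 90 degree rotation about Z should give the 45 degree rotation.

ccl_device_inline float4 quat_interpolate_example()
{
  const float4 q_id = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
  const float4 q_z90 = transform_to_quat(
      transform_rotate(M_PI_F * 0.5f, make_float3(0.0f, 0.0f, 1.0f)));

  /* q_z90 = (0, 0, sin(pi/4), cos(pi/4)); the result is approximately
   * (0, 0, sin(pi/8), cos(pi/8)) = (0, 0, 0.3827, 0.9239). */
  return quat_interpolate(q_id, q_z90, 0.5f);
}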
*/ +ccl_device void transform_motion_array_interpolate(Transform *tfm, + const DecomposedTransform *motion, + uint numsteps, + float time) +{ + /* Figure out which steps we need to interpolate. */ + int maxstep = numsteps - 1; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + const DecomposedTransform *a = motion + step; + const DecomposedTransform *b = motion + step + 1; + + /* Interpolate rotation, translation and scale. */ + DecomposedTransform decomp; + decomp.x = quat_interpolate(a->x, b->x, t); + decomp.y = (1.0f - t) * a->y + t * b->y; + decomp.z = (1.0f - t) * a->z + t * b->z; + decomp.w = (1.0f - t) * a->w + t * b->w; + + /* Compose rotation, translation, scale into matrix. */ + transform_compose(tfm, &decomp); +} + +ccl_device_inline bool transform_isfinite_safe(ccl_private Transform *tfm) +{ + return isfinite4_safe(tfm->x) && isfinite4_safe(tfm->y) && isfinite4_safe(tfm->z); +} + +ccl_device_inline bool transform_decomposed_isfinite_safe(ccl_private DecomposedTransform *decomp) +{ + return isfinite4_safe(decomp->x) && isfinite4_safe(decomp->y) && isfinite4_safe(decomp->z) && + isfinite4_safe(decomp->w); +} + +#ifndef __KERNEL_GPU__ + +class BoundBox2D; + +ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B) +{ + return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0; +} + +float4 transform_to_quat(const Transform &tfm); +void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size); +Transform transform_from_viewplane(BoundBox2D &viewplane); + +#endif + +/* TODO: This can be removed when we know if no devices will require explicit + * address space qualifiers for this case. */ + +#define transform_point_auto transform_point +#define transform_direction_auto transform_direction +#define transform_direction_transposed_auto transform_direction_transposed + +CCL_NAMESPACE_END + +#endif /* __UTIL_TRANSFORM_H__ */ diff --git a/intern/cycles/util/types.h b/intern/cycles/util/types.h new file mode 100644 index 00000000000..697dc2b44ea --- /dev/null +++ b/intern/cycles/util/types.h @@ -0,0 +1,138 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_H__ +#define __UTIL_TYPES_H__ + +#include + +/* Standard Integer Types */ + +#if !defined(__KERNEL_GPU__) +# include +#endif + +#include "util/defines.h" + +#ifndef __KERNEL_GPU__ +# include "util/optimization.h" +# include "util/simd.h" +#endif + +CCL_NAMESPACE_BEGIN + +/* Types + * + * Define simpler unsigned type names, and integer with defined number of bits. + * Also vector types, named to be compatible with OpenCL builtin types, while + * working for CUDA and C++ too. 
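Referring back to transform_motion_array_interpolate() above, the time value selects both the segment and the local interpolation factor. A worked example with three motion steps; the wrapper name and the motion argument are assumptions for the sketch:

ccl_device_inline Transform motion_interpolate_example(const DecomposedTransform motion[3])
{
  /* numsteps = 3, time = 0.75:
   *   maxstep = 2
   *   step    = min((int)(0.75 * 2), 1) = 1   (segment between steps 1 and 2)
   *   t       = 0.75 * 2 - 1 = 0.5            (halfway along that segment) */
  Transform tfm;
  transform_motion_array_interpolate(&tfm, motion, 3, 0.75f);
  return tfm;
}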
*/ + +/* Shorter Unsigned Names */ + +typedef unsigned char uchar; +typedef unsigned int uint; +typedef unsigned short ushort; + +/* Fixed Bits Types */ + +#ifndef __KERNEL_GPU__ +/* Generic Memory Pointer */ + +typedef uint64_t device_ptr; +#endif /* __KERNEL_GPU__ */ + +ccl_device_inline size_t align_up(size_t offset, size_t alignment) +{ + return (offset + alignment - 1) & ~(alignment - 1); +} + +ccl_device_inline size_t divide_up(size_t x, size_t y) +{ + return (x + y - 1) / y; +} + +ccl_device_inline size_t round_up(size_t x, size_t multiple) +{ + return ((x + multiple - 1) / multiple) * multiple; +} + +ccl_device_inline size_t round_down(size_t x, size_t multiple) +{ + return (x / multiple) * multiple; +} + +ccl_device_inline bool is_power_of_two(size_t x) +{ + return (x & (x - 1)) == 0; +} + +CCL_NAMESPACE_END + +/* Vectorized types declaration. */ +#include "util/types_uchar2.h" +#include "util/types_uchar3.h" +#include "util/types_uchar4.h" + +#include "util/types_int2.h" +#include "util/types_int3.h" +#include "util/types_int4.h" + +#include "util/types_uint2.h" +#include "util/types_uint3.h" +#include "util/types_uint4.h" + +#include "util/types_ushort4.h" + +#include "util/types_float2.h" +#include "util/types_float3.h" +#include "util/types_float4.h" +#include "util/types_float8.h" + +#include "util/types_vector3.h" + +/* Vectorized types implementation. */ +#include "util/types_uchar2_impl.h" +#include "util/types_uchar3_impl.h" +#include "util/types_uchar4_impl.h" + +#include "util/types_int2_impl.h" +#include "util/types_int3_impl.h" +#include "util/types_int4_impl.h" + +#include "util/types_uint2_impl.h" +#include "util/types_uint3_impl.h" +#include "util/types_uint4_impl.h" + +#include "util/types_float2_impl.h" +#include "util/types_float3_impl.h" +#include "util/types_float4_impl.h" +#include "util/types_float8_impl.h" + +#include "util/types_vector3_impl.h" + +/* SSE types. */ +#ifndef __KERNEL_GPU__ +# include "util/sseb.h" +# include "util/ssef.h" +# include "util/ssei.h" +# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) +# include "util/avxb.h" +# include "util/avxf.h" +# include "util/avxi.h" +# endif +#endif + +#endif /* __UTIL_TYPES_H__ */ diff --git a/intern/cycles/util/types_float2.h b/intern/cycles/util/types_float2.h new file mode 100644 index 00000000000..e71204bef5b --- /dev/null +++ b/intern/cycles/util/types_float2.h @@ -0,0 +1,40 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_FLOAT2_H__ +#define __UTIL_TYPES_FLOAT2_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
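The integer helpers in util/types.h above are mostly used for buffer and memory sizing; a few example values (align_up() assumes a power-of-two alignment because it works with a bit mask, and the same bit trick makes is_power_of_two() report true for zero). The wrapper below is only an illustration:

ccl_device_inline void alignment_helpers_example()
{
  const size_t a = align_up(13, 8);   /* 16 */
  const size_t b = divide_up(10, 4);  /* 3 */
  const size_t c = round_up(10, 4);   /* 12 */
  const size_t d = round_down(10, 4); /* 8 */
  const bool p = is_power_of_two(0);  /* true: edge case of (x & (x - 1)) == 0 */
  (void)a;
  (void)b;
  (void)c;
  (void)d;
  (void)p;
}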
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct float2 { + float x, y; + + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); +}; + +ccl_device_inline float2 make_float2(float x, float y); +ccl_device_inline void print_float2(const char *label, const float2 &a); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT2_H__ */ diff --git a/intern/cycles/util/types_float2_impl.h b/intern/cycles/util/types_float2_impl.h new file mode 100644 index 00000000000..c02c13f8c47 --- /dev/null +++ b/intern/cycles/util/types_float2_impl.h @@ -0,0 +1,59 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_FLOAT2_IMPL_H__ +#define __UTIL_TYPES_FLOAT2_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +__forceinline float float2::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +__forceinline float &float2::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +ccl_device_inline float2 make_float2(float x, float y) +{ + float2 a = {x, y}; + return a; +} + +ccl_device_inline void print_float2(const char *label, const float2 &a) +{ + printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT2_IMPL_H__ */ diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h new file mode 100644 index 00000000000..f990367e7b8 --- /dev/null +++ b/intern/cycles/util/types_float3.h @@ -0,0 +1,60 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_FLOAT3_H__ +#define __UTIL_TYPES_FLOAT3_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct ccl_try_align(16) float3 +{ +# ifdef __KERNEL_SSE__ + union { + __m128 m128; + struct { + float x, y, z, w; + }; + }; + + __forceinline float3(); + __forceinline float3(const float3 &a); + __forceinline explicit float3(const __m128 &a); + + __forceinline operator const __m128 &() const; + __forceinline operator __m128 &(); + + __forceinline float3 &operator=(const float3 &a); +# else /* __KERNEL_SSE__ */ + float x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); +}; + +ccl_device_inline float3 make_float3(float f); +ccl_device_inline float3 make_float3(float x, float y, float z); +ccl_device_inline void print_float3(const char *label, const float3 &a); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT3_H__ */ diff --git a/intern/cycles/util/types_float3_impl.h b/intern/cycles/util/types_float3_impl.h new file mode 100644 index 00000000000..76a9067acc7 --- /dev/null +++ b/intern/cycles/util/types_float3_impl.h @@ -0,0 +1,103 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_FLOAT3_IMPL_H__ +#define __UTIL_TYPES_FLOAT3_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +# ifdef __KERNEL_SSE__ +__forceinline float3::float3() +{ +} + +__forceinline float3::float3(const float3 &a) : m128(a.m128) +{ +} + +__forceinline float3::float3(const __m128 &a) : m128(a) +{ +} + +__forceinline float3::operator const __m128 &() const +{ + return m128; +} + +__forceinline float3::operator __m128 &() +{ + return m128; +} + +__forceinline float3 &float3::operator=(const float3 &a) +{ + m128 = a.m128; + return *this; +} +# endif /* __KERNEL_SSE__ */ + +__forceinline float float3::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +__forceinline float &float3::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +ccl_device_inline float3 make_float3(float f) +{ +# ifdef __KERNEL_SSE__ + float3 a(_mm_set1_ps(f)); +# else + float3 a = {f, f, f, f}; +# endif + return a; +} + +ccl_device_inline float3 make_float3(float x, float y, float z) +{ +# ifdef __KERNEL_SSE__ + float3 a(_mm_set_ps(0.0f, z, y, x)); +# else + float3 a = {x, y, z, 0.0f}; +# endif + return a; +} + +ccl_device_inline void print_float3(const char *label, const float3 &a) +{ + printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT3_IMPL_H__ */ diff --git a/intern/cycles/util/types_float4.h b/intern/cycles/util/types_float4.h new file mode 100644 index 00000000000..8d4e07e7e4d --- /dev/null +++ b/intern/cycles/util/types_float4.h @@ -0,0 +1,63 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_FLOAT4_H__ +#define __UTIL_TYPES_FLOAT4_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
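A note on make_float3(x, y, z) above: float3 carries a fourth, hidden component so it can alias an __m128, and the three-argument constructor clears that pad on both code paths, so the type is 16 bytes whether SSE is enabled or not. An illustrative compile-time check (not a check the header itself performs):

static_assert(sizeof(float3) == 16, "float3 is expected to be padded to 16 bytes");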
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct int4; + +struct ccl_try_align(16) float4 +{ +# ifdef __KERNEL_SSE__ + union { + __m128 m128; + struct { + float x, y, z, w; + }; + }; + + __forceinline float4(); + __forceinline explicit float4(const __m128 &a); + + __forceinline operator const __m128 &() const; + __forceinline operator __m128 &(); + + __forceinline float4 &operator=(const float4 &a); + +# else /* __KERNEL_SSE__ */ + float x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); +}; + +ccl_device_inline float4 make_float4(float f); +ccl_device_inline float4 make_float4(float x, float y, float z, float w); +ccl_device_inline float4 make_float4(const int4 &i); +ccl_device_inline void print_float4(const char *label, const float4 &a); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT4_H__ */ diff --git a/intern/cycles/util/types_float4_impl.h b/intern/cycles/util/types_float4_impl.h new file mode 100644 index 00000000000..d75715332e5 --- /dev/null +++ b/intern/cycles/util/types_float4_impl.h @@ -0,0 +1,109 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_FLOAT4_IMPL_H__ +#define __UTIL_TYPES_FLOAT4_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +# ifdef __KERNEL_SSE__ +__forceinline float4::float4() +{ +} + +__forceinline float4::float4(const __m128 &a) : m128(a) +{ +} + +__forceinline float4::operator const __m128 &() const +{ + return m128; +} + +__forceinline float4::operator __m128 &() +{ + return m128; +} + +__forceinline float4 &float4::operator=(const float4 &a) +{ + m128 = a.m128; + return *this; +} +# endif /* __KERNEL_SSE__ */ + +__forceinline float float4::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +__forceinline float &float4::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +ccl_device_inline float4 make_float4(float f) +{ +# ifdef __KERNEL_SSE__ + float4 a(_mm_set1_ps(f)); +# else + float4 a = {f, f, f, f}; +# endif + return a; +} + +ccl_device_inline float4 make_float4(float x, float y, float z, float w) +{ +# ifdef __KERNEL_SSE__ + float4 a(_mm_set_ps(w, z, y, x)); +# else + float4 a = {x, y, z, w}; +# endif + return a; +} + +ccl_device_inline float4 make_float4(const int4 &i) +{ +# ifdef __KERNEL_SSE__ + float4 a(_mm_cvtepi32_ps(i.m128)); +# else + float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; +# endif + return a; +} + +ccl_device_inline void print_float4(const char *label, const float4 &a) +{ + printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w); +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT4_IMPL_H__ */ diff --git a/intern/cycles/util/types_float8.h b/intern/cycles/util/types_float8.h new file mode 100644 index 00000000000..cf1f66b7622 --- /dev/null +++ b/intern/cycles/util/types_float8.h @@ -0,0 +1,74 @@ +/* + * Original code Copyright 2017, Intel Corporation + * Modifications Copyright 2018, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __UTIL_TYPES_FLOAT8_H__ +#define __UTIL_TYPES_FLOAT8_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ + +struct ccl_try_align(32) float8 +{ +# ifdef __KERNEL_AVX2__ + union { + __m256 m256; + struct { + float a, b, c, d, e, f, g, h; + }; + }; + + __forceinline float8(); + __forceinline float8(const float8 &a); + __forceinline explicit float8(const __m256 &a); + + __forceinline operator const __m256 &() const; + __forceinline operator __m256 &(); + + __forceinline float8 &operator=(const float8 &a); + +# else /* __KERNEL_AVX2__ */ + float a, b, c, d, e, f, g, h; +# endif /* __KERNEL_AVX2__ */ + + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); +}; + +ccl_device_inline float8 make_float8(float f); +ccl_device_inline float8 +make_float8(float a, float b, float c, float d, float e, float f, float g, float h); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT8_H__ */ diff --git a/intern/cycles/util/types_float8_impl.h b/intern/cycles/util/types_float8_impl.h new file mode 100644 index 00000000000..a795666adc7 --- /dev/null +++ b/intern/cycles/util/types_float8_impl.h @@ -0,0 +1,112 @@ +/* + * Original code Copyright 2017, Intel Corporation + * Modifications Copyright 2018, Blender Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __UTIL_TYPES_FLOAT8_IMPL_H__ +#define __UTIL_TYPES_FLOAT8_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +# ifdef __KERNEL_AVX2__ +__forceinline float8::float8() +{ +} + +__forceinline float8::float8(const float8 &f) : m256(f.m256) +{ +} + +__forceinline float8::float8(const __m256 &f) : m256(f) +{ +} + +__forceinline float8::operator const __m256 &() const +{ + return m256; +} + +__forceinline float8::operator __m256 &() +{ + return m256; +} + +__forceinline float8 &float8::operator=(const float8 &f) +{ + m256 = f.m256; + return *this; +} +# endif /* __KERNEL_AVX2__ */ + +__forceinline float float8::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 8); + return *(&a + i); +} + +__forceinline float &float8::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 8); + return *(&a + i); +} + +ccl_device_inline float8 make_float8(float f) +{ +# ifdef __KERNEL_AVX2__ + float8 r(_mm256_set1_ps(f)); +# else + float8 r = {f, f, f, f, f, f, f, f}; +# endif + return r; +} + +ccl_device_inline float8 +make_float8(float a, float b, float c, float d, float e, float f, float g, float h) +{ +# ifdef __KERNEL_AVX2__ + float8 r(_mm256_set_ps(a, b, c, d, e, f, g, h)); +# else + float8 r = {a, b, c, d, e, f, g, h}; +# endif + return r; +} + +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_FLOAT8_IMPL_H__ */ diff --git a/intern/cycles/util/types_int2.h b/intern/cycles/util/types_int2.h new file mode 100644 index 00000000000..75970577d77 --- /dev/null +++ b/intern/cycles/util/types_int2.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT2_H__ +#define __UTIL_TYPES_INT2_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct int2 { + int x, y; + + __forceinline int operator[](int i) const; + __forceinline int &operator[](int i); +}; + +ccl_device_inline int2 make_int2(int x, int y); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT2_H__ */ diff --git a/intern/cycles/util/types_int2_impl.h b/intern/cycles/util/types_int2_impl.h new file mode 100644 index 00000000000..efa63cdfd2a --- /dev/null +++ b/intern/cycles/util/types_int2_impl.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __UTIL_TYPES_INT2_IMPL_H__ +#define __UTIL_TYPES_INT2_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +int int2::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +int &int2::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +ccl_device_inline int2 make_int2(int x, int y) +{ + int2 a = {x, y}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT2_IMPL_H__ */ diff --git a/intern/cycles/util/types_int3.h b/intern/cycles/util/types_int3.h new file mode 100644 index 00000000000..071a886136e --- /dev/null +++ b/intern/cycles/util/types_int3.h @@ -0,0 +1,60 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT3_H__ +#define __UTIL_TYPES_INT3_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct ccl_try_align(16) int3 +{ +# ifdef __KERNEL_SSE__ + union { + __m128i m128; + struct { + int x, y, z, w; + }; + }; + + __forceinline int3(); + __forceinline int3(const int3 &a); + __forceinline explicit int3(const __m128i &a); + + __forceinline operator const __m128i &() const; + __forceinline operator __m128i &(); + + __forceinline int3 &operator=(const int3 &a); +# else /* __KERNEL_SSE__ */ + int x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline int operator[](int i) const; + __forceinline int &operator[](int i); +}; + +ccl_device_inline int3 make_int3(int i); +ccl_device_inline int3 make_int3(int x, int y, int z); +ccl_device_inline void print_int3(const char *label, const int3 &a); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT3_H__ */ diff --git a/intern/cycles/util/types_int3_impl.h b/intern/cycles/util/types_int3_impl.h new file mode 100644 index 00000000000..c91c64b804e --- /dev/null +++ b/intern/cycles/util/types_int3_impl.h @@ -0,0 +1,104 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT3_IMPL_H__ +#define __UTIL_TYPES_INT3_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +# ifdef __KERNEL_SSE__ +__forceinline int3::int3() +{ +} + +__forceinline int3::int3(const __m128i &a) : m128(a) +{ +} + +__forceinline int3::int3(const int3 &a) : m128(a.m128) +{ +} + +__forceinline int3::operator const __m128i &() const +{ + return m128; +} + +__forceinline int3::operator __m128i &() +{ + return m128; +} + +__forceinline int3 &int3::operator=(const int3 &a) +{ + m128 = a.m128; + return *this; +} +# endif /* __KERNEL_SSE__ */ + +__forceinline int int3::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +__forceinline int &int3::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +ccl_device_inline int3 make_int3(int i) +{ +# ifdef __KERNEL_SSE__ + int3 a(_mm_set1_epi32(i)); +# else + int3 a = {i, i, i, i}; +# endif + return a; +} + +ccl_device_inline int3 make_int3(int x, int y, int z) +{ +# ifdef __KERNEL_SSE__ + int3 a(_mm_set_epi32(0, z, y, x)); +# else + int3 a = {x, y, z, 0}; +# endif + + return a; +} + +ccl_device_inline void print_int3(const char *label, const int3 &a) +{ + printf("%s: %d %d %d\n", label, a.x, a.y, a.z); +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT3_IMPL_H__ */ diff --git a/intern/cycles/util/types_int4.h b/intern/cycles/util/types_int4.h new file mode 100644 index 00000000000..cb497d70035 --- /dev/null +++ b/intern/cycles/util/types_int4.h @@ -0,0 +1,66 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT4_H__ +#define __UTIL_TYPES_INT4_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ + +struct float3; +struct float4; + +struct ccl_try_align(16) int4 +{ +# ifdef __KERNEL_SSE__ + union { + __m128i m128; + struct { + int x, y, z, w; + }; + }; + + __forceinline int4(); + __forceinline int4(const int4 &a); + __forceinline explicit int4(const __m128i &a); + + __forceinline operator const __m128i &() const; + __forceinline operator __m128i &(); + + __forceinline int4 &operator=(const int4 &a); +# else /* __KERNEL_SSE__ */ + int x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline int operator[](int i) const; + __forceinline int &operator[](int i); +}; + +ccl_device_inline int4 make_int4(int i); +ccl_device_inline int4 make_int4(int x, int y, int z, int w); +ccl_device_inline int4 make_int4(const float3 &f); +ccl_device_inline int4 make_int4(const float4 &f); +ccl_device_inline void print_int4(const char *label, const int4 &a); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT4_H__ */ diff --git a/intern/cycles/util/types_int4_impl.h b/intern/cycles/util/types_int4_impl.h new file mode 100644 index 00000000000..258b42c029e --- /dev/null +++ b/intern/cycles/util/types_int4_impl.h @@ -0,0 +1,123 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT4_IMPL_H__ +#define __UTIL_TYPES_INT4_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +#ifndef __KERNEL_GPU__ +# include +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +# ifdef __KERNEL_SSE__ +__forceinline int4::int4() +{ +} + +__forceinline int4::int4(const int4 &a) : m128(a.m128) +{ +} + +__forceinline int4::int4(const __m128i &a) : m128(a) +{ +} + +__forceinline int4::operator const __m128i &() const +{ + return m128; +} + +__forceinline int4::operator __m128i &() +{ + return m128; +} + +__forceinline int4 &int4::operator=(const int4 &a) +{ + m128 = a.m128; + return *this; +} +# endif /* __KERNEL_SSE__ */ + +__forceinline int int4::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +__forceinline int &int4::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +ccl_device_inline int4 make_int4(int i) +{ +# ifdef __KERNEL_SSE__ + int4 a(_mm_set1_epi32(i)); +# else + int4 a = {i, i, i, i}; +# endif + return a; +} + +ccl_device_inline int4 make_int4(int x, int y, int z, int w) +{ +# ifdef __KERNEL_SSE__ + int4 a(_mm_set_epi32(w, z, y, x)); +# else + int4 a = {x, y, z, w}; +# endif + return a; +} + +ccl_device_inline int4 make_int4(const float3 &f) +{ +# ifdef __KERNEL_SSE__ + int4 a(_mm_cvtps_epi32(f.m128)); +# else + int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; +# endif + return a; +} + +ccl_device_inline int4 make_int4(const float4 &f) +{ +# ifdef __KERNEL_SSE__ + int4 a(_mm_cvtps_epi32(f.m128)); +# else + int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; +# endif + return a; +} + +ccl_device_inline void print_int4(const char *label, const int4 &a) +{ + printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT4_IMPL_H__ */ diff --git a/intern/cycles/util/types_uchar2.h b/intern/cycles/util/types_uchar2.h new file mode 100644 index 00000000000..0dc1d46bf29 --- /dev/null +++ b/intern/cycles/util/types_uchar2.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR2_H__ +#define __UTIL_TYPES_UCHAR2_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uchar2 { + uchar x, y; + + __forceinline uchar operator[](int i) const; + __forceinline uchar &operator[](int i); +}; + +ccl_device_inline uchar2 make_uchar2(uchar x, uchar y); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR2_H__ */ diff --git a/intern/cycles/util/types_uchar2_impl.h b/intern/cycles/util/types_uchar2_impl.h new file mode 100644 index 00000000000..234a71a2247 --- /dev/null +++ b/intern/cycles/util/types_uchar2_impl.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
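Worth noting about make_int4() from a float4 above: the two code paths do not round identically, which only shows up for non-integral inputs. A small illustration; the function name is hypothetical:

ccl_device_inline int4 float4_to_int4_rounding_example()
{
  /* SSE path:    _mm_cvtps_epi32 rounds to nearest (even) under the default
   *              MXCSR rounding mode       -> (2, -2, 2, -2)
   * scalar path: the C cast truncates toward zero -> (1, -1, 2, -2) */
  return make_int4(make_float4(1.6f, -1.6f, 2.5f, -2.5f));
}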
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR2_IMPL_H__ +#define __UTIL_TYPES_UCHAR2_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +uchar uchar2::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +uchar &uchar2::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +ccl_device_inline uchar2 make_uchar2(uchar x, uchar y) +{ + uchar2 a = {x, y}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR2_IMPL_H__ */ diff --git a/intern/cycles/util/types_uchar3.h b/intern/cycles/util/types_uchar3.h new file mode 100644 index 00000000000..d3913afb3a2 --- /dev/null +++ b/intern/cycles/util/types_uchar3.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR3_H__ +#define __UTIL_TYPES_UCHAR3_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uchar3 { + uchar x, y, z; + + __forceinline uchar operator[](int i) const; + __forceinline uchar &operator[](int i); +}; + +ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR3_H__ */ diff --git a/intern/cycles/util/types_uchar3_impl.h b/intern/cycles/util/types_uchar3_impl.h new file mode 100644 index 00000000000..90f510e3b28 --- /dev/null +++ b/intern/cycles/util/types_uchar3_impl.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR3_IMPL_H__ +#define __UTIL_TYPES_UCHAR3_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +uchar uchar3::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +uchar &uchar3::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); +} + +ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z) +{ + uchar3 a = {x, y, z}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR3_IMPL_H__ */ diff --git a/intern/cycles/util/types_uchar4.h b/intern/cycles/util/types_uchar4.h new file mode 100644 index 00000000000..bfe1c06acd8 --- /dev/null +++ b/intern/cycles/util/types_uchar4.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR4_H__ +#define __UTIL_TYPES_UCHAR4_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uchar4 { + uchar x, y, z, w; + + __forceinline uchar operator[](int i) const; + __forceinline uchar &operator[](int i); +}; + +ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR4_H__ */ diff --git a/intern/cycles/util/types_uchar4_impl.h b/intern/cycles/util/types_uchar4_impl.h new file mode 100644 index 00000000000..d15c74bed03 --- /dev/null +++ b/intern/cycles/util/types_uchar4_impl.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UCHAR4_IMPL_H__ +#define __UTIL_TYPES_UCHAR4_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +uchar uchar4::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +uchar &uchar4::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) +{ + uchar4 a = {x, y, z, w}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UCHAR4_IMPL_H__ */ diff --git a/intern/cycles/util/types_uint2.h b/intern/cycles/util/types_uint2.h new file mode 100644 index 00000000000..7419977040b --- /dev/null +++ b/intern/cycles/util/types_uint2.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT2_H__ +#define __UTIL_TYPES_UINT2_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uint2 { + uint x, y; + + __forceinline uint operator[](uint i) const; + __forceinline uint &operator[](uint i); +}; + +ccl_device_inline uint2 make_uint2(uint x, uint y); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UINT2_H__ */ diff --git a/intern/cycles/util/types_uint2_impl.h b/intern/cycles/util/types_uint2_impl.h new file mode 100644 index 00000000000..8427f9694b5 --- /dev/null +++ b/intern/cycles/util/types_uint2_impl.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT2_IMPL_H__ +#define __UTIL_TYPES_UINT2_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +__forceinline uint uint2::operator[](uint i) const +{ + util_assert(i < 2); + return *(&x + i); +} + +__forceinline uint &uint2::operator[](uint i) +{ + util_assert(i < 2); + return *(&x + i); +} + +ccl_device_inline uint2 make_uint2(uint x, uint y) +{ + uint2 a = {x, y}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UINT2_IMPL_H__ */ diff --git a/intern/cycles/util/types_uint3.h b/intern/cycles/util/types_uint3.h new file mode 100644 index 00000000000..1e97e7f2d36 --- /dev/null +++ b/intern/cycles/util/types_uint3.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT3_H__ +#define __UTIL_TYPES_UINT3_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uint3 { + uint x, y, z; + + __forceinline uint operator[](uint i) const; + __forceinline uint &operator[](uint i); +}; + +ccl_device_inline uint3 make_uint3(uint x, uint y, uint z); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UINT3_H__ */ diff --git a/intern/cycles/util/types_uint3_impl.h b/intern/cycles/util/types_uint3_impl.h new file mode 100644 index 00000000000..ba83cffe9a8 --- /dev/null +++ b/intern/cycles/util/types_uint3_impl.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT3_IMPL_H__ +#define __UTIL_TYPES_UINT3_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +__forceinline uint uint3::operator[](uint i) const +{ + util_assert(i < 3); + return *(&x + i); +} + +__forceinline uint &uint3::operator[](uint i) +{ + util_assert(i < 3); + return *(&x + i); +} + +ccl_device_inline uint3 make_uint3(uint x, uint y, uint z) +{ + uint3 a = {x, y, z}; + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UINT3_IMPL_H__ */ diff --git a/intern/cycles/util/types_uint4.h b/intern/cycles/util/types_uint4.h new file mode 100644 index 00000000000..b135877b890 --- /dev/null +++ b/intern/cycles/util/types_uint4.h @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT4_H__ +#define __UTIL_TYPES_UINT4_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." +#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +struct uint4 { + uint x, y, z, w; + + __forceinline uint operator[](uint i) const; + __forceinline uint &operator[](uint i); +}; + +ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_UINT4_H__ */ diff --git a/intern/cycles/util/types_uint4_impl.h b/intern/cycles/util/types_uint4_impl.h new file mode 100644 index 00000000000..b860fbfc49a --- /dev/null +++ b/intern/cycles/util/types_uint4_impl.h @@ -0,0 +1,48 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_UINT4_IMPL_H__ +#define __UTIL_TYPES_UINT4_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util/types.h instead." 
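All of these headers refuse direct inclusion and are only reachable through an umbrella header that defines __UTIL_TYPES_H__ first. A single-file sketch of that include discipline follows, compressed into one translation unit with hypothetical names.

// Single-file sketch (hypothetical names, not Cycles code) of the
// "include the umbrella header, not the parts" discipline used above.
#include <cstdio>

// What an umbrella header such as util/types.h would define before
// pulling in the individual parts.
#define DEMO_TYPES_H_INCLUDED

// --- contents of a hypothetical per-type part header ---
#ifndef DEMO_TYPES_H_INCLUDED
#  error "Do not include this file directly, include the umbrella header instead."
#endif

struct uint2_demo {
  unsigned x, y;
};

inline uint2_demo make_uint2_demo(unsigned x, unsigned y)
{
  uint2_demo a = {x, y};
  return a;
}

int main()
{
  uint2_demo v = make_uint2_demo(4u, 2u);
  printf("%u %u\n", v.x, v.y);
  return 0;
}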
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+__forceinline uint uint4::operator[](uint i) const
+{
+  util_assert(i < 4);
+  return *(&x + i);
+}
+
+__forceinline uint &uint4::operator[](uint i)
+{
+  util_assert(i < 4);
+  return *(&x + i);
+}
+
+ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w)
+{
+  uint4 a = {x, y, z, w};
+  return a;
+}
+#endif /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_TYPES_UINT4_IMPL_H__ */
diff --git a/intern/cycles/util/types_ushort4.h b/intern/cycles/util/types_ushort4.h
new file mode 100644
index 00000000000..8d080bcc1b9
--- /dev/null
+++ b/intern/cycles/util/types_ushort4.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_USHORT4_H__
+#define __UTIL_TYPES_USHORT4_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util/types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+
+struct ushort4 {
+  uint16_t x, y, z, w;
+};
+
+#endif
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_TYPES_USHORT4_H__ */
diff --git a/intern/cycles/util/types_vector3.h b/intern/cycles/util/types_vector3.h
new file mode 100644
index 00000000000..d46a0266855
--- /dev/null
+++ b/intern/cycles/util/types_vector3.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_VECTOR3_H__
+#define __UTIL_TYPES_VECTOR3_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util/types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+template<typename T> class vector3 {
+ public:
+  T x, y, z;
+
+  __forceinline vector3();
+  __forceinline vector3(const T &a);
+  __forceinline vector3(const T &x, const T &y, const T &z);
+};
+#endif /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_TYPES_VECTOR3_H__ */
diff --git a/intern/cycles/util/types_vector3_impl.h b/intern/cycles/util/types_vector3_impl.h
new file mode 100644
index 00000000000..ff6dcd85b12
--- /dev/null
+++ b/intern/cycles/util/types_vector3_impl.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
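types_vector3.h above declares a small template whose constructors are defined out of line, as they would be in the matching *_impl.h header. A standalone sketch of that split follows; the vector3_demo name is hypothetical.

// Standalone sketch (not Cycles code) of a declaration/implementation split
// for a simple 3-component template.
#include <cstdio>

template<typename T> class vector3_demo {
 public:
  T x, y, z;

  vector3_demo();
  vector3_demo(const T &a);
  vector3_demo(const T &x, const T &y, const T &z);
};

// Out-of-line definitions, which a separate *_impl.h header would provide.
template<typename T> vector3_demo<T>::vector3_demo() {}

template<typename T> vector3_demo<T>::vector3_demo(const T &a) : x(a), y(a), z(a) {}

template<typename T>
vector3_demo<T>::vector3_demo(const T &x, const T &y, const T &z) : x(x), y(y), z(z)
{
}

int main()
{
  vector3_demo<float> v(1.0f, 2.0f, 3.0f);
  printf("%g %g %g\n", v.x, v.y, v.z);
  return 0;
}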
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_VECTOR3_IMPL_H__
+#define __UTIL_TYPES_VECTOR3_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util/types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+template<typename T> ccl_always_inline vector3<T>::vector3()
+{
+}
+
+template<typename T> ccl_always_inline vector3<T>::vector3(const T &a) : x(a), y(a), z(a)
+{
+}
+
+template<typename T>
+ccl_always_inline vector3<T>::vector3(const T &x, const T &y, const T &z) : x(x), y(y), z(z)
+{
+}
+#endif /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_TYPES_VECTOR3_IMPL_H__ */
diff --git a/intern/cycles/util/unique_ptr.h b/intern/cycles/util/unique_ptr.h
new file mode 100644
index 00000000000..3181eafd43d
--- /dev/null
+++ b/intern/cycles/util/unique_ptr.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_UNIQUE_PTR_H__
+#define __UTIL_UNIQUE_PTR_H__
+
+#include <memory>
+
+CCL_NAMESPACE_BEGIN
+
+using std::make_unique;
+using std::unique_ptr;
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_UNIQUE_PTR_H__ */
diff --git a/intern/cycles/util/util_algorithm.h b/intern/cycles/util/util_algorithm.h
deleted file mode 100644
index 63abd4e92a3..00000000000
--- a/intern/cycles/util/util_algorithm.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
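unique_ptr.h above simply re-exports std::unique_ptr and std::make_unique into the project namespace so code inside it can use the unqualified names. A minimal usage sketch follows; the demo namespace and Session type are hypothetical.

// Minimal sketch (hypothetical names) of using re-exported unique_ptr /
// make_unique from inside a project namespace.
#include <cstdio>
#include <memory>

namespace demo { /* stands in for the project namespace */

using std::make_unique;
using std::unique_ptr;

struct Session {
  int samples;
};

unique_ptr<Session> create_session(int samples)
{
  unique_ptr<Session> session = make_unique<Session>();
  session->samples = samples;
  return session; /* ownership moves out to the caller */
}

}  // namespace demo

int main()
{
  demo::unique_ptr<demo::Session> s = demo::create_session(128);
  printf("samples: %d\n", s->samples);
  return 0;
}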
- */
-
-#ifndef __UTIL_ALGORITHM_H__
-#define __UTIL_ALGORITHM_H__
-
-#include <algorithm>
-
-CCL_NAMESPACE_BEGIN
-
-using std::max;
-using std::min;
-using std::remove;
-using std::sort;
-using std::stable_sort;
-using std::swap;
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_ALGORITHM_H__ */
diff --git a/intern/cycles/util/util_aligned_malloc.cpp b/intern/cycles/util/util_aligned_malloc.cpp
deleted file mode 100644
index 9b729cd4fc4..00000000000
--- a/intern/cycles/util/util_aligned_malloc.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/util_aligned_malloc.h"
-#include "util/util_guarded_allocator.h"
-
-#include <cassert>
-
-/* Adopted from Libmv. */
-
-#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__NetBSD__)
-/* Needed for memalign on Linux and _aligned_alloc on Windows. */
-#  ifdef FREE_WINDOWS
-/* Make sure _aligned_malloc is included. */
-#    ifdef __MSVCRT_VERSION__
-#      undef __MSVCRT_VERSION__
-#    endif
-#    define __MSVCRT_VERSION__ 0x0700
-#  endif /* FREE_WINDOWS */
-#  include <malloc.h>
-#else
-/* Apple's malloc is 16-byte aligned, and does not have malloc.h, so include
- * stdlib instead.
- */
-#  include <stdlib.h>
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-void *util_aligned_malloc(size_t size, int alignment)
-{
-#ifdef WITH_BLENDER_GUARDEDALLOC
-  return MEM_mallocN_aligned(size, alignment, "Cycles Aligned Alloc");
-#elif defined(_WIN32)
-  return _aligned_malloc(size, alignment);
-#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__APPLE__)
-  void *result;
-  if (posix_memalign(&result, alignment, size)) {
-    /* Non-zero means allocation error
-     * either no allocation or bad alignment value.
-     */
-    return NULL;
-  }
-  return result;
-#else /* This is for Linux. */
-  return memalign(alignment, size);
-#endif
-}
-
-void util_aligned_free(void *ptr)
-{
-#if defined(WITH_BLENDER_GUARDEDALLOC)
-  if (ptr != NULL) {
-    MEM_freeN(ptr);
-  }
-#elif defined(_WIN32)
-  _aligned_free(ptr);
-#else
-  free(ptr);
-#endif
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_aligned_malloc.h b/intern/cycles/util/util_aligned_malloc.h
deleted file mode 100644
index df7d93c056d..00000000000
--- a/intern/cycles/util/util_aligned_malloc.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
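util_aligned_malloc.cpp above switches between the Blender guarded allocator, _aligned_malloc, posix_memalign and memalign depending on the platform. A simplified standalone sketch of that switch follows, with the guarded-allocator path omitted and posix_memalign assumed available on the non-Windows path.

// Simplified standalone sketch (not Cycles code) of platform-dependent
// aligned allocation and the matching free.
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#if defined(_WIN32)
#  include <malloc.h>
#endif

static void *aligned_malloc_demo(size_t size, size_t alignment)
{
#if defined(_WIN32)
  return _aligned_malloc(size, alignment);
#else
  void *result = nullptr;
  /* posix_memalign returns non-zero on failure (bad alignment or no memory). */
  if (posix_memalign(&result, alignment, size)) {
    return nullptr;
  }
  return result;
#endif
}

static void aligned_free_demo(void *ptr)
{
#if defined(_WIN32)
  _aligned_free(ptr); /* memory from _aligned_malloc needs _aligned_free */
#else
  free(ptr); /* posix_memalign memory is released with plain free */
#endif
}

int main()
{
  void *mem = aligned_malloc_demo(1024, 16);
  printf("allocated %p, 16-byte aligned: %d\n", mem, int(((uintptr_t)mem % 16) == 0));
  aligned_free_demo(mem);
  return 0;
}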
- */
-
-#ifndef __UTIL_ALIGNED_MALLOC_H__
-#define __UTIL_ALIGNED_MALLOC_H__
-
-#include "util/util_types.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Minimum alignment needed by all CPU native data types (SSE, AVX). */
-#define MIN_ALIGNMENT_CPU_DATA_TYPES 16
-
-/* Allocate block of size bytes at least aligned to a given value. */
-void *util_aligned_malloc(size_t size, int alignment);
-
-/* Free memory allocated by util_aligned_malloc. */
-void util_aligned_free(void *ptr);
-
-/* Aligned new operator. */
-template<typename T, typename... Args> T *util_aligned_new(Args... args)
-{
-  void *mem = util_aligned_malloc(sizeof(T), alignof(T));
-  return new (mem) T(args...);
-}
-
-template<typename T> void util_aligned_delete(T *t)
-{
-  if (t) {
-    t->~T();
-    util_aligned_free(t);
-  }
-}
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_ALIGNED_MALLOC_H__ */
diff --git a/intern/cycles/util/util_args.h b/intern/cycles/util/util_args.h
deleted file mode 100644
index be6f2c2b9f1..00000000000
--- a/intern/cycles/util/util_args.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_ARGS_H__
-#define __UTIL_ARGS_H__
-
-/* Argument Parsing for command line, we use the OpenImageIO
- * library because it has nice functions to do this. */
-
-#include <OpenImageIO/argparse.h>
-
-CCL_NAMESPACE_BEGIN
-
-OIIO_NAMESPACE_USING
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_ARGS_H__ */
diff --git a/intern/cycles/util/util_array.h b/intern/cycles/util/util_array.h
deleted file mode 100644
index 73f7d6cf7f8..00000000000
--- a/intern/cycles/util/util_array.h
+++ /dev/null
@@ -1,318 +0,0 @@
-/*
- * Copyright 2011-2018 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_ARRAY_H__
-#define __UTIL_ARRAY_H__
-
-#include <cassert>
-#include <cstring>
-
-#include "util/util_aligned_malloc.h"
-#include "util/util_guarded_allocator.h"
-#include "util/util_types.h"
-#include "util/util_vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Simplified version of vector, serving multiple purposes:
- * - somewhat faster in that it does not clear memory on resize/alloc,
- *   this was actually showing up in profiles quite significantly.
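util_aligned_new() and util_aligned_delete() above pair an aligned raw allocation with placement new and an explicit destructor call. A standalone sketch of the same pattern follows; it uses C++17 std::aligned_alloc for the raw allocation (an assumption, MSVC would need _aligned_malloc instead), and the demo names are hypothetical.

// Standalone sketch (not Cycles code): construct an object in aligned raw
// memory with placement new, later destroy it explicitly and free the memory.
#include <cstdio>
#include <cstdlib>
#include <new>

template<typename T, typename... Args> T *aligned_new_demo(Args... args)
{
  /* std::aligned_alloc requires the size to be a multiple of the alignment,
   * which holds for the Payload type below. */
  void *mem = std::aligned_alloc(alignof(T), sizeof(T));
  return mem ? new (mem) T(args...) : nullptr;
}

template<typename T> void aligned_delete_demo(T *t)
{
  if (t) {
    t->~T();       /* run the destructor explicitly */
    std::free(t);  /* then release the raw memory */
  }
}

struct alignas(16) Payload {
  float v[4];
  Payload(float a, float b, float c, float d) : v{a, b, c, d} {}
};

int main()
{
  Payload *p = aligned_new_demo<Payload>(1.0f, 2.0f, 3.0f, 4.0f);
  printf("%g %g %g %g\n", p->v[0], p->v[1], p->v[2], p->v[3]);
  aligned_delete_demo(p);
  return 0;
}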
it - * also does not run any constructors/destructors - * - if this is used, we are not tempted to use inefficient operations - * - aligned allocation for CPU native data types */ - -template class array { - public: - array() : data_(NULL), datasize_(0), capacity_(0) - { - } - - explicit array(size_t newsize) - { - if (newsize == 0) { - data_ = NULL; - datasize_ = 0; - capacity_ = 0; - } - else { - data_ = mem_allocate(newsize); - datasize_ = newsize; - capacity_ = datasize_; - } - } - - array(const array &from) - { - if (from.datasize_ == 0) { - data_ = NULL; - datasize_ = 0; - capacity_ = 0; - } - else { - data_ = mem_allocate(from.datasize_); - if (from.datasize_ > 0) { - memcpy(data_, from.data_, from.datasize_ * sizeof(T)); - } - datasize_ = from.datasize_; - capacity_ = datasize_; - } - } - - array &operator=(const array &from) - { - if (this != &from) { - resize(from.size()); - if (datasize_ > 0) { - memcpy((void *)data_, from.data_, datasize_ * sizeof(T)); - } - } - - return *this; - } - - array &operator=(const vector &from) - { - resize(from.size()); - - if (from.size() > 0 && datasize_ > 0) { - memcpy(data_, &from[0], datasize_ * sizeof(T)); - } - - return *this; - } - - ~array() - { - mem_free(data_, capacity_); - } - - bool operator==(const array &other) const - { - if (datasize_ != other.datasize_) { - return false; - } - if (datasize_ == 0) { - return true; - } - - return memcmp(data_, other.data_, datasize_ * sizeof(T)) == 0; - } - - bool operator!=(const array &other) const - { - return !(*this == other); - } - - void steal_data(array &from) - { - if (this != &from) { - clear(); - - data_ = from.data_; - datasize_ = from.datasize_; - capacity_ = from.capacity_; - - from.data_ = NULL; - from.datasize_ = 0; - from.capacity_ = 0; - } - } - - void set_data(T *ptr_, size_t datasize) - { - clear(); - data_ = ptr_; - datasize_ = datasize; - capacity_ = datasize; - } - - T *steal_pointer() - { - T *ptr = data_; - data_ = NULL; - clear(); - return ptr; - } - - T *resize(size_t newsize) - { - if (newsize == 0) { - clear(); - } - else if (newsize != datasize_) { - if (newsize > capacity_) { - T *newdata = mem_allocate(newsize); - if (newdata == NULL) { - /* Allocation failed, likely out of memory. */ - clear(); - return NULL; - } - else if (data_ != NULL) { - memcpy( - (void *)newdata, data_, ((datasize_ < newsize) ? datasize_ : newsize) * sizeof(T)); - mem_free(data_, capacity_); - } - data_ = newdata; - capacity_ = newsize; - } - datasize_ = newsize; - } - return data_; - } - - T *resize(size_t newsize, const T &value) - { - size_t oldsize = size(); - resize(newsize); - - for (size_t i = oldsize; i < size(); i++) { - data_[i] = value; - } - - return data_; - } - - void clear() - { - if (data_ != NULL) { - mem_free(data_, capacity_); - data_ = NULL; - } - datasize_ = 0; - capacity_ = 0; - } - - size_t empty() const - { - return datasize_ == 0; - } - - size_t size() const - { - return datasize_; - } - - T *data() - { - return data_; - } - - const T *data() const - { - return data_; - } - - T &operator[](size_t i) const - { - assert(i < datasize_); - return data_[i]; - } - - T *begin() - { - return data_; - } - - const T *begin() const - { - return data_; - } - - T *end() - { - return data_ + datasize_; - } - - const T *end() const - { - return data_ + datasize_; - } - - void reserve(size_t newcapacity) - { - if (newcapacity > capacity_) { - T *newdata = mem_allocate(newcapacity); - if (data_ != NULL) { - memcpy(newdata, data_, ((datasize_ < newcapacity) ? 
datasize_ : newcapacity) * sizeof(T)); - mem_free(data_, capacity_); - } - data_ = newdata; - capacity_ = newcapacity; - } - } - - size_t capacity() const - { - return capacity_; - } - - // do not use this method unless you are sure the code is not performance critical - void push_back_slow(const T &t) - { - if (capacity_ == datasize_) { - reserve(datasize_ == 0 ? 1 : (size_t)((datasize_ + 1) * 1.2)); - } - - data_[datasize_++] = t; - } - - void push_back_reserved(const T &t) - { - assert(datasize_ < capacity_); - push_back_slow(t); - } - - void append(const array &from) - { - if (from.size()) { - size_t old_size = size(); - resize(old_size + from.size()); - memcpy(data_ + old_size, from.data(), sizeof(T) * from.size()); - } - } - - protected: - inline T *mem_allocate(size_t N) - { - if (N == 0) { - return NULL; - } - T *mem = (T *)util_aligned_malloc(sizeof(T) * N, alignment); - if (mem != NULL) { - util_guarded_mem_alloc(sizeof(T) * N); - } - else { - throw std::bad_alloc(); - } - return mem; - } - - inline void mem_free(T *mem, size_t N) - { - if (mem != NULL) { - util_guarded_mem_free(sizeof(T) * N); - util_aligned_free(mem); - } - } - - T *data_; - size_t datasize_; - size_t capacity_; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_ARRAY_H__ */ diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h deleted file mode 100644 index faba411c769..00000000000 --- a/intern/cycles/util/util_atomic.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_ATOMIC_H__ -#define __UTIL_ATOMIC_H__ - -#ifndef __KERNEL_GPU__ - -/* Using atomic ops header from Blender. 
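The array<T> class above deliberately skips value initialization, moves elements with raw memcpy, and grows capacity by roughly 1.2x in push_back_slow(). A minimal standalone sketch of that strategy for trivially copyable element types follows (hypothetical names, allocation error handling omitted).

// Minimal standalone sketch (not Cycles code) of a non-initializing,
// memcpy-based growable array with a ~1.2x growth factor.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <type_traits>

template<typename T> class raw_array_demo {
  static_assert(std::is_trivially_copyable<T>::value, "raw byte copies only");

 public:
  raw_array_demo() : data_(nullptr), size_(0), capacity_(0) {}
  ~raw_array_demo()
  {
    free(data_);
  }

  void reserve(size_t newcapacity)
  {
    if (newcapacity <= capacity_) {
      return;
    }
    T *newdata = static_cast<T *>(malloc(sizeof(T) * newcapacity)); /* not zeroed */
    if (data_) {
      memcpy(newdata, data_, sizeof(T) * size_); /* copy old contents only */
      free(data_);
    }
    data_ = newdata;
    capacity_ = newcapacity;
  }

  void push_back_slow(const T &t)
  {
    if (size_ == capacity_) {
      reserve(size_ == 0 ? 1 : (size_t)((size_ + 1) * 1.2)); /* same growth factor */
    }
    data_[size_++] = t;
  }

  size_t size() const
  {
    return size_;
  }
  T &operator[](size_t i)
  {
    return data_[i];
  }

 private:
  T *data_;
  size_t size_;
  size_t capacity_;
};

int main()
{
  raw_array_demo<int> a;
  for (int i = 0; i < 100; i++) {
    a.push_back_slow(i * i);
  }
  printf("size=%zu last=%d\n", a.size(), a[99]);
  return 0;
}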
*/ -# include "atomic_ops.h" - -# define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x)) -# define atomic_compare_and_swap_float(p, old_val, new_val) \ - atomic_cas_float((p), (old_val), (new_val)) - -# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) -# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1) - -# define CCL_LOCAL_MEM_FENCE 0 -# define ccl_barrier(flags) ((void)0) - -#else /* __KERNEL_GPU__ */ - -# if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) - -# define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x)) - -# define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int *)(p), (unsigned int)(x)) -# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x)) -# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) -# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1) -# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x)) - -ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest, - const float old_val, - const float new_val) -{ - union { - unsigned int int_value; - float float_value; - } new_value, prev_value, result; - prev_value.float_value = old_val; - new_value.float_value = new_val; - result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value, new_value.int_value); - return result.float_value; -} - -# define CCL_LOCAL_MEM_FENCE -# define ccl_barrier(flags) __syncthreads() - -# endif /* __KERNEL_CUDA__ */ - -#endif /* __KERNEL_GPU__ */ - -#endif /* __UTIL_ATOMIC_H__ */ diff --git a/intern/cycles/util/util_avxb.h b/intern/cycles/util/util_avxb.h deleted file mode 100644 index 15215d04ca3..00000000000 --- a/intern/cycles/util/util_avxb.h +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_AVXB_H__ -#define __UTIL_AVXB_H__ - -CCL_NAMESPACE_BEGIN - -struct avxf; - -/*! 4-wide SSE bool type. 
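atomic_compare_and_swap_float() in util_atomic.h above reinterprets the float's bit pattern so the hardware integer compare-and-swap can be used. A CPU-side sketch of the same trick follows, built on std::atomic instead of the CUDA/HIP atomicCAS shown in the header; names are hypothetical.

// CPU-side sketch (not Cycles code) of float CAS via the 32-bit pattern.
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <cstring>

static float atomic_cas_float_demo(std::atomic<uint32_t> *dest, float old_val, float new_val)
{
  uint32_t expected, desired;
  memcpy(&expected, &old_val, sizeof(float)); /* type-pun without violating aliasing */
  memcpy(&desired, &new_val, sizeof(float));
  dest->compare_exchange_strong(expected, desired);
  /* Whether the swap happened or not, 'expected' now holds the prior bits,
   * matching the "return old value" semantics of atomicCAS. */
  float previous;
  memcpy(&previous, &expected, sizeof(float));
  return previous;
}

int main()
{
  float initial = 1.5f;
  uint32_t bits;
  memcpy(&bits, &initial, sizeof(float));
  std::atomic<uint32_t> value(bits);

  float prev = atomic_cas_float_demo(&value, 1.5f, 2.5f); /* succeeds, returns 1.5 */
  printf("previous: %g\n", prev);
  return 0;
}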
*/ -struct avxb { - typedef avxb Mask; // mask type - typedef avxf Float; // float type - - enum { size = 8 }; // number of SIMD elements - union { - __m256 m256; - int32_t v[8]; - }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline avxb() - { - } - __forceinline avxb(const avxb &other) - { - m256 = other.m256; - } - __forceinline avxb &operator=(const avxb &other) - { - m256 = other.m256; - return *this; - } - - __forceinline avxb(const __m256 input) : m256(input) - { - } - __forceinline avxb(const __m128 &a, const __m128 &b) - : m256(_mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1)) - { - } - __forceinline operator const __m256 &(void) const - { - return m256; - } - __forceinline operator const __m256i(void) const - { - return _mm256_castps_si256(m256); - } - __forceinline operator const __m256d(void) const - { - return _mm256_castps_pd(m256); - } - - //////////////////////////////////////////////////////////////////////////////// - /// Constants - //////////////////////////////////////////////////////////////////////////////// - - __forceinline avxb(FalseTy) : m256(_mm256_setzero_ps()) - { - } - __forceinline avxb(TrueTy) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline bool operator[](const size_t i) const - { - assert(i < 8); - return (_mm256_movemask_ps(m256) >> i) & 1; - } - __forceinline int32_t &operator[](const size_t i) - { - assert(i < 8); - return v[i]; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxb operator!(const avxb &a) -{ - return _mm256_xor_ps(a, avxb(True)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxb operator&(const avxb &a, const avxb &b) -{ - return _mm256_and_ps(a, b); -} -__forceinline const avxb operator|(const avxb &a, const avxb &b) -{ - return _mm256_or_ps(a, b); -} -__forceinline const avxb operator^(const avxb &a, const avxb &b) -{ - return _mm256_xor_ps(a, b); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Assignment Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxb operator&=(avxb &a, const avxb &b) -{ - return a = a & b; -} -__forceinline const avxb operator|=(avxb &a, const avxb &b) -{ - return a = a | b; -} -__forceinline const avxb operator^=(avxb &a, const avxb &b) -{ - return a = a ^ b; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxb operator!=(const avxb &a, const avxb &b) -{ - return _mm256_xor_ps(a, b); -} -__forceinline const avxb operator==(const avxb &a, const avxb &b) -{ -#ifdef __KERNEL_AVX2__ - return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); -#else - __m128i a_lo = 
_mm_castps_si128(_mm256_extractf128_ps(a, 0)); - __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1)); - __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0)); - __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1)); - __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo); - __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi); - __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1); - return _mm256_castsi256_ps(result); -#endif -} - -__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f) -{ -#if defined(__KERNEL_SSE41__) - return _mm256_blendv_ps(f, t, m); -#else - return _mm256_or_ps(_mm256_and_ps(m, t), _mm256_andnot_ps(m, f)); -#endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxb unpacklo(const avxb &a, const avxb &b) -{ - return _mm256_unpacklo_ps(a, b); -} -__forceinline const avxb unpackhi(const avxb &a, const avxb &b) -{ - return _mm256_unpackhi_ps(a, b); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Reduction Operations -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__KERNEL_SSE41__) -__forceinline uint32_t popcnt(const avxb &a) -{ - return _mm_popcnt_u32(_mm256_movemask_ps(a)); -} -#else -__forceinline uint32_t popcnt(const avxb &a) -{ - return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]) + bool(a[4]) + bool(a[5]) + bool(a[6]) + - bool(a[7]); -} -#endif - -__forceinline bool reduce_and(const avxb &a) -{ - return _mm256_movemask_ps(a) == 0xf; -} -__forceinline bool reduce_or(const avxb &a) -{ - return _mm256_movemask_ps(a) != 0x0; -} -__forceinline bool all(const avxb &b) -{ - return _mm256_movemask_ps(b) == 0xf; -} -__forceinline bool any(const avxb &b) -{ - return _mm256_movemask_ps(b) != 0x0; -} -__forceinline bool none(const avxb &b) -{ - return _mm256_movemask_ps(b) == 0x0; -} - -__forceinline uint32_t movemask(const avxb &a) -{ - return _mm256_movemask_ps(a); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Debug Functions -//////////////////////////////////////////////////////////////////////////////// - -ccl_device_inline void print_avxb(const char *label, const avxb &a) -{ - printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); -} - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h deleted file mode 100644 index 1fb3ded422f..00000000000 --- a/intern/cycles/util/util_avxf.h +++ /dev/null @@ -1,392 +0,0 @@ -/* - * Copyright 2016 Intel Corporation - * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_AVXF_H__ -#define __UTIL_AVXF_H__ - -CCL_NAMESPACE_BEGIN - -struct avxb; - -struct avxf { - typedef avxf Float; - - enum { size = 8 }; /* Number of SIMD elements. 
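The avxb reductions above are all built on _mm256_movemask_ps, which packs the eight lane sign bits into one integer; for an 8-wide mask "all lanes set" corresponds to 0xff and "any lane set" to a non-zero mask. A standalone sketch follows (requires AVX, e.g. compile with -mavx; names are hypothetical).

// Standalone sketch (not Cycles code) of movemask-based any/all reductions.
#include <cstdio>
#include <immintrin.h>

static bool all_demo(__m256 mask)
{
  return _mm256_movemask_ps(mask) == 0xff; /* all 8 sign bits set */
}

static bool any_demo(__m256 mask)
{
  return _mm256_movemask_ps(mask) != 0x0; /* at least one sign bit set */
}

int main()
{
  __m256 a = _mm256_set_ps(8, 7, 6, 5, 4, 3, 2, 1); /* lanes 1..8, low to high */
  __m256 b = _mm256_set1_ps(4.5f);

  __m256 lt = _mm256_cmp_ps(a, b, _CMP_LT_OS); /* lanes where a < 4.5 */
  printf("any: %d all: %d\n", int(any_demo(lt)), int(all_demo(lt)));
  return 0;
}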
*/ - - union { - __m256 m256; - float f[8]; - int i[8]; - }; - - __forceinline avxf() - { - } - __forceinline avxf(const avxf &other) - { - m256 = other.m256; - } - __forceinline avxf &operator=(const avxf &other) - { - m256 = other.m256; - return *this; - } - - __forceinline avxf(const __m256 a) : m256(a) - { - } - __forceinline avxf(const __m256i a) : m256(_mm256_castsi256_ps(a)) - { - } - - __forceinline operator const __m256 &() const - { - return m256; - } - __forceinline operator __m256 &() - { - return m256; - } - - __forceinline avxf(float a) : m256(_mm256_set1_ps(a)) - { - } - - __forceinline avxf(float high32x4, float low32x4) - : m256(_mm256_set_ps( - high32x4, high32x4, high32x4, high32x4, low32x4, low32x4, low32x4, low32x4)) - { - } - - __forceinline avxf(float a3, float a2, float a1, float a0) - : m256(_mm256_set_ps(a3, a2, a1, a0, a3, a2, a1, a0)) - { - } - - __forceinline avxf( - float a7, float a6, float a5, float a4, float a3, float a2, float a1, float a0) - : m256(_mm256_set_ps(a7, a6, a5, a4, a3, a2, a1, a0)) - { - } - - __forceinline avxf(float3 a) : m256(_mm256_set_ps(a.w, a.z, a.y, a.x, a.w, a.z, a.y, a.x)) - { - } - - __forceinline avxf(int a3, int a2, int a1, int a0) - { - const __m256i foo = _mm256_set_epi32(a3, a2, a1, a0, a3, a2, a1, a0); - m256 = _mm256_castsi256_ps(foo); - } - - __forceinline avxf(int a7, int a6, int a5, int a4, int a3, int a2, int a1, int a0) - { - const __m256i foo = _mm256_set_epi32(a7, a6, a5, a4, a3, a2, a1, a0); - m256 = _mm256_castsi256_ps(foo); - } - - __forceinline avxf(__m128 a, __m128 b) - { - const __m256 foo = _mm256_castps128_ps256(a); - m256 = _mm256_insertf128_ps(foo, b, 1); - } - - __forceinline const float &operator[](const size_t i) const - { - assert(i < 8); - return f[i]; - } - __forceinline float &operator[](const size_t i) - { - assert(i < 8); - return f[i]; - } -}; - -__forceinline avxf cross(const avxf &a, const avxf &b) -{ - avxf r(0.0, - a[4] * b[5] - a[5] * b[4], - a[6] * b[4] - a[4] * b[6], - a[5] * b[6] - a[6] * b[5], - 0.0, - a[0] * b[1] - a[1] * b[0], - a[2] * b[0] - a[0] * b[2], - a[1] * b[2] - a[2] * b[1]); - return r; -} - -__forceinline void dot3(const avxf &a, const avxf &b, float &den, float &den2) -{ - const avxf t = _mm256_mul_ps(a.m256, b.m256); - den = ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]; - den2 = ((float *)&t)[4] + ((float *)&t)[5] + ((float *)&t)[6]; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxf cast(const __m256i &a) -{ - return _mm256_castsi256_ps(a); -} - -__forceinline const avxf mm256_sqrt(const avxf &a) -{ - return _mm256_sqrt_ps(a.m256); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxf operator+(const avxf &a, const avxf &b) -{ - return _mm256_add_ps(a.m256, b.m256); -} -__forceinline const avxf operator+(const avxf &a, const float &b) -{ - return a + avxf(b); -} -__forceinline const avxf operator+(const float &a, const avxf &b) -{ - return avxf(a) + b; -} - -__forceinline const avxf operator-(const avxf &a, const avxf &b) -{ - return _mm256_sub_ps(a.m256, b.m256); -} -__forceinline const avxf operator-(const avxf &a, const float &b) -{ - return a - avxf(b); -} -__forceinline const avxf operator-(const float &a, const avxf &b) -{ - return 
avxf(a) - b; -} - -__forceinline const avxf operator*(const avxf &a, const avxf &b) -{ - return _mm256_mul_ps(a.m256, b.m256); -} -__forceinline const avxf operator*(const avxf &a, const float &b) -{ - return a * avxf(b); -} -__forceinline const avxf operator*(const float &a, const avxf &b) -{ - return avxf(a) * b; -} - -__forceinline const avxf operator/(const avxf &a, const avxf &b) -{ - return _mm256_div_ps(a.m256, b.m256); -} -__forceinline const avxf operator/(const avxf &a, const float &b) -{ - return a / avxf(b); -} -__forceinline const avxf operator/(const float &a, const avxf &b) -{ - return avxf(a) / b; -} - -__forceinline const avxf operator|(const avxf &a, const avxf &b) -{ - return _mm256_or_ps(a.m256, b.m256); -} - -__forceinline const avxf operator^(const avxf &a, const avxf &b) -{ - return _mm256_xor_ps(a.m256, b.m256); -} - -__forceinline const avxf operator&(const avxf &a, const avxf &b) -{ - return _mm256_and_ps(a.m256, b.m256); -} - -__forceinline const avxf max(const avxf &a, const avxf &b) -{ - return _mm256_max_ps(a.m256, b.m256); -} -__forceinline const avxf min(const avxf &a, const avxf &b) -{ - return _mm256_min_ps(a.m256, b.m256); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxf shuffle(const avxf &a, const __m256i &shuf) -{ - return _mm256_permutevar_ps(a, shuf); -} - -template -__forceinline const avxf shuffle(const avxf &a) -{ - return _mm256_permutevar_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0)); -} - -template -__forceinline const avxf shuffle(const avxf &a, const avxf &b) -{ - return _mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); -} -template -__forceinline const avxf shuffle(const avxf &a) -{ - return shuffle(a, a); -} -template __forceinline const avxf shuffle(const avxf &a, const avxf &b) -{ - return shuffle(a, b); -} -template __forceinline const avxf shuffle(const avxf &a) -{ - return shuffle(a, a); -} - -template __forceinline float extract(const avxf &a) -{ - __m256 b = shuffle(a).m256; - return _mm256_cvtss_f32(b); -} -template<> __forceinline float extract<0>(const avxf &a) -{ - return _mm256_cvtss_f32(a.m256); -} - -__forceinline ssef low(const avxf &a) -{ - return _mm256_extractf128_ps(a.m256, 0); -} -__forceinline ssef high(const avxf &a) -{ - return _mm256_extractf128_ps(a.m256, 1); -} - -template -__forceinline const avxf permute(const avxf &a) -{ -#ifdef __KERNEL_AVX2__ - return _mm256_permutevar8x32_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0)); -#else - float temp[8]; - _mm256_storeu_ps((float *)&temp, a); - return avxf(temp[i7], temp[i6], temp[i5], temp[i4], temp[i3], temp[i2], temp[i1], temp[i0]); -#endif -} - -template -ccl_device_inline const avxf set_sign_bit(const avxf &a) -{ - return a ^ avxf(S7 << 31, S6 << 31, S5 << 31, S4 << 31, S3 << 31, S2 << 31, S1 << 31, S0 << 31); -} - -template -ccl_device_inline const avxf blend(const avxf &a, const avxf &b) -{ - return _mm256_blend_ps( - a, b, S7 << 0 | S6 << 1 | S5 << 2 | S4 << 3 | S3 << 4 | S2 << 5 | S1 << 6 | S0 << 7); -} - -template -ccl_device_inline const avxf blend(const avxf &a, const avxf &b) -{ - return blend(a, b); -} - -//#if defined(__KERNEL_SSE41__) -__forceinline avxf maxi(const avxf &a, const avxf &b) -{ - const avxf ci = _mm256_max_ps(a, b); - return ci; -} - -__forceinline avxf mini(const avxf &a, const avxf &b) -{ - const avxf ci = _mm256_min_ps(a, b); - 
return ci; -} -//#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Ternary Operators -//////////////////////////////////////////////////////////////////////////////// -__forceinline const avxf madd(const avxf &a, const avxf &b, const avxf &c) -{ -#ifdef __KERNEL_AVX2__ - return _mm256_fmadd_ps(a, b, c); -#else - return c + (a * b); -#endif -} - -__forceinline const avxf nmadd(const avxf &a, const avxf &b, const avxf &c) -{ -#ifdef __KERNEL_AVX2__ - return _mm256_fnmadd_ps(a, b, c); -#else - return c - (a * b); -#endif -} -__forceinline const avxf msub(const avxf &a, const avxf &b, const avxf &c) -{ -#ifdef __KERNEL_AVX2__ - return _mm256_fmsub_ps(a, b, c); -#else - return (a * b) - c; -#endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select -//////////////////////////////////////////////////////////////////////////////// -__forceinline const avxb operator<=(const avxf &a, const avxf &b) -{ - return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS); -} - -__forceinline const avxf select(const avxb &m, const avxf &t, const avxf &f) -{ - return _mm256_blendv_ps(f, t, m); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Common Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline avxf mix(const avxf &a, const avxf &b, const avxf &t) -{ - return madd(t, b, (avxf(1.0f) - t) * a); -} - -#ifndef _mm256_set_m128 -# define _mm256_set_m128(/* __m128 */ hi, /* __m128 */ lo) \ - _mm256_insertf128_ps(_mm256_castps128_ps256(lo), (hi), 0x1) -#endif - -#define _mm256_loadu2_m128(/* float const* */ hiaddr, /* float const* */ loaddr) \ - _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr)) - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_avxi.h b/intern/cycles/util/util_avxi.h deleted file mode 100644 index 0ae4bf271c8..00000000000 --- a/intern/cycles/util/util_avxi.h +++ /dev/null @@ -1,745 +0,0 @@ -/* - * Copyright 2009-2013 Intel Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
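madd() and its variants in util_avxf.h above use the fused _mm256_fmadd_ps when FMA is available and otherwise fall back to a separate multiply and add. A standalone sketch of that fallback pattern follows (compile with -mavx, optionally -mfma; names are hypothetical).

// Standalone sketch (not Cycles code) of fused multiply-add with a fallback.
#include <cstdio>
#include <immintrin.h>

static __m256 madd_demo(__m256 a, __m256 b, __m256 c)
{
#if defined(__FMA__)
  return _mm256_fmadd_ps(a, b, c); /* a * b + c with a single rounding */
#else
  return _mm256_add_ps(_mm256_mul_ps(a, b), c); /* two operations, two roundings */
#endif
}

int main()
{
  __m256 a = _mm256_set1_ps(2.0f);
  __m256 b = _mm256_set1_ps(3.0f);
  __m256 c = _mm256_set1_ps(1.0f);

  float out[8];
  _mm256_storeu_ps(out, madd_demo(a, b, c));
  printf("%g\n", out[0]); /* 7 */
  return 0;
}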
- */ - -#ifndef __UTIL_AVXI_H__ -#define __UTIL_AVXI_H__ - -CCL_NAMESPACE_BEGIN - -struct avxb; - -struct avxi { - typedef avxb Mask; // mask type for us - enum { size = 8 }; // number of SIMD elements - union { // data - __m256i m256; -#if !defined(__KERNEL_AVX2__) - struct { - __m128i l, h; - }; -#endif - int32_t v[8]; - }; - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline avxi() - { - } - __forceinline avxi(const avxi &a) - { - m256 = a.m256; - } - __forceinline avxi &operator=(const avxi &a) - { - m256 = a.m256; - return *this; - } - - __forceinline avxi(const __m256i a) : m256(a) - { - } - __forceinline operator const __m256i &(void) const - { - return m256; - } - __forceinline operator __m256i &(void) - { - return m256; - } - - __forceinline explicit avxi(const ssei &a) - : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), a, 1)) - { - } - __forceinline avxi(const ssei &a, const ssei &b) - : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1)) - { - } -#if defined(__KERNEL_AVX2__) - __forceinline avxi(const __m128i &a, const __m128i &b) - : m256(_mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1)) - { - } -#else - __forceinline avxi(const __m128i &a, const __m128i &b) : l(a), h(b) - { - } -#endif - __forceinline explicit avxi(const int32_t *const a) - : m256(_mm256_castps_si256(_mm256_loadu_ps((const float *)a))) - { - } - __forceinline avxi(int32_t a) : m256(_mm256_set1_epi32(a)) - { - } - __forceinline avxi(int32_t a, int32_t b) : m256(_mm256_set_epi32(b, a, b, a, b, a, b, a)) - { - } - __forceinline avxi(int32_t a, int32_t b, int32_t c, int32_t d) - : m256(_mm256_set_epi32(d, c, b, a, d, c, b, a)) - { - } - __forceinline avxi( - int32_t a, int32_t b, int32_t c, int32_t d, int32_t e, int32_t f, int32_t g, int32_t h) - : m256(_mm256_set_epi32(h, g, f, e, d, c, b, a)) - { - } - - __forceinline explicit avxi(const __m256 a) : m256(_mm256_cvtps_epi32(a)) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Constants - //////////////////////////////////////////////////////////////////////////////// - - __forceinline avxi(ZeroTy) : m256(_mm256_setzero_si256()) - { - } -#if defined(__KERNEL_AVX2__) - __forceinline avxi(OneTy) : m256(_mm256_set1_epi32(1)) - { - } - __forceinline avxi(PosInfTy) : m256(_mm256_set1_epi32(pos_inf)) - { - } - __forceinline avxi(NegInfTy) : m256(_mm256_set1_epi32(neg_inf)) - { - } -#else - __forceinline avxi(OneTy) : m256(_mm256_set_epi32(1, 1, 1, 1, 1, 1, 1, 1)) - { - } - __forceinline avxi(PosInfTy) - : m256(_mm256_set_epi32( - pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf, pos_inf)) - { - } - __forceinline avxi(NegInfTy) - : m256(_mm256_set_epi32( - neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf, neg_inf)) - { - } -#endif - __forceinline avxi(StepTy) : m256(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0)) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline const int32_t &operator[](const size_t i) const - { - assert(i < 8); - return v[i]; - } - __forceinline int32_t &operator[](const size_t i) - { - assert(i < 8); - return v[i]; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -/// Unary 
Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxi cast(const __m256 &a) -{ - return _mm256_castps_si256(a); -} -__forceinline const avxi operator+(const avxi &a) -{ - return a; -} -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator-(const avxi &a) -{ - return _mm256_sub_epi32(_mm256_setzero_si256(), a.m256); -} -__forceinline const avxi abs(const avxi &a) -{ - return _mm256_abs_epi32(a.m256); -} -#else -__forceinline const avxi operator-(const avxi &a) -{ - return avxi(_mm_sub_epi32(_mm_setzero_si128(), a.l), _mm_sub_epi32(_mm_setzero_si128(), a.h)); -} -__forceinline const avxi abs(const avxi &a) -{ - return avxi(_mm_abs_epi32(a.l), _mm_abs_epi32(a.h)); -} -#endif - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator+(const avxi &a, const avxi &b) -{ - return _mm256_add_epi32(a.m256, b.m256); -} -#else -__forceinline const avxi operator+(const avxi &a, const avxi &b) -{ - return avxi(_mm_add_epi32(a.l, b.l), _mm_add_epi32(a.h, b.h)); -} -#endif -__forceinline const avxi operator+(const avxi &a, const int32_t b) -{ - return a + avxi(b); -} -__forceinline const avxi operator+(const int32_t a, const avxi &b) -{ - return avxi(a) + b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator-(const avxi &a, const avxi &b) -{ - return _mm256_sub_epi32(a.m256, b.m256); -} -#else -__forceinline const avxi operator-(const avxi &a, const avxi &b) -{ - return avxi(_mm_sub_epi32(a.l, b.l), _mm_sub_epi32(a.h, b.h)); -} -#endif -__forceinline const avxi operator-(const avxi &a, const int32_t b) -{ - return a - avxi(b); -} -__forceinline const avxi operator-(const int32_t a, const avxi &b) -{ - return avxi(a) - b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator*(const avxi &a, const avxi &b) -{ - return _mm256_mullo_epi32(a.m256, b.m256); -} -#else -__forceinline const avxi operator*(const avxi &a, const avxi &b) -{ - return avxi(_mm_mullo_epi32(a.l, b.l), _mm_mullo_epi32(a.h, b.h)); -} -#endif -__forceinline const avxi operator*(const avxi &a, const int32_t b) -{ - return a * avxi(b); -} -__forceinline const avxi operator*(const int32_t a, const avxi &b) -{ - return avxi(a) * b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator&(const avxi &a, const avxi &b) -{ - return _mm256_and_si256(a.m256, b.m256); -} -#else -__forceinline const avxi operator&(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); -} -#endif -__forceinline const avxi operator&(const avxi &a, const int32_t b) -{ - return a & avxi(b); -} -__forceinline const avxi operator&(const int32_t a, const avxi &b) -{ - return avxi(a) & b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator|(const avxi &a, const avxi &b) -{ - return _mm256_or_si256(a.m256, b.m256); -} -#else -__forceinline const avxi operator|(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); -} -#endif -__forceinline const avxi operator|(const avxi &a, const int32_t b) -{ - return a | avxi(b); -} -__forceinline const avxi operator|(const int32_t a, const avxi &b) -{ - return avxi(a) | b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator^(const avxi &a, const avxi 
&b) -{ - return _mm256_xor_si256(a.m256, b.m256); -} -#else -__forceinline const avxi operator^(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); -} -#endif -__forceinline const avxi operator^(const avxi &a, const int32_t b) -{ - return a ^ avxi(b); -} -__forceinline const avxi operator^(const int32_t a, const avxi &b) -{ - return avxi(a) ^ b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi operator<<(const avxi &a, const int32_t n) -{ - return _mm256_slli_epi32(a.m256, n); -} -__forceinline const avxi operator>>(const avxi &a, const int32_t n) -{ - return _mm256_srai_epi32(a.m256, n); -} - -__forceinline const avxi sra(const avxi &a, const int32_t b) -{ - return _mm256_srai_epi32(a.m256, b); -} -__forceinline const avxi srl(const avxi &a, const int32_t b) -{ - return _mm256_srli_epi32(a.m256, b); -} -#else -__forceinline const avxi operator<<(const avxi &a, const int32_t n) -{ - return avxi(_mm_slli_epi32(a.l, n), _mm_slli_epi32(a.h, n)); -} -__forceinline const avxi operator>>(const avxi &a, const int32_t n) -{ - return avxi(_mm_srai_epi32(a.l, n), _mm_srai_epi32(a.h, n)); -} - -__forceinline const avxi sra(const avxi &a, const int32_t b) -{ - return avxi(_mm_srai_epi32(a.l, b), _mm_srai_epi32(a.h, b)); -} -__forceinline const avxi srl(const avxi &a, const int32_t b) -{ - return avxi(_mm_srli_epi32(a.l, b), _mm_srli_epi32(a.h, b)); -} -#endif - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi min(const avxi &a, const avxi &b) -{ - return _mm256_min_epi32(a.m256, b.m256); -} -#else -__forceinline const avxi min(const avxi &a, const avxi &b) -{ - return avxi(_mm_min_epi32(a.l, b.l), _mm_min_epi32(a.h, b.h)); -} -#endif -__forceinline const avxi min(const avxi &a, const int32_t b) -{ - return min(a, avxi(b)); -} -__forceinline const avxi min(const int32_t a, const avxi &b) -{ - return min(avxi(a), b); -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxi max(const avxi &a, const avxi &b) -{ - return _mm256_max_epi32(a.m256, b.m256); -} -#else -__forceinline const avxi max(const avxi &a, const avxi &b) -{ - return avxi(_mm_max_epi32(a.l, b.l), _mm_max_epi32(a.h, b.h)); -} -#endif -__forceinline const avxi max(const avxi &a, const int32_t b) -{ - return max(a, avxi(b)); -} -__forceinline const avxi max(const int32_t a, const avxi &b) -{ - return max(avxi(a), b); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Assignment Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline avxi &operator+=(avxi &a, const avxi &b) -{ - return a = a + b; -} -__forceinline avxi &operator+=(avxi &a, const int32_t b) -{ - return a = a + b; -} - -__forceinline avxi &operator-=(avxi &a, const avxi &b) -{ - return a = a - b; -} -__forceinline avxi &operator-=(avxi &a, const int32_t b) -{ - return a = a - b; -} - -__forceinline avxi &operator*=(avxi &a, const avxi &b) -{ - return a = a * b; -} -__forceinline avxi &operator*=(avxi &a, const int32_t b) -{ - return a = a * b; -} - -__forceinline avxi &operator&=(avxi &a, const avxi &b) -{ - return a = a & b; -} -__forceinline avxi &operator&=(avxi &a, const int32_t b) -{ - return a = a & b; -} - -__forceinline avxi &operator|=(avxi &a, const avxi &b) -{ - return a = a | b; -} -__forceinline avxi &operator|=(avxi &a, const int32_t b) -{ - return a = a | b; -} - -__forceinline avxi &operator^=(avxi &a, const avxi &b) -{ - return a = a ^ b; -} -__forceinline avxi 
&operator^=(avxi &a, const int32_t b) -{ - return a = a ^ b; -} - -__forceinline avxi &operator<<=(avxi &a, const int32_t b) -{ - return a = a << b; -} -__forceinline avxi &operator>>=(avxi &a, const int32_t b) -{ - return a = a >> b; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__KERNEL_AVX2__) -__forceinline const avxb operator==(const avxi &a, const avxi &b) -{ - return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a.m256, b.m256)); -} -#else -__forceinline const avxb operator==(const avxi &a, const avxi &b) -{ - return avxb(_mm_castsi128_ps(_mm_cmpeq_epi32(a.l, b.l)), - _mm_castsi128_ps(_mm_cmpeq_epi32(a.h, b.h))); -} -#endif -__forceinline const avxb operator==(const avxi &a, const int32_t b) -{ - return a == avxi(b); -} -__forceinline const avxb operator==(const int32_t a, const avxi &b) -{ - return avxi(a) == b; -} - -__forceinline const avxb operator!=(const avxi &a, const avxi &b) -{ - return !(a == b); -} -__forceinline const avxb operator!=(const avxi &a, const int32_t b) -{ - return a != avxi(b); -} -__forceinline const avxb operator!=(const int32_t a, const avxi &b) -{ - return avxi(a) != b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxb operator<(const avxi &a, const avxi &b) -{ - return _mm256_castsi256_ps(_mm256_cmpgt_epi32(b.m256, a.m256)); -} -#else -__forceinline const avxb operator<(const avxi &a, const avxi &b) -{ - return avxb(_mm_castsi128_ps(_mm_cmplt_epi32(a.l, b.l)), - _mm_castsi128_ps(_mm_cmplt_epi32(a.h, b.h))); -} -#endif -__forceinline const avxb operator<(const avxi &a, const int32_t b) -{ - return a < avxi(b); -} -__forceinline const avxb operator<(const int32_t a, const avxi &b) -{ - return avxi(a) < b; -} - -__forceinline const avxb operator>=(const avxi &a, const avxi &b) -{ - return !(a < b); -} -__forceinline const avxb operator>=(const avxi &a, const int32_t b) -{ - return a >= avxi(b); -} -__forceinline const avxb operator>=(const int32_t a, const avxi &b) -{ - return avxi(a) >= b; -} - -#if defined(__KERNEL_AVX2__) -__forceinline const avxb operator>(const avxi &a, const avxi &b) -{ - return _mm256_castsi256_ps(_mm256_cmpgt_epi32(a.m256, b.m256)); -} -#else -__forceinline const avxb operator>(const avxi &a, const avxi &b) -{ - return avxb(_mm_castsi128_ps(_mm_cmpgt_epi32(a.l, b.l)), - _mm_castsi128_ps(_mm_cmpgt_epi32(a.h, b.h))); -} -#endif -__forceinline const avxb operator>(const avxi &a, const int32_t b) -{ - return a > avxi(b); -} -__forceinline const avxb operator>(const int32_t a, const avxi &b) -{ - return avxi(a) > b; -} - -__forceinline const avxb operator<=(const avxi &a, const avxi &b) -{ - return !(a > b); -} -__forceinline const avxb operator<=(const avxi &a, const int32_t b) -{ - return a <= avxi(b); -} -__forceinline const avxb operator<=(const int32_t a, const avxi &b) -{ - return avxi(a) <= b; -} - -__forceinline const avxi select(const avxb &m, const avxi &t, const avxi &f) -{ - return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(f), _mm256_castsi256_ps(t), m)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -#if defined(__KERNEL_AVX2__) -__forceinline avxi unpacklo(const avxi &a, const avxi &b) -{ - return _mm256_unpacklo_epi32(a.m256, b.m256); -} -__forceinline avxi 
unpackhi(const avxi &a, const avxi &b) -{ - return _mm256_unpackhi_epi32(a.m256, b.m256); -} -#else -__forceinline avxi unpacklo(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); -} -__forceinline avxi unpackhi(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); -} -#endif - -template __forceinline const avxi shuffle(const avxi &a) -{ - return _mm256_castps_si256(_mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i, i, i, i))); -} - -template __forceinline const avxi shuffle(const avxi &a) -{ - return _mm256_permute2f128_si256(a, a, (i1 << 4) | (i0 << 0)); -} - -template __forceinline const avxi shuffle(const avxi &a, const avxi &b) -{ - return _mm256_permute2f128_si256(a, b, (i1 << 4) | (i0 << 0)); -} - -template -__forceinline const avxi shuffle(const avxi &a) -{ - return _mm256_castps_si256( - _mm256_permute_ps(_mm256_castsi256_ps(a), _MM_SHUFFLE(i3, i2, i1, i0))); -} - -template -__forceinline const avxi shuffle(const avxi &a, const avxi &b) -{ - return _mm256_castps_si256(_mm256_shuffle_ps( - _mm256_castsi256_ps(a), _mm256_castsi256_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); -} - -template<> __forceinline const avxi shuffle<0, 0, 2, 2>(const avxi &b) -{ - return _mm256_castps_si256(_mm256_moveldup_ps(_mm256_castsi256_ps(b))); -} -template<> __forceinline const avxi shuffle<1, 1, 3, 3>(const avxi &b) -{ - return _mm256_castps_si256(_mm256_movehdup_ps(_mm256_castsi256_ps(b))); -} -template<> __forceinline const avxi shuffle<0, 1, 0, 1>(const avxi &b) -{ - return _mm256_castps_si256( - _mm256_castpd_ps(_mm256_movedup_pd(_mm256_castps_pd(_mm256_castsi256_ps(b))))); -} - -__forceinline const avxi broadcast(const int *ptr) -{ - return _mm256_castps_si256(_mm256_broadcast_ss((const float *)ptr)); -} -template __forceinline const avxi insert(const avxi &a, const ssei &b) -{ - return _mm256_insertf128_si256(a, b, i); -} -template __forceinline const ssei extract(const avxi &a) -{ - return _mm256_extractf128_si256(a, i); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Reductions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const avxi vreduce_min2(const avxi &v) -{ - return min(v, shuffle<1, 0, 3, 2>(v)); -} -__forceinline const avxi vreduce_min4(const avxi &v) -{ - avxi v1 = vreduce_min2(v); - return min(v1, shuffle<2, 3, 0, 1>(v1)); -} -__forceinline const avxi vreduce_min(const avxi &v) -{ - avxi v1 = vreduce_min4(v); - return min(v1, shuffle<1, 0>(v1)); -} - -__forceinline const avxi vreduce_max2(const avxi &v) -{ - return max(v, shuffle<1, 0, 3, 2>(v)); -} -__forceinline const avxi vreduce_max4(const avxi &v) -{ - avxi v1 = vreduce_max2(v); - return max(v1, shuffle<2, 3, 0, 1>(v1)); -} -__forceinline const avxi vreduce_max(const avxi &v) -{ - avxi v1 = vreduce_max4(v); - return max(v1, shuffle<1, 0>(v1)); -} - -__forceinline const avxi vreduce_add2(const avxi &v) -{ - return v + shuffle<1, 0, 3, 2>(v); -} -__forceinline const avxi vreduce_add4(const avxi &v) -{ - avxi v1 = vreduce_add2(v); - return v1 + shuffle<2, 3, 0, 1>(v1); -} -__forceinline const avxi vreduce_add(const avxi &v) -{ - avxi v1 = vreduce_add4(v); - return v1 + shuffle<1, 0>(v1); -} - -__forceinline int reduce_min(const avxi &v) -{ - return extract<0>(extract<0>(vreduce_min(v))); -} -__forceinline int reduce_max(const avxi &v) -{ - return 
extract<0>(extract<0>(vreduce_max(v))); -} -__forceinline int reduce_add(const avxi &v) -{ - return extract<0>(extract<0>(vreduce_add(v))); -} - -__forceinline uint32_t select_min(const avxi &v) -{ - return __bsf(movemask(v == vreduce_min(v))); -} -__forceinline uint32_t select_max(const avxi &v) -{ - return __bsf(movemask(v == vreduce_max(v))); -} - -__forceinline uint32_t select_min(const avxb &valid, const avxi &v) -{ - const avxi a = select(valid, v, avxi(pos_inf)); - return __bsf(movemask(valid & (a == vreduce_min(a)))); -} -__forceinline uint32_t select_max(const avxb &valid, const avxi &v) -{ - const avxi a = select(valid, v, avxi(neg_inf)); - return __bsf(movemask(valid & (a == vreduce_max(a)))); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Output Operators -//////////////////////////////////////////////////////////////////////////////// - -ccl_device_inline void print_avxi(const char *label, const avxi &a) -{ - printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); -} - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h deleted file mode 100644 index 7fab7bd5a15..00000000000 --- a/intern/cycles/util/util_boundbox.h +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
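The vreduce_*() helpers above fold all eight lanes by repeatedly combining the vector with shuffled copies of itself, and select_min()/select_max() then locate the winning lane with movemask and __bsf. A minimal scalar sketch of what reduce_add() and select_min() compute, assuming a plain int v[8] (illustrative only):

/* Scalar equivalent of reduce_add(): vreduce_add() reaches the same
 * total with three shuffle+add steps before extracting lane 0. */
static inline int scalar_reduce_add(const int v[8])
{
  int sum = 0;
  for (int i = 0; i < 8; i++)
    sum += v[i];
  return sum;
}

/* Scalar equivalent of select_min(): the SIMD version broadcasts the
 * minimum with vreduce_min(), compares it against every lane and takes
 * the first matching lane index via __bsf(movemask(...)). */
static inline unsigned scalar_select_min(const int v[8])
{
  unsigned best = 0;
  for (unsigned i = 1; i < 8; i++)
    if (v[i] < v[best])
      best = i;
  return best;
}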
- */ - -#ifndef __UTIL_BOUNDBOX_H__ -#define __UTIL_BOUNDBOX_H__ - -#include -#include - -#include "util/util_math.h" -#include "util/util_string.h" -#include "util/util_transform.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* 3D BoundBox */ - -class BoundBox { - public: - float3 min, max; - - __forceinline BoundBox() - { - } - - __forceinline BoundBox(const float3 &pt) : min(pt), max(pt) - { - } - - __forceinline BoundBox(const float3 &min_, const float3 &max_) : min(min_), max(max_) - { - } - - enum empty_t { empty = 0 }; - - __forceinline BoundBox(empty_t) - : min(make_float3(FLT_MAX, FLT_MAX, FLT_MAX)), max(make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX)) - { - } - - __forceinline void grow(const float3 &pt) - { - /* the order of arguments to min is such that if pt is nan, it will not - * influence the resulting bounding box */ - min = ccl::min(pt, min); - max = ccl::max(pt, max); - } - - __forceinline void grow(const float3 &pt, float border) - { - float3 shift = make_float3(border, border, border); - min = ccl::min(pt - shift, min); - max = ccl::max(pt + shift, max); - } - - __forceinline void grow(const BoundBox &bbox) - { - grow(bbox.min); - grow(bbox.max); - } - - __forceinline void grow_safe(const float3 &pt) - { - /* the order of arguments to min is such that if pt is nan, it will not - * influence the resulting bounding box */ - if (isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z)) { - min = ccl::min(pt, min); - max = ccl::max(pt, max); - } - } - - __forceinline void grow_safe(const float3 &pt, float border) - { - if (isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z) && isfinite(border)) { - float3 shift = make_float3(border, border, border); - min = ccl::min(pt - shift, min); - max = ccl::max(pt + shift, max); - } - } - - __forceinline void grow_safe(const BoundBox &bbox) - { - grow_safe(bbox.min); - grow_safe(bbox.max); - } - - __forceinline void intersect(const BoundBox &bbox) - { - min = ccl::max(min, bbox.min); - max = ccl::min(max, bbox.max); - } - - /* todo: avoid using this */ - __forceinline float safe_area() const - { - if (!((min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z))) - return 0.0f; - - return area(); - } - - __forceinline float area() const - { - return half_area() * 2.0f; - } - - __forceinline float half_area() const - { - float3 d = max - min; - return (d.x * d.z + d.y * d.z + d.x * d.y); - } - - __forceinline float3 center() const - { - return 0.5f * (min + max); - } - - __forceinline float3 center2() const - { - return min + max; - } - - __forceinline float3 size() const - { - return max - min; - } - - __forceinline bool valid() const - { - return (min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z) && - (isfinite(min.x) && isfinite(min.y) && isfinite(min.z)) && - (isfinite(max.x) && isfinite(max.y) && isfinite(max.z)); - } - - BoundBox transformed(const Transform *tfm) const - { - BoundBox result = BoundBox::empty; - - for (int i = 0; i < 8; i++) { - float3 p; - - p.x = (i & 1) ? min.x : max.x; - p.y = (i & 2) ? min.y : max.y; - p.z = (i & 4) ? 
min.z : max.z; - - result.grow(transform_point(tfm, p)); - } - - return result; - } - - __forceinline bool intersects(const BoundBox &other) - { - float3 center_diff = center() - other.center(), total_size = (size() + other.size()) * 0.5f; - return fabsf(center_diff.x) <= total_size.x && fabsf(center_diff.y) <= total_size.y && - fabsf(center_diff.z) <= total_size.z; - } -}; - -__forceinline BoundBox merge(const BoundBox &bbox, const float3 &pt) -{ - return BoundBox(min(bbox.min, pt), max(bbox.max, pt)); -} - -__forceinline BoundBox merge(const BoundBox &a, const BoundBox &b) -{ - return BoundBox(min(a.min, b.min), max(a.max, b.max)); -} - -__forceinline BoundBox merge(const BoundBox &a, - const BoundBox &b, - const BoundBox &c, - const BoundBox &d) -{ - return merge(merge(a, b), merge(c, d)); -} - -__forceinline BoundBox intersect(const BoundBox &a, const BoundBox &b) -{ - return BoundBox(max(a.min, b.min), min(a.max, b.max)); -} - -__forceinline BoundBox intersect(const BoundBox &a, const BoundBox &b, const BoundBox &c) -{ - return intersect(a, intersect(b, c)); -} - -/* 2D BoundBox */ - -class BoundBox2D { - public: - float left; - float right; - float bottom; - float top; - - BoundBox2D() : left(0.0f), right(1.0f), bottom(0.0f), top(1.0f) - { - } - - bool operator==(const BoundBox2D &other) const - { - return (left == other.left && right == other.right && bottom == other.bottom && - top == other.top); - } - - float width() - { - return right - left; - } - - float height() - { - return top - bottom; - } - - BoundBox2D operator*(float f) const - { - BoundBox2D result; - - result.left = left * f; - result.right = right * f; - result.bottom = bottom * f; - result.top = top * f; - - return result; - } - - BoundBox2D subset(const BoundBox2D &other) const - { - BoundBox2D subset; - - subset.left = left + other.left * (right - left); - subset.right = left + other.right * (right - left); - subset.bottom = bottom + other.bottom * (top - bottom); - subset.top = bottom + other.top * (top - bottom); - - return subset; - } - - BoundBox2D make_relative_to(const BoundBox2D &other) const - { - BoundBox2D result; - - result.left = ((left - other.left) / (other.right - other.left)); - result.right = ((right - other.left) / (other.right - other.left)); - result.bottom = ((bottom - other.bottom) / (other.top - other.bottom)); - result.top = ((top - other.bottom) / (other.top - other.bottom)); - - return result; - } - - BoundBox2D clamp(float mn = 0.0f, float mx = 1.0f) - { - BoundBox2D result; - - result.left = ccl::clamp(left, mn, mx); - result.right = ccl::clamp(right, mn, mx); - result.bottom = ccl::clamp(bottom, mn, mx); - result.top = ccl::clamp(top, mn, mx); - - return result; - } -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_BOUNDBOX_H__ */ diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h deleted file mode 100644 index 361c36d9061..00000000000 --- a/intern/cycles/util/util_color.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
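A small usage sketch for the BoundBox2D subset()/make_relative_to() pair above, with made-up crop and border values: subset() places a normalized rectangle inside a parent box, and make_relative_to() inverts that mapping.

/* Hypothetical values: a crop window and a render border, both in 0..1
 * screen space (not taken from any particular scene). */
BoundBox2D crop;
crop.left = 0.5f;
crop.right = 1.0f;
crop.bottom = 0.0f;
crop.top = 0.5f;

BoundBox2D border;
border.left = 0.25f;
border.right = 0.75f;
border.bottom = 0.25f;
border.top = 0.75f;

/* Border placed inside the crop window:
 * left = 0.5 + 0.25 * (1.0 - 0.5) = 0.625, right = 0.875, and so on. */
BoundBox2D border_in_crop = crop.subset(border);

/* Mapping back recovers the original border coordinates. */
BoundBox2D border_restored = border_in_crop.make_relative_to(crop);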
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_COLOR_H__ -#define __UTIL_COLOR_H__ - -#include "util/util_math.h" -#include "util/util_types.h" - -#if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__) -# include "util/util_simd.h" -#endif - -CCL_NAMESPACE_BEGIN - -ccl_device uchar float_to_byte(float val) -{ - return ((val <= 0.0f) ? 0 : - ((val > (1.0f - 0.5f / 255.0f)) ? 255 : (uchar)((255.0f * val) + 0.5f))); -} - -ccl_device uchar4 color_float_to_byte(float3 c) -{ - uchar r, g, b; - - r = float_to_byte(c.x); - g = float_to_byte(c.y); - b = float_to_byte(c.z); - - return make_uchar4(r, g, b, 0); -} - -ccl_device uchar4 color_float4_to_uchar4(float4 c) -{ - uchar r, g, b, a; - - r = float_to_byte(c.x); - g = float_to_byte(c.y); - b = float_to_byte(c.z); - a = float_to_byte(c.w); - - return make_uchar4(r, g, b, a); -} - -ccl_device_inline float3 color_byte_to_float(uchar4 c) -{ - return make_float3(c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f)); -} - -ccl_device_inline float4 color_uchar4_to_float4(uchar4 c) -{ - return make_float4( - c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f), c.w * (1.0f / 255.0f)); -} - -ccl_device float color_srgb_to_linear(float c) -{ - if (c < 0.04045f) - return (c < 0.0f) ? 0.0f : c * (1.0f / 12.92f); - else - return powf((c + 0.055f) * (1.0f / 1.055f), 2.4f); -} - -ccl_device float color_linear_to_srgb(float c) -{ - if (c < 0.0031308f) - return (c < 0.0f) ? 0.0f : c * 12.92f; - else - return 1.055f * powf(c, 1.0f / 2.4f) - 0.055f; -} - -ccl_device float3 rgb_to_hsv(float3 rgb) -{ - float cmax, cmin, h, s, v, cdelta; - float3 c; - - cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z)); - cmin = min(rgb.x, min(rgb.y, rgb.z)); - cdelta = cmax - cmin; - - v = cmax; - - if (cmax != 0.0f) { - s = cdelta / cmax; - } - else { - s = 0.0f; - h = 0.0f; - } - - if (s != 0.0f) { - float3 cmax3 = make_float3(cmax, cmax, cmax); - c = (cmax3 - rgb) / cdelta; - - if (rgb.x == cmax) - h = c.z - c.y; - else if (rgb.y == cmax) - h = 2.0f + c.x - c.z; - else - h = 4.0f + c.y - c.x; - - h /= 6.0f; - - if (h < 0.0f) - h += 1.0f; - } - else { - h = 0.0f; - } - - return make_float3(h, s, v); -} - -ccl_device float3 hsv_to_rgb(float3 hsv) -{ - float i, f, p, q, t, h, s, v; - float3 rgb; - - h = hsv.x; - s = hsv.y; - v = hsv.z; - - if (s != 0.0f) { - if (h == 1.0f) - h = 0.0f; - - h *= 6.0f; - i = floorf(h); - f = h - i; - rgb = make_float3(f, f, f); - p = v * (1.0f - s); - q = v * (1.0f - (s * f)); - t = v * (1.0f - (s * (1.0f - f))); - - if (i == 0.0f) - rgb = make_float3(v, t, p); - else if (i == 1.0f) - rgb = make_float3(q, v, p); - else if (i == 2.0f) - rgb = make_float3(p, v, t); - else if (i == 3.0f) - rgb = make_float3(p, q, v); - else if (i == 4.0f) - rgb = make_float3(t, p, v); - else - rgb = make_float3(v, p, q); - } - else { - rgb = make_float3(v, v, v); - } - - return rgb; -} - -ccl_device float3 xyY_to_xyz(float x, float y, float Y) -{ - float X, Z; - - if (y != 0.0f) - X = (x / y) * Y; - else - X = 0.0f; - - if (y != 0.0f && Y != 0.0f) - Z = (1.0f - x - y) / y * Y; - else - Z = 0.0f; - - return make_float3(X, Y, Z); -} - -#ifdef __KERNEL_SSE2__ -/* - * Calculate initial guess for arg^exp based on float representation - * This method gives a constant bias, - * which can be easily compensated by multiplication with bias_coeff. - * Gives better results for exponents near 1 (e. g. 4/5). 
- * exp = exponent, encoded as uint32_t - * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t - */ -template ccl_device_inline ssef fastpow(const ssef &arg) -{ - ssef ret; - ret = arg * cast(ssei(e2coeff)); - ret = ssef(cast(ret)); - ret = ret * cast(ssei(exp)); - ret = cast(ssei(ret)); - return ret; -} - -/* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */ -ccl_device_inline ssef improve_5throot_solution(const ssef &old_result, const ssef &x) -{ - ssef approx2 = old_result * old_result; - ssef approx4 = approx2 * approx2; - ssef t = x / approx4; - ssef summ = madd(ssef(4.0f), old_result, t); - return summ * ssef(1.0f / 5.0f); -} - -/* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */ -ccl_device_inline ssef fastpow24(const ssef &arg) -{ - /* max, avg and |avg| errors were calculated in gcc without FMA instructions - * The final precision should be better than powf in glibc */ - - /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */ - /* 0x3F4CCCCD = 4/5 */ - /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */ - ssef x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05 - ssef arg2 = arg * arg; - ssef arg4 = arg2 * arg2; - - /* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */ - x = improve_5throot_solution(x, arg4); - /* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */ - x = improve_5throot_solution(x, arg4); - /* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */ - x = improve_5throot_solution(x, arg4); - - return x * (x * x); -} - -ccl_device ssef color_srgb_to_linear(const ssef &c) -{ - sseb cmp = c < ssef(0.04045f); - ssef lt = max(c * ssef(1.0f / 12.92f), ssef(0.0f)); - ssef gtebase = (c + ssef(0.055f)) * ssef(1.0f / 1.055f); /* fma */ - ssef gte = fastpow24(gtebase); - return select(cmp, lt, gte); -} -#endif /* __KERNEL_SSE2__ */ - -ccl_device float3 color_srgb_to_linear_v3(float3 c) -{ - return make_float3( - color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z)); -} - -ccl_device float3 color_linear_to_srgb_v3(float3 c) -{ - return make_float3( - color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z)); -} - -ccl_device float4 color_linear_to_srgb_v4(float4 c) -{ - return make_float4( - color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z), c.w); -} - -ccl_device float4 color_srgb_to_linear_v4(float4 c) -{ -#ifdef __KERNEL_SSE2__ - ssef r_ssef; - float4 &r = (float4 &)r_ssef; - r = c; - r_ssef = color_srgb_to_linear(r_ssef); - r.w = c.w; - return r; -#else - return make_float4( - color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z), c.w); -#endif -} - -ccl_device float3 color_highlight_compress(float3 color, ccl_private float3 *variance) -{ - color += one_float3(); - if (variance) { - *variance *= sqr3(one_float3() / color); - } - return log3(color); -} - -ccl_device float3 color_highlight_uncompress(float3 color) -{ - return exp3(color) - one_float3(); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_COLOR_H__ */ diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp deleted file mode 100644 index 2245668d02f..00000000000 --- a/intern/cycles/util/util_debug.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
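The improve_5throot_solution() step above is a single Newton-Raphson iteration for the fifth root: applying Newton's method to f(y) = y^5 - X gives y' = y - (y^5 - X) / (5 y^4) = (4 y + X / y^4) / 5, which is exactly the madd() and multiply sequence in the code. A scalar sketch of the same refinement and of how fastpow24() assembles x^2.4 from it (plain float math, assuming <math.h>; the real code uses the bit-trick fastpow<>() for the initial guess):

/* One Newton-Raphson refinement of y ~= X^(1/5), scalar form of
 * improve_5throot_solution() above. */
static inline float improve_5throot_scalar(float y, float X)
{
  return (4.0f * y + X / (y * y * y * y)) * (1.0f / 5.0f);
}

/* Scalar outline of fastpow24(): refine an estimate of x^(4/5) as the
 * fifth root of x^4, then cube it, since (x^(4/5))^3 == x^2.4. */
static inline float fastpow24_scalar_sketch(float x)
{
  float y = powf(x, 0.8f); /* stand-in for the fastpow<>() initial guess */
  float x4 = (x * x) * (x * x);
  y = improve_5throot_scalar(y, x4);
  y = improve_5throot_scalar(y, x4);
  y = improve_5throot_scalar(y, x4);
  return y * (y * y);
}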
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_debug.h" - -#include - -#include "bvh/bvh_params.h" - -#include "util/util_logging.h" -#include "util/util_string.h" - -CCL_NAMESPACE_BEGIN - -DebugFlags::CPU::CPU() - : avx2(true), avx(true), sse41(true), sse3(true), sse2(true), bvh_layout(BVH_LAYOUT_AUTO) -{ - reset(); -} - -void DebugFlags::CPU::reset() -{ -#define STRINGIFY(x) #x -#define CHECK_CPU_FLAGS(flag, env) \ - do { \ - flag = (getenv(env) == NULL); \ - if (!flag) { \ - VLOG(1) << "Disabling " << STRINGIFY(flag) << " instruction set."; \ - } \ - } while (0) - - CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2"); - CHECK_CPU_FLAGS(avx, "CYCLES_CPU_NO_AVX"); - CHECK_CPU_FLAGS(sse41, "CYCLES_CPU_NO_SSE41"); - CHECK_CPU_FLAGS(sse3, "CYCLES_CPU_NO_SSE3"); - CHECK_CPU_FLAGS(sse2, "CYCLES_CPU_NO_SSE2"); - -#undef STRINGIFY -#undef CHECK_CPU_FLAGS - - bvh_layout = BVH_LAYOUT_AUTO; -} - -DebugFlags::CUDA::CUDA() : adaptive_compile(false) -{ - reset(); -} - -DebugFlags::HIP::HIP() : adaptive_compile(false) -{ - reset(); -} - -void DebugFlags::CUDA::reset() -{ - if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL) - adaptive_compile = true; -} - -void DebugFlags::HIP::reset() -{ - if (getenv("CYCLES_HIP_ADAPTIVE_COMPILE") != NULL) - adaptive_compile = true; -} - -DebugFlags::OptiX::OptiX() -{ - reset(); -} - -void DebugFlags::OptiX::reset() -{ - use_debug = false; -} - -DebugFlags::DebugFlags() : viewport_static_bvh(false), running_inside_blender(false) -{ - /* Nothing for now. */ -} - -void DebugFlags::reset() -{ - viewport_static_bvh = false; - cpu.reset(); - cuda.reset(); - optix.reset(); -} - -std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags) -{ - os << "CPU flags:\n" - << " AVX2 : " << string_from_bool(debug_flags.cpu.avx2) << "\n" - << " AVX : " << string_from_bool(debug_flags.cpu.avx) << "\n" - << " SSE4.1 : " << string_from_bool(debug_flags.cpu.sse41) << "\n" - << " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n" - << " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n" - << " BVH layout : " << bvh_layout_name(debug_flags.cpu.bvh_layout) << "\n"; - - os << "CUDA flags:\n" - << " Adaptive Compile : " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n"; - - os << "OptiX flags:\n" - << " Debug : " << string_from_bool(debug_flags.optix.use_debug) << "\n"; - - os << "HIP flags:\n" - << " HIP streams : " << string_from_bool(debug_flags.hip.adaptive_compile) << "\n"; - - return os; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h deleted file mode 100644 index 81677201790..00000000000 --- a/intern/cycles/util/util_debug.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
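For a single flag, the CHECK_CPU_FLAGS macro above expands to roughly the following; this is a hand-expanded sketch of CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2"), where STRINGIFY turns the flag name into the logged text:

do {
  /* The flag stays enabled only while the environment variable is unset. */
  avx2 = (getenv("CYCLES_CPU_NO_AVX2") == NULL);
  if (!avx2) {
    VLOG(1) << "Disabling avx2 instruction set.";
  }
} while (0);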
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_DEBUG_H__ -#define __UTIL_DEBUG_H__ - -#include -#include - -#include "bvh/bvh_params.h" - -CCL_NAMESPACE_BEGIN - -/* Global storage for all sort of flags used to fine-tune behavior of particular - * areas for the development purposes, without officially exposing settings to - * the interface. - */ -class DebugFlags { - public: - /* Use static BVH in viewport, to match final render exactly. */ - bool viewport_static_bvh; - - bool running_inside_blender; - - /* Descriptor of CPU feature-set to be used. */ - struct CPU { - CPU(); - - /* Reset flags to their defaults. */ - void reset(); - - /* Flags describing which instructions sets are allowed for use. */ - bool avx2; - bool avx; - bool sse41; - bool sse3; - bool sse2; - - /* Check functions to see whether instructions up to the given one - * are allowed for use. - */ - bool has_avx2() - { - return has_avx() && avx2; - } - bool has_avx() - { - return has_sse41() && avx; - } - bool has_sse41() - { - return has_sse3() && sse41; - } - bool has_sse3() - { - return has_sse2() && sse3; - } - bool has_sse2() - { - return sse2; - } - - /* Requested BVH layout. - * - * By default the fastest will be used. For debugging the BVH used by other - * CPUs and GPUs can be selected here instead. - */ - BVHLayout bvh_layout; - }; - - /* Descriptor of CUDA feature-set to be used. */ - struct CUDA { - CUDA(); - - /* Reset flags to their defaults. */ - void reset(); - - /* Whether adaptive feature based runtime compile is enabled or not. - * Requires the CUDA Toolkit and only works on Linux at the moment. */ - bool adaptive_compile; - }; - - /* Descriptor of HIP feature-set to be used. */ - struct HIP { - HIP(); - - /* Reset flags to their defaults. */ - void reset(); - - /* Whether adaptive feature based runtime compile is enabled or not.*/ - bool adaptive_compile; - }; - - /* Descriptor of OptiX feature-set to be used. */ - struct OptiX { - OptiX(); - - /* Reset flags to their defaults. */ - void reset(); - - /* Load OptiX module with debug capabilities. Will lower logging verbosity level, enable - * validations, and lower optimization level. */ - bool use_debug; - }; - - /* Get instance of debug flags registry. */ - static DebugFlags &get() - { - static DebugFlags instance; - return instance; - } - - /* Reset flags to their defaults. */ - void reset(); - - /* Requested CPU flags. */ - CPU cpu; - - /* Requested CUDA flags. */ - CUDA cuda; - - /* Requested OptiX flags. */ - OptiX optix; - - /* Requested HIP flags. 
*/ - HIP hip; - - private: - DebugFlags(); - -#if (__cplusplus > 199711L) - public: - explicit DebugFlags(DebugFlags const & /*other*/) = delete; - void operator=(DebugFlags const & /*other*/) = delete; -#else - private: - explicit DebugFlags(DebugFlags const & /*other*/); - void operator=(DebugFlags const & /*other*/); -#endif -}; - -typedef DebugFlags &DebugFlagsRef; -typedef const DebugFlags &DebugFlagsConstRef; - -inline DebugFlags &DebugFlags() -{ - return DebugFlags::get(); -} - -std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags); - -CCL_NAMESPACE_END - -#endif /* __UTIL_DEBUG_H__ */ diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h deleted file mode 100644 index 9b1698d461a..00000000000 --- a/intern/cycles/util/util_defines.h +++ /dev/null @@ -1,146 +0,0 @@ - -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* clang-format off */ - -/* #define __forceinline triggers a bug in some clang-format versions, disable - * format for entire file to keep results consistent. */ - -#ifndef __UTIL_DEFINES_H__ -#define __UTIL_DEFINES_H__ - -/* Bitness */ - -#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \ - defined(_M_X64) || defined(__aarch64__) -# define __KERNEL_64_BIT__ -#endif - -/* Qualifiers for kernel code shared by CPU and GPU */ - -#ifndef __KERNEL_GPU__ -# define ccl_device static inline -# define ccl_device_noinline static -# define ccl_device_noinline_cpu ccl_device_noinline -# define ccl_global -# define ccl_static_constant static const -# define ccl_constant const -# define ccl_local -# define ccl_local_param -# define ccl_private -# define ccl_restrict __restrict -# define ccl_optional_struct_init -# define ccl_loop_no_unroll -# define ccl_attr_maybe_unused [[maybe_unused]] -# define __KERNEL_WITH_SSE_ALIGN__ - -# if defined(_WIN32) && !defined(FREE_WINDOWS) -# define ccl_device_inline static __forceinline -# define ccl_device_forceinline static __forceinline -# define ccl_align(...) __declspec(align(__VA_ARGS__)) -# ifdef __KERNEL_64_BIT__ -# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) -# else /* __KERNEL_64_BIT__ */ -# undef __KERNEL_WITH_SSE_ALIGN__ -/* No support for function arguments (error C2719). */ -# define ccl_try_align(...) -# endif /* __KERNEL_64_BIT__ */ -# define ccl_may_alias -# define ccl_always_inline __forceinline -# define ccl_never_inline __declspec(noinline) -# else /* _WIN32 && !FREE_WINDOWS */ -# define ccl_device_inline static inline __attribute__((always_inline)) -# define ccl_device_forceinline static inline __attribute__((always_inline)) -# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) -# ifndef FREE_WINDOWS64 -# define __forceinline inline __attribute__((always_inline)) -# endif -# define ccl_try_align(...) 
__attribute__((aligned(__VA_ARGS__))) -# define ccl_may_alias __attribute__((__may_alias__)) -# define ccl_always_inline __attribute__((always_inline)) -# define ccl_never_inline __attribute__((noinline)) -# endif /* _WIN32 && !FREE_WINDOWS */ - -/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */ -# ifndef ATTR_FALLTHROUGH -# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ -# define ATTR_FALLTHROUGH __attribute__((fallthrough)) -# else -# define ATTR_FALLTHROUGH ((void)0) -# endif -# endif -#endif /* __KERNEL_GPU__ */ - -/* macros */ - -/* hints for branch prediction, only use in code that runs a _lot_ */ -#if defined(__GNUC__) && defined(__KERNEL_CPU__) -# define LIKELY(x) __builtin_expect(!!(x), 1) -# define UNLIKELY(x) __builtin_expect(!!(x), 0) -#else -# define LIKELY(x) (x) -# define UNLIKELY(x) (x) -#endif - -#if defined(__GNUC__) || defined(__clang__) -# if defined(__cplusplus) -/* Some magic to be sure we don't have reference in the type. */ -template static inline T decltype_helper(T x) -{ - return x; -} -# define TYPEOF(x) decltype(decltype_helper(x)) -# else -# define TYPEOF(x) typeof(x) -# endif -#endif - -/* Causes warning: - * incompatible types when assigning to type 'Foo' from type 'Bar' - * ... the compiler optimizes away the temp var */ -#ifdef __GNUC__ -# define CHECK_TYPE(var, type) \ - { \ - TYPEOF(var) * __tmp; \ - __tmp = (type *)NULL; \ - (void)__tmp; \ - } \ - (void)0 - -# define CHECK_TYPE_PAIR(var_a, var_b) \ - { \ - TYPEOF(var_a) * __tmp; \ - __tmp = (typeof(var_b) *)NULL; \ - (void)__tmp; \ - } \ - (void)0 -#else -# define CHECK_TYPE(var, type) -# define CHECK_TYPE_PAIR(var_a, var_b) -#endif - -/* can be used in simple macros */ -#define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val))) - -#ifndef __KERNEL_GPU__ -# include -# define util_assert(statement) assert(statement) -#else -# define util_assert(statement) -#endif - -#endif /* __UTIL_DEFINES_H__ */ diff --git a/intern/cycles/util/util_deque.h b/intern/cycles/util/util_deque.h deleted file mode 100644 index ccac961aa7d..00000000000 --- a/intern/cycles/util/util_deque.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_DEQUE_H__ -#define __UTIL_DEQUE_H__ - -#include - -CCL_NAMESPACE_BEGIN - -using std::deque; - -CCL_NAMESPACE_END - -#endif /* __UTIL_DEQUE_H__ */ diff --git a/intern/cycles/util/util_disjoint_set.h b/intern/cycles/util/util_disjoint_set.h deleted file mode 100644 index 946632371d2..00000000000 --- a/intern/cycles/util/util_disjoint_set.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
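The CHECK_TYPE trick above works by declaring a pointer to TYPEOF(var) and assigning it from a null pointer of the expected type, so a mismatch trips the compiler's incompatible-pointer diagnostic quoted in the comment. A usage sketch with hypothetical variables:

float radius = 1.0f;
CHECK_TYPE(radius, float); /* expands to: float *__tmp; __tmp = (float *)NULL; harmless */
/* CHECK_TYPE(radius, int) would instead assign __tmp (a float *) from
 * (int *)NULL and raise the "incompatible types" diagnostic. */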
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_DISJOINT_SET_H__ -#define __UTIL_DISJOINT_SET_H__ - -#include "util_array.h" -#include - -CCL_NAMESPACE_BEGIN - -class DisjointSet { - private: - array parents; - array ranks; - - public: - DisjointSet(size_t size) : parents(size), ranks(size) - { - for (size_t i = 0; i < size; i++) { - parents[i] = i; - ranks[i] = 0; - } - } - - size_t find(size_t x) - { - size_t root = x; - while (parents[root] != root) { - root = parents[root]; - } - while (parents[x] != root) { - size_t parent = parents[x]; - parents[x] = root; - x = parent; - } - return root; - } - - void join(size_t x, size_t y) - { - size_t x_root = find(x); - size_t y_root = find(y); - - if (x_root == y_root) { - return; - } - - if (ranks[x_root] < ranks[y_root]) { - std::swap(x_root, y_root); - } - parents[y_root] = x_root; - - if (ranks[x_root] == ranks[y_root]) { - ranks[x_root]++; - } - } -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_DISJOINT_SET_H__ */ diff --git a/intern/cycles/util/util_foreach.h b/intern/cycles/util/util_foreach.h deleted file mode 100644 index d907974be91..00000000000 --- a/intern/cycles/util/util_foreach.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_FOREACH_H__ -#define __UTIL_FOREACH_H__ - -/* Nice foreach() loops for STL data structures. */ - -#define foreach(x, y) for (x : y) - -#endif /* __UTIL_FOREACH_H__ */ diff --git a/intern/cycles/util/util_function.h b/intern/cycles/util/util_function.h deleted file mode 100644 index f3cc00329ad..00000000000 --- a/intern/cycles/util/util_function.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
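A short usage sketch of the DisjointSet above, with illustrative element indices; find() compresses paths as it walks to the root, and join() unions by rank so the trees stay shallow:

/* Six singleton sets {0} {1} {2} {3} {4} {5}, then two merges. */
DisjointSet ds(6);
ds.join(0, 1);
ds.join(1, 2);   /* {0, 1, 2} */
ds.join(3, 4);   /* {3, 4}; element 5 stays on its own */

const bool same_set = (ds.find(0) == ds.find(2));  /* true */
const bool other_set = (ds.find(2) == ds.find(3)); /* false */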
- */ - -#ifndef __UTIL_FUNCTION_H__ -#define __UTIL_FUNCTION_H__ - -#include - -CCL_NAMESPACE_BEGIN - -#define function_bind std::bind -#define function_null nullptr -using std::function; -using std::placeholders::_1; -using std::placeholders::_2; -using std::placeholders::_3; -using std::placeholders::_4; -using std::placeholders::_5; -using std::placeholders::_6; -using std::placeholders::_7; -using std::placeholders::_8; -using std::placeholders::_9; - -CCL_NAMESPACE_END - -#endif /* __UTIL_FUNCTION_H__ */ diff --git a/intern/cycles/util/util_guarded_allocator.cpp b/intern/cycles/util/util_guarded_allocator.cpp deleted file mode 100644 index 1cb466a1ffa..00000000000 --- a/intern/cycles/util/util_guarded_allocator.cpp +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_guarded_allocator.h" -#include "util/util_stats.h" - -CCL_NAMESPACE_BEGIN - -static Stats global_stats(Stats::static_init); - -/* Internal API. */ - -void util_guarded_mem_alloc(size_t n) -{ - global_stats.mem_alloc(n); -} - -void util_guarded_mem_free(size_t n) -{ - global_stats.mem_free(n); -} - -/* Public API. */ - -size_t util_guarded_get_mem_used() -{ - return global_stats.mem_used; -} - -size_t util_guarded_get_mem_peak() -{ - return global_stats.mem_peak; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_guarded_allocator.h b/intern/cycles/util/util_guarded_allocator.h deleted file mode 100644 index f78cc5f5da9..00000000000 --- a/intern/cycles/util/util_guarded_allocator.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_GUARDED_ALLOCATOR_H__ -#define __UTIL_GUARDED_ALLOCATOR_H__ - -#include -#include -#include - -#ifdef WITH_BLENDER_GUARDEDALLOC -# include "../../guardedalloc/MEM_guardedalloc.h" -#endif - -CCL_NAMESPACE_BEGIN - -/* Internal use only. */ -void util_guarded_mem_alloc(size_t n); -void util_guarded_mem_free(size_t n); - -/* Guarded allocator for the use with STL. 
*/ -template class GuardedAllocator { - public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T *pointer; - typedef const T *const_pointer; - typedef T &reference; - typedef const T &const_reference; - typedef T value_type; - - GuardedAllocator() - { - } - GuardedAllocator(const GuardedAllocator &) - { - } - - T *allocate(size_t n, const void *hint = 0) - { - (void)hint; - size_t size = n * sizeof(T); - util_guarded_mem_alloc(size); - if (n == 0) { - return NULL; - } - T *mem; -#ifdef WITH_BLENDER_GUARDEDALLOC - /* C++ standard requires allocation functions to allocate memory suitably - * aligned for any standard type. This is 16 bytes for 64 bit platform as - * far as i concerned. We might over-align on 32bit here, but that should - * be all safe actually. - */ - mem = (T *)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); -#else - mem = (T *)malloc(size); -#endif - if (mem == NULL) { - throw std::bad_alloc(); - } - return mem; - } - - void deallocate(T *p, size_t n) - { - util_guarded_mem_free(n * sizeof(T)); - if (p != NULL) { -#ifdef WITH_BLENDER_GUARDEDALLOC - MEM_freeN(p); -#else - free(p); -#endif - } - } - - T *address(T &x) const - { - return &x; - } - - const T *address(const T &x) const - { - return &x; - } - - GuardedAllocator &operator=(const GuardedAllocator &) - { - return *this; - } - - size_t max_size() const - { - return size_t(-1); - } - - template struct rebind { - typedef GuardedAllocator other; - }; - - template GuardedAllocator(const GuardedAllocator &) - { - } - - template GuardedAllocator &operator=(const GuardedAllocator &) - { - return *this; - } - - inline bool operator==(GuardedAllocator const & /*other*/) const - { - return true; - } - inline bool operator!=(GuardedAllocator const &other) const - { - return !operator==(other); - } - -#ifdef _MSC_VER - /* Welcome to the black magic here. - * - * The issue is that MSVC C++ allocates container proxy on any - * vector initialization, including static vectors which don't - * have any data yet. This leads to several issues: - * - * - Static objects initialization fiasco (global_stats from - * util_stats.h might not be initialized yet). - * - If main() function changes allocator type (for example, - * this might happen with `blender --debug-memory`) nobody - * will know how to convert already allocated memory to a new - * guarded allocator. - * - * Here we work this around by making it so container proxy does - * not use guarded allocation. A bit fragile, unfortunately. - */ - template<> struct rebind { - typedef std::allocator other; - }; - - operator std::allocator() const - { - return std::allocator(); - } -#endif -}; - -/* Get memory usage and peak from the guarded STL allocator. */ -size_t util_guarded_get_mem_used(); -size_t util_guarded_get_mem_peak(); - -/* Call given function and keep track if it runs out of memory. - * - * If it does run out f memory, stop execution and set progress - * to do a global cancel. - * - * It's not fully robust, but good enough to catch obvious issues - * when running out of memory. - */ -#define MEM_GUARDED_CALL(progress, func, ...) 
\ - do { \ - try { \ - (func)(__VA_ARGS__); \ - } \ - catch (std::bad_alloc &) { \ - fprintf(stderr, "Error: run out of memory!\n"); \ - fflush(stderr); \ - (progress)->set_error("Out of memory"); \ - } \ - } while (false) - -CCL_NAMESPACE_END - -#endif /* __UTIL_GUARDED_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h deleted file mode 100644 index 0db5acd319a..00000000000 --- a/intern/cycles/util/util_half.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_HALF_H__ -#define __UTIL_HALF_H__ - -#include "util/util_math.h" -#include "util/util_types.h" - -#if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__) -# include "util/util_simd.h" -#endif - -CCL_NAMESPACE_BEGIN - -/* Half Floats */ - -/* CUDA has its own half data type, no need to define then */ -#if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__) -/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from - * unsigned shorts. */ -class half { - public: - half() : v(0) - { - } - half(const unsigned short &i) : v(i) - { - } - operator unsigned short() - { - return v; - } - half &operator=(const unsigned short &i) - { - v = i; - return *this; - } - - private: - unsigned short v; -}; -#endif - -struct half4 { - half x, y, z, w; -}; - -/* Conversion to/from half float for image textures - * - * Simplified float to half for fast sampling on processor without a native - * instruction, and eliminating any NaN and inf values. */ - -ccl_device_inline half float_to_half_image(float f) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) - return __float2half(f); -#else - const uint u = __float_as_uint(f); - /* Sign bit, shifted to its position. */ - uint sign_bit = u & 0x80000000; - sign_bit >>= 16; - /* Exponent. */ - uint exponent_bits = u & 0x7f800000; - /* Non-sign bits. */ - uint value_bits = u & 0x7fffffff; - value_bits >>= 13; /* Align mantissa on MSB. */ - value_bits -= 0x1c000; /* Adjust bias. */ - /* Flush-to-zero. */ - value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits; - /* Clamp-to-max. */ - value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits; - /* Denormals-as-zero. */ - value_bits = (exponent_bits == 0 ? 0 : value_bits); - /* Re-insert sign bit and return. */ - return (value_bits | sign_bit); -#endif -} - -ccl_device_inline float half_to_float_image(half h) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) - return __half2float(h); -#else - const int x = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13); - return __int_as_float(x); -#endif -} - -ccl_device_inline float4 half4_to_float4_image(const half4 h) -{ - /* Unable to use because it gives different results half_to_float_image, can we - * modify float_to_half_image so the conversion results are identical? */ -#if 0 /* defined(__KERNEL_AVX2__) */ - /* CPU: AVX. 
*/ - __m128i x = _mm_castpd_si128(_mm_load_sd((const double *)&h)); - return float4(_mm_cvtph_ps(x)); -#endif - - const float4 f = make_float4(half_to_float_image(h.x), - half_to_float_image(h.y), - half_to_float_image(h.z), - half_to_float_image(h.w)); - return f; -} - -/* Conversion to half float texture for display. - * - * Simplified float to half for fast display texture conversion on processors - * without a native instruction. Assumes no negative, no NaN, no inf, and sets - * denormal to 0. */ - -ccl_device_inline half float_to_half_display(const float f) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) - return __float2half(f); -#else - const int x = __float_as_int((f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f); - const int absolute = x & 0x7FFFFFFF; - const int Z = absolute + 0xC8000000; - const int result = (absolute < 0x38800000) ? 0 : Z; - const int rshift = (result >> 13); - return (rshift & 0x7FFF); -#endif -} - -ccl_device_inline half4 float4_to_half4_display(const float4 f) -{ -#ifdef __KERNEL_SSE2__ - /* CPU: SSE and AVX. */ - ssef x = min(max(load4f(f), 0.0f), 65504.0f); -# ifdef __KERNEL_AVX2__ - ssei rpack = _mm_cvtps_ph(x, 0); -# else - ssei absolute = cast(x) & 0x7FFFFFFF; - ssei Z = absolute + 0xC8000000; - ssei result = andnot(absolute < 0x38800000, Z); - ssei rshift = (result >> 13) & 0x7FFF; - ssei rpack = _mm_packs_epi32(rshift, rshift); -# endif - half4 h; - _mm_storel_pi((__m64 *)&h, _mm_castsi128_ps(rpack)); - return h; -#else - /* GPU and scalar fallback. */ - const half4 h = {float_to_half_display(f.x), - float_to_half_display(f.y), - float_to_half_display(f.z), - float_to_half_display(f.w)}; - return h; -#endif -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_HALF_H__ */ diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h deleted file mode 100644 index 0021eec169b..00000000000 --- a/intern/cycles/util/util_hash.h +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
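A worked example of the float_to_half_image() bit manipulation above, for f = 1.0f (assuming IEEE-754 single precision):

/* f = 1.0f:
 *   u             = 0x3F800000
 *   sign_bit      = 0x00000000
 *   exponent_bits = 0x3F800000  (between 0x38800000 and 0x47000000,
 *                                so neither flushed to zero nor clamped)
 *   value_bits    = (0x3F800000 >> 13) - 0x1C000
 *                 = 0x1FC00 - 0x1C000 = 0x3C00
 * and 0x3C00 is exactly 1.0 in half precision. */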
- */ - -#ifndef __UTIL_HASH_H__ -#define __UTIL_HASH_H__ - -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* ***** Jenkins Lookup3 Hash Functions ***** */ - -/* Source: http://burtleburtle.net/bob/c/lookup3.c */ - -#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) - -#define mix(a, b, c) \ - { \ - a -= c; \ - a ^= rot(c, 4); \ - c += b; \ - b -= a; \ - b ^= rot(a, 6); \ - a += c; \ - c -= b; \ - c ^= rot(b, 8); \ - b += a; \ - a -= c; \ - a ^= rot(c, 16); \ - c += b; \ - b -= a; \ - b ^= rot(a, 19); \ - a += c; \ - c -= b; \ - c ^= rot(b, 4); \ - b += a; \ - } \ - ((void)0) - -#define final(a, b, c) \ - { \ - c ^= b; \ - c -= rot(b, 14); \ - a ^= c; \ - a -= rot(c, 11); \ - b ^= a; \ - b -= rot(a, 25); \ - c ^= b; \ - c -= rot(b, 16); \ - a ^= c; \ - a -= rot(c, 4); \ - b ^= a; \ - b -= rot(a, 14); \ - c ^= b; \ - c -= rot(b, 24); \ - } \ - ((void)0) - -ccl_device_inline uint hash_uint(uint kx) -{ - uint a, b, c; - a = b = c = 0xdeadbeef + (1 << 2) + 13; - - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline uint hash_uint2(uint kx, uint ky) -{ - uint a, b, c; - a = b = c = 0xdeadbeef + (2 << 2) + 13; - - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline uint hash_uint3(uint kx, uint ky, uint kz) -{ - uint a, b, c; - a = b = c = 0xdeadbeef + (3 << 2) + 13; - - c += kz; - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline uint hash_uint4(uint kx, uint ky, uint kz, uint kw) -{ - uint a, b, c; - a = b = c = 0xdeadbeef + (4 << 2) + 13; - - a += kx; - b += ky; - c += kz; - mix(a, b, c); - - a += kw; - final(a, b, c); - - return c; -} - -#undef rot -#undef final -#undef mix - -/* Hashing uint or uint[234] into a float in the range [0, 1]. */ - -ccl_device_inline float hash_uint_to_float(uint kx) -{ - return (float)hash_uint(kx) / (float)0xFFFFFFFFu; -} - -ccl_device_inline float hash_uint2_to_float(uint kx, uint ky) -{ - return (float)hash_uint2(kx, ky) / (float)0xFFFFFFFFu; -} - -ccl_device_inline float hash_uint3_to_float(uint kx, uint ky, uint kz) -{ - return (float)hash_uint3(kx, ky, kz) / (float)0xFFFFFFFFu; -} - -ccl_device_inline float hash_uint4_to_float(uint kx, uint ky, uint kz, uint kw) -{ - return (float)hash_uint4(kx, ky, kz, kw) / (float)0xFFFFFFFFu; -} - -/* Hashing float or float[234] into a float in the range [0, 1]. */ - -ccl_device_inline float hash_float_to_float(float k) -{ - return hash_uint_to_float(__float_as_uint(k)); -} - -ccl_device_inline float hash_float2_to_float(float2 k) -{ - return hash_uint2_to_float(__float_as_uint(k.x), __float_as_uint(k.y)); -} - -ccl_device_inline float hash_float3_to_float(float3 k) -{ - return hash_uint3_to_float(__float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z)); -} - -ccl_device_inline float hash_float4_to_float(float4 k) -{ - return hash_uint4_to_float( - __float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z), __float_as_uint(k.w)); -} - -/* Hashing float[234] into float[234] of components in the range [0, 1]. 
*/ - -ccl_device_inline float2 hash_float2_to_float2(float2 k) -{ - return make_float2(hash_float2_to_float(k), hash_float3_to_float(make_float3(k.x, k.y, 1.0))); -} - -ccl_device_inline float3 hash_float3_to_float3(float3 k) -{ - return make_float3(hash_float3_to_float(k), - hash_float4_to_float(make_float4(k.x, k.y, k.z, 1.0)), - hash_float4_to_float(make_float4(k.x, k.y, k.z, 2.0))); -} - -ccl_device_inline float4 hash_float4_to_float4(float4 k) -{ - return make_float4(hash_float4_to_float(k), - hash_float4_to_float(make_float4(k.w, k.x, k.y, k.z)), - hash_float4_to_float(make_float4(k.z, k.w, k.x, k.y)), - hash_float4_to_float(make_float4(k.y, k.z, k.w, k.x))); -} - -/* Hashing float or float[234] into float3 of components in range [0, 1]. */ - -ccl_device_inline float3 hash_float_to_float3(float k) -{ - return make_float3(hash_float_to_float(k), - hash_float2_to_float(make_float2(k, 1.0)), - hash_float2_to_float(make_float2(k, 2.0))); -} - -ccl_device_inline float3 hash_float2_to_float3(float2 k) -{ - return make_float3(hash_float2_to_float(k), - hash_float3_to_float(make_float3(k.x, k.y, 1.0)), - hash_float3_to_float(make_float3(k.x, k.y, 2.0))); -} - -ccl_device_inline float3 hash_float4_to_float3(float4 k) -{ - return make_float3(hash_float4_to_float(k), - hash_float4_to_float(make_float4(k.z, k.x, k.w, k.y)), - hash_float4_to_float(make_float4(k.w, k.z, k.y, k.x))); -} - -/* SSE Versions Of Jenkins Lookup3 Hash Functions */ - -#ifdef __KERNEL_SSE2__ -# define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k)))) - -# define mix(a, b, c) \ - { \ - a -= c; \ - a ^= rot(c, 4); \ - c += b; \ - b -= a; \ - b ^= rot(a, 6); \ - a += c; \ - c -= b; \ - c ^= rot(b, 8); \ - b += a; \ - a -= c; \ - a ^= rot(c, 16); \ - c += b; \ - b -= a; \ - b ^= rot(a, 19); \ - a += c; \ - c -= b; \ - c ^= rot(b, 4); \ - b += a; \ - } - -# define final(a, b, c) \ - { \ - c ^= b; \ - c -= rot(b, 14); \ - a ^= c; \ - a -= rot(c, 11); \ - b ^= a; \ - b -= rot(a, 25); \ - c ^= b; \ - c -= rot(b, 16); \ - a ^= c; \ - a -= rot(c, 4); \ - b ^= a; \ - b -= rot(a, 14); \ - c ^= b; \ - c -= rot(b, 24); \ - } - -ccl_device_inline ssei hash_ssei(ssei kx) -{ - ssei a, b, c; - a = b = c = ssei(0xdeadbeef + (1 << 2) + 13); - - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline ssei hash_ssei2(ssei kx, ssei ky) -{ - ssei a, b, c; - a = b = c = ssei(0xdeadbeef + (2 << 2) + 13); - - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline ssei hash_ssei3(ssei kx, ssei ky, ssei kz) -{ - ssei a, b, c; - a = b = c = ssei(0xdeadbeef + (3 << 2) + 13); - - c += kz; - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline ssei hash_ssei4(ssei kx, ssei ky, ssei kz, ssei kw) -{ - ssei a, b, c; - a = b = c = ssei(0xdeadbeef + (4 << 2) + 13); - - a += kx; - b += ky; - c += kz; - mix(a, b, c); - - a += kw; - final(a, b, c); - - return c; -} - -# if defined(__KERNEL_AVX__) -ccl_device_inline avxi hash_avxi(avxi kx) -{ - avxi a, b, c; - a = b = c = avxi(0xdeadbeef + (1 << 2) + 13); - - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline avxi hash_avxi2(avxi kx, avxi ky) -{ - avxi a, b, c; - a = b = c = avxi(0xdeadbeef + (2 << 2) + 13); - - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline avxi hash_avxi3(avxi kx, avxi ky, avxi kz) -{ - avxi a, b, c; - a = b = c = avxi(0xdeadbeef + (3 << 2) + 13); - - c += kz; - b += ky; - a += kx; - final(a, b, c); - - return c; -} - -ccl_device_inline avxi hash_avxi4(avxi kx, avxi ky, avxi kz, avxi kw) -{ - avxi a, b, 
c; - a = b = c = avxi(0xdeadbeef + (4 << 2) + 13); - - a += kx; - b += ky; - c += kz; - mix(a, b, c); - - a += kw; - final(a, b, c); - - return c; -} -# endif - -# undef rot -# undef final -# undef mix - -#endif - -#ifndef __KERNEL_GPU__ -static inline uint hash_string(const char *str) -{ - uint i = 0, c; - - while ((c = *str++)) - i = i * 37 + c; - - return i; -} -#endif - -CCL_NAMESPACE_END - -#endif /* __UTIL_HASH_H__ */ diff --git a/intern/cycles/util/util_ies.cpp b/intern/cycles/util/util_ies.cpp deleted file mode 100644 index 62d3d42186d..00000000000 --- a/intern/cycles/util/util_ies.cpp +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "util/util_foreach.h" -#include "util/util_ies.h" -#include "util/util_math.h" -#include "util/util_string.h" - -CCL_NAMESPACE_BEGIN - -// NOTE: For some reason gcc-7.2 does not instantiate this versio of allocator -// gere (used in IESTextParser). Works fine for gcc-6, gcc-7.3 and gcc-8. -// -// TODO(sergey): Get to the root of this issue, or confirm this i a compiler -// issue. -template class GuardedAllocator; - -bool IESFile::load(const string &ies) -{ - clear(); - if (!parse(ies) || !process()) { - clear(); - return false; - } - return true; -} - -void IESFile::clear() -{ - intensity.clear(); - v_angles.clear(); - h_angles.clear(); -} - -int IESFile::packed_size() -{ - if (v_angles.size() && h_angles.size() > 0) { - return 2 + h_angles.size() + v_angles.size() + h_angles.size() * v_angles.size(); - } - return 0; -} - -void IESFile::pack(float *data) -{ - if (v_angles.size() && h_angles.size()) { - *(data++) = __int_as_float(h_angles.size()); - *(data++) = __int_as_float(v_angles.size()); - - memcpy(data, &h_angles[0], h_angles.size() * sizeof(float)); - data += h_angles.size(); - memcpy(data, &v_angles[0], v_angles.size() * sizeof(float)); - data += v_angles.size(); - - for (int h = 0; h < intensity.size(); h++) { - memcpy(data, &intensity[h][0], v_angles.size() * sizeof(float)); - data += v_angles.size(); - } - } -} - -class IESTextParser { - public: - vector text; - char *data; - - IESTextParser(const string &str) : text(str.begin(), str.end()) - { - std::replace(text.begin(), text.end(), ',', ' '); - data = strstr(&text[0], "\nTILT="); - } - - bool eof() - { - return (data == NULL) || (data[0] == '\0'); - } - - double get_double() - { - if (eof()) { - return 0.0; - } - char *old_data = data; - double val = strtod(data, &data); - if (data == old_data) { - data = NULL; - return 0.0; - } - return val; - } - - long get_long() - { - if (eof()) { - return 0; - } - char *old_data = data; - long val = strtol(data, &data, 10); - if (data == old_data) { - data = NULL; - return 0; - } - return val; - } -}; - -bool IESFile::parse(const string &ies) -{ - if (ies.empty()) { - return false; - } - - IESTextParser parser(ies); - if (parser.eof()) { - return false; - } - - /* Handle the tilt data block. 
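
/* Standalone sketch (toy input, not the Cycles class itself) of the scanning
 * style used by IESTextParser above: commas are normalized to spaces, the
 * cursor is advanced by strtod/strtol, and a parse that does not advance the
 * cursor marks end-of-data. */
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>

int main()
{
  const char *src = "\nTILT=NONE\n1 1000.0, 1.0 5 1\n";
  std::vector<char> text(src, src + strlen(src) + 1);
  std::replace(text.begin(), text.end(), ',', ' ');

  /* Seek the TILT marker, then skip the line (the TILT=NONE case). */
  char *data = strstr(text.data(), "\nTILT=");
  data = strstr(data + 1, "\n") + 1;

  long lamps = strtol(data, &data, 10); /* 1 */
  double lumens = strtod(data, &data);  /* 1000.0 */
  double factor = strtod(data, &data);  /* 1.0 */
  printf("lamps=%ld lumens=%g factor=%g\n", lamps, lumens, factor);
  return 0;
}
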
*/ - if (strncmp(parser.data, "\nTILT=INCLUDE", 13) == 0) { - parser.data += 13; - parser.get_double(); /* Lamp to Luminaire geometry */ - int num_tilt = parser.get_long(); /* Amount of tilt angles and factors */ - /* Skip over angles and factors. */ - for (int i = 0; i < 2 * num_tilt; i++) { - parser.get_double(); - } - } - else { - /* Skip to next line. */ - parser.data = strstr(parser.data + 1, "\n"); - } - - if (parser.eof()) { - return false; - } - parser.data++; - - parser.get_long(); /* Number of lamps */ - parser.get_double(); /* Lumens per lamp */ - double factor = parser.get_double(); /* Candela multiplier */ - int v_angles_num = parser.get_long(); /* Number of vertical angles */ - int h_angles_num = parser.get_long(); /* Number of horizontal angles */ - type = (IESType)parser.get_long(); /* Photometric type */ - - /* TODO(lukas): Test whether the current type B processing can also deal with type A files. - * In theory the only difference should be orientation which we ignore anyways, but with IES you - * never know... - */ - if (type != TYPE_B && type != TYPE_C) { - return false; - } - - parser.get_long(); /* Unit of the geometry data */ - parser.get_double(); /* Width */ - parser.get_double(); /* Length */ - parser.get_double(); /* Height */ - factor *= parser.get_double(); /* Ballast factor */ - factor *= parser.get_double(); /* Ballast-Lamp Photometric factor */ - parser.get_double(); /* Input Watts */ - - /* Intensity values in IES files are specified in candela (lumen/sr), a photometric quantity. - * Cycles expects radiometric quantities, though, which requires a conversion. - * However, the Luminous efficacy (ratio of lumens per Watt) depends on the spectral distribution - * of the light source since lumens take human perception into account. - * Since this spectral distribution is not known from the IES file, a typical one must be - * assumed. The D65 standard illuminant has a Luminous efficacy of 177.83, which is used here to - * convert to Watt/sr. A more advanced approach would be to add a Blackbody Temperature input to - * the node and numerically integrate the Luminous efficacy from the resulting spectral - * distribution. Also, the Watt/sr value must be multiplied by 4*pi to get the Watt value that - * Cycles expects for lamp strength. Therefore, the conversion here uses 4*pi/177.83 as a Candela - * to Watt factor. - */ - factor *= 0.0706650768394; - - v_angles.reserve(v_angles_num); - for (int i = 0; i < v_angles_num; i++) { - v_angles.push_back((float)parser.get_double()); - } - - h_angles.reserve(h_angles_num); - for (int i = 0; i < h_angles_num; i++) { - h_angles.push_back((float)parser.get_double()); - } - - intensity.resize(h_angles_num); - for (int i = 0; i < h_angles_num; i++) { - intensity[i].reserve(v_angles_num); - for (int j = 0; j < v_angles_num; j++) { - intensity[i].push_back((float)(factor * parser.get_double())); - } - } - - return !parser.eof(); -} - -bool IESFile::process_type_b() -{ - vector> newintensity; - newintensity.resize(v_angles.size()); - for (int i = 0; i < v_angles.size(); i++) { - newintensity[i].reserve(h_angles.size()); - for (int j = 0; j < h_angles.size(); j++) { - newintensity[i].push_back(intensity[j][i]); - } - } - intensity.swap(newintensity); - h_angles.swap(v_angles); - - float h_first = h_angles[0], h_last = h_angles[h_angles.size() - 1]; - if (h_last != 90.0f) { - return false; - } - - if (h_first == 0.0f) { - /* The range in the file corresponds to 90°-180°, we need to mirror that to get the - * full 180° range. 
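
/* Quick standalone check (not from the Cycles sources) of the candela-to-watt
 * factor used in IESFile::parse() above: 4*pi / 177.83 (assumed D65 luminous
 * efficacy in lm/W, times 4*pi steradians), which reproduces the hard-coded
 * 0.0706650768394. */
#include <cassert>
#include <cmath>
#include <cstdio>

int main()
{
  const double pi = 3.14159265358979323846;
  const double efficacy_d65 = 177.83;           /* lumens per Watt */
  const double factor = 4.0 * pi / efficacy_d65; /* candela -> Watt */
  assert(std::fabs(factor - 0.0706650768394) < 1e-9);
  printf("4*pi/177.83 = %.13f\n", factor);
  return 0;
}
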
*/ - vector new_h_angles; - vector> new_intensity; - int hnum = h_angles.size(); - new_h_angles.reserve(2 * hnum - 1); - new_intensity.reserve(2 * hnum - 1); - for (int i = hnum - 1; i > 0; i--) { - new_h_angles.push_back(90.0f - h_angles[i]); - new_intensity.push_back(intensity[i]); - } - for (int i = 0; i < hnum; i++) { - new_h_angles.push_back(90.0f + h_angles[i]); - new_intensity.push_back(intensity[i]); - } - h_angles.swap(new_h_angles); - intensity.swap(new_intensity); - } - else if (h_first == -90.0f) { - /* We have full 180° coverage, so just shift to match the angle range convention. */ - for (int i = 0; i < h_angles.size(); i++) { - h_angles[i] += 90.0f; - } - } - /* To get correct results with the cubic interpolation in the kernel, the horizontal range - * has to cover all 360°. Therefore, we copy the 0° entry to 360° to ensure full coverage - * and seamless interpolation. */ - h_angles.push_back(360.0f); - intensity.push_back(intensity[0]); - - float v_first = v_angles[0], v_last = v_angles[v_angles.size() - 1]; - if (v_last != 90.0f) { - return false; - } - - if (v_first == 0.0f) { - /* The range in the file corresponds to 90°-180°, we need to mirror that to get the - * full 180° range. */ - vector new_v_angles; - int hnum = h_angles.size(); - int vnum = v_angles.size(); - new_v_angles.reserve(2 * vnum - 1); - for (int i = vnum - 1; i > 0; i--) { - new_v_angles.push_back(90.0f - v_angles[i]); - } - for (int i = 0; i < vnum; i++) { - new_v_angles.push_back(90.0f + v_angles[i]); - } - for (int i = 0; i < hnum; i++) { - vector new_intensity; - new_intensity.reserve(2 * vnum - 1); - for (int j = vnum - 2; j >= 0; j--) { - new_intensity.push_back(intensity[i][j]); - } - new_intensity.insert(new_intensity.end(), intensity[i].begin(), intensity[i].end()); - intensity[i].swap(new_intensity); - } - v_angles.swap(new_v_angles); - } - else if (v_first == -90.0f) { - /* We have full 180° coverage, so just shift to match the angle range convention. */ - for (int i = 0; i < v_angles.size(); i++) { - v_angles[i] += 90.0f; - } - } - - return true; -} - -bool IESFile::process_type_c() -{ - if (h_angles[0] == 90.0f) { - /* Some files are stored from 90° to 270°, so we just rotate them to the regular 0°-180° range - * here. */ - for (int i = 0; i < h_angles.size(); i++) { - h_angles[i] -= 90.0f; - } - } - - if (h_angles[0] != 0.0f) { - return false; - } - - if (h_angles.size() == 1) { - h_angles.push_back(360.0f); - intensity.push_back(intensity[0]); - } - - if (h_angles[h_angles.size() - 1] == 90.0f) { - /* Only one quadrant is defined, so we need to mirror twice (from one to two, then to four). - * Since the two->four mirroring step might also be required if we get an input of two - * quadrants, we only do the first mirror here and later do the second mirror in either case. - */ - int hnum = h_angles.size(); - for (int i = hnum - 2; i >= 0; i--) { - h_angles.push_back(180.0f - h_angles[i]); - intensity.push_back(intensity[i]); - } - } - - if (h_angles[h_angles.size() - 1] == 180.0f) { - /* Mirror half to the full range. */ - int hnum = h_angles.size(); - for (int i = hnum - 2; i >= 0; i--) { - h_angles.push_back(360.0f - h_angles[i]); - intensity.push_back(intensity[i]); - } - } - - /* Some files skip the 360° entry (contrary to standard) because it's supposed to be identical to - * the 0° entry. If the file has a discernible order in its spacing, just fix this. 
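
/* Standalone sketch of the half-to-full mirroring done in process_type_c()
 * above: given horizontal angles covering 0..180 degrees, append 360 - angle
 * for the existing entries in reverse order and duplicate their intensity
 * rows. Plain std::vector; mirror_half_to_full is an illustrative name. */
#include <cstdio>
#include <vector>

static void mirror_half_to_full(std::vector<float> &h_angles,
                                std::vector<std::vector<float>> &intensity)
{
  const int hnum = (int)h_angles.size();
  for (int i = hnum - 2; i >= 0; i--) {
    h_angles.push_back(360.0f - h_angles[i]);
    intensity.push_back(intensity[i]);
  }
}

int main()
{
  std::vector<float> h_angles = {0.0f, 90.0f, 180.0f};
  std::vector<std::vector<float>> intensity = {{1.0f}, {2.0f}, {3.0f}};
  mirror_half_to_full(h_angles, intensity);
  for (float a : h_angles)
    printf("%g ", a); /* 0 90 180 270 360 */
  printf("\n");
  return 0;
}
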
*/ - if (h_angles[h_angles.size() - 1] != 360.0f) { - int hnum = h_angles.size(); - float last_step = h_angles[hnum - 1] - h_angles[hnum - 2]; - float first_step = h_angles[1] - h_angles[0]; - float difference = 360.0f - h_angles[hnum - 1]; - if (last_step == difference || first_step == difference) { - h_angles.push_back(360.0f); - intensity.push_back(intensity[0]); - } - else { - return false; - } - } - - float v_first = v_angles[0], v_last = v_angles[v_angles.size() - 1]; - if (v_first == 90.0f) { - if (v_last == 180.0f) { - /* Flip to ensure that vertical angles always start at 0°. */ - for (int i = 0; i < v_angles.size(); i++) { - v_angles[i] = 180.0f - v_angles[i]; - } - } - else { - return false; - } - } - else if (v_first != 0.0f) { - return false; - } - - return true; -} - -bool IESFile::process() -{ - if (h_angles.size() == 0 || v_angles.size() == 0) { - return false; - } - - if (type == TYPE_B) { - if (!process_type_b()) { - return false; - } - } - else { - assert(type == TYPE_C); - if (!process_type_c()) { - return false; - } - } - - assert(v_angles[0] == 0.0f); - assert(h_angles[0] == 0.0f); - assert(h_angles[h_angles.size() - 1] == 360.0f); - - /* Convert from deg to rad. */ - for (int i = 0; i < v_angles.size(); i++) { - v_angles[i] *= M_PI_F / 180.f; - } - for (int i = 0; i < h_angles.size(); i++) { - h_angles[i] *= M_PI_F / 180.f; - } - - return true; -} - -IESFile::~IESFile() -{ - clear(); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_ies.h b/intern/cycles/util/util_ies.h deleted file mode 100644 index 95473103614..00000000000 --- a/intern/cycles/util/util_ies.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_IES_H__ -#define __UTIL_IES_H__ - -#include "util/util_string.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -class IESFile { - public: - IESFile() - { - } - ~IESFile(); - - int packed_size(); - void pack(float *data); - - bool load(const string &ies); - void clear(); - - protected: - bool parse(const string &ies); - bool process(); - bool process_type_b(); - bool process_type_c(); - - /* The brightness distribution is stored in spherical coordinates. - * The horizontal angles correspond to theta in the regular notation - * and always span the full range from 0° to 360°. - * The vertical angles correspond to phi and always start at 0°. */ - vector v_angles, h_angles; - /* The actual values are stored here, with every entry storing the values - * of one horizontal segment. */ - vector> intensity; - - /* Types of angle representation in IES files. Currently, only B and C are supported. 
*/ - enum IESType { TYPE_A = 3, TYPE_B = 2, TYPE_C = 1 } type; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_IES_H__ */ diff --git a/intern/cycles/util/util_image.h b/intern/cycles/util/util_image.h deleted file mode 100644 index b082b971613..00000000000 --- a/intern/cycles/util/util_image.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_IMAGE_H__ -# define __UTIL_IMAGE_H__ - -/* OpenImageIO is used for all image file reading and writing. */ - -# include - -# include "util/util_half.h" -# include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -OIIO_NAMESPACE_USING - -template -void util_image_resize_pixels(const vector &input_pixels, - const size_t input_width, - const size_t input_height, - const size_t input_depth, - const size_t components, - vector *output_pixels, - size_t *output_width, - size_t *output_height, - size_t *output_depth); - -/* Cast input pixel from unknown storage to float. */ -template inline float util_image_cast_to_float(T value); - -template<> inline float util_image_cast_to_float(float value) -{ - return value; -} -template<> inline float util_image_cast_to_float(uchar value) -{ - return (float)value / 255.0f; -} -template<> inline float util_image_cast_to_float(uint16_t value) -{ - return (float)value / 65535.0f; -} -template<> inline float util_image_cast_to_float(half value) -{ - return half_to_float_image(value); -} - -/* Cast float value to output pixel type. */ -template inline T util_image_cast_from_float(float value); - -template<> inline float util_image_cast_from_float(float value) -{ - return value; -} -template<> inline uchar util_image_cast_from_float(float value) -{ - if (value < 0.0f) { - return 0; - } - else if (value > (1.0f - 0.5f / 255.0f)) { - return 255; - } - return (uchar)((255.0f * value) + 0.5f); -} -template<> inline uint16_t util_image_cast_from_float(float value) -{ - if (value < 0.0f) { - return 0; - } - else if (value > (1.0f - 0.5f / 65535.0f)) { - return 65535; - } - return (uint16_t)((65535.0f * value) + 0.5f); -} -template<> inline half util_image_cast_from_float(float value) -{ - return float_to_half_image(value); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_IMAGE_H__ */ - -#include "util/util_image_impl.h" diff --git a/intern/cycles/util/util_image_impl.h b/intern/cycles/util/util_image_impl.h deleted file mode 100644 index 3eb30d070ea..00000000000 --- a/intern/cycles/util/util_image_impl.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_IMAGE_IMPL_H__ -#define __UTIL_IMAGE_IMPL_H__ - -#include "util/util_algorithm.h" -#include "util/util_half.h" -#include "util/util_image.h" - -CCL_NAMESPACE_BEGIN - -namespace { - -template -const T *util_image_read(const vector &pixels, - const size_t width, - const size_t height, - const size_t /*depth*/, - const size_t components, - const size_t x, - const size_t y, - const size_t z) -{ - const size_t index = ((size_t)z * (width * height) + (size_t)y * width + (size_t)x) * components; - return &pixels[index]; -} - -template -void util_image_downscale_sample(const vector &pixels, - const size_t width, - const size_t height, - const size_t depth, - const size_t components, - const size_t kernel_size, - const float x, - const float y, - const float z, - T *result) -{ - assert(components <= 4); - const size_t ix = (size_t)x, iy = (size_t)y, iz = (size_t)z; - /* TODO(sergey): Support something smarter than box filer. */ - float accum[4] = {0}; - size_t count = 0; - for (size_t dz = 0; dz < kernel_size; ++dz) { - for (size_t dy = 0; dy < kernel_size; ++dy) { - for (size_t dx = 0; dx < kernel_size; ++dx) { - const size_t nx = ix + dx, ny = iy + dy, nz = iz + dz; - if (nx >= width || ny >= height || nz >= depth) { - continue; - } - const T *pixel = util_image_read(pixels, width, height, depth, components, nx, ny, nz); - for (size_t k = 0; k < components; ++k) { - accum[k] += util_image_cast_to_float(pixel[k]); - } - ++count; - } - } - } - if (count != 0) { - const float inv_count = 1.0f / (float)count; - for (size_t k = 0; k < components; ++k) { - result[k] = util_image_cast_from_float(accum[k] * inv_count); - } - } - else { - for (size_t k = 0; k < components; ++k) { - result[k] = T(0.0f); - } - } -} - -template -void util_image_downscale_pixels(const vector &input_pixels, - const size_t input_width, - const size_t input_height, - const size_t input_depth, - const size_t components, - const float inv_scale_factor, - const size_t output_width, - const size_t output_height, - const size_t output_depth, - vector *output_pixels) -{ - const size_t kernel_size = (size_t)(inv_scale_factor + 0.5f); - for (size_t z = 0; z < output_depth; ++z) { - for (size_t y = 0; y < output_height; ++y) { - for (size_t x = 0; x < output_width; ++x) { - const float input_x = (float)x * inv_scale_factor, input_y = (float)y * inv_scale_factor, - input_z = (float)z * inv_scale_factor; - const size_t output_index = (z * output_width * output_height + y * output_width + x) * - components; - util_image_downscale_sample(input_pixels, - input_width, - input_height, - input_depth, - components, - kernel_size, - input_x, - input_y, - input_z, - &output_pixels->at(output_index)); - } - } - } -} - -} /* namespace */ - -template -void util_image_resize_pixels(const vector &input_pixels, - const size_t input_width, - const size_t input_height, - const size_t input_depth, - const size_t components, - const float scale_factor, - vector *output_pixels, - size_t *output_width, - size_t *output_height, - size_t *output_depth) -{ - /* Early output for case when no scaling is applied. */ - if (scale_factor == 1.0f) { - *output_width = input_width; - *output_height = input_height; - *output_depth = input_depth; - *output_pixels = input_pixels; - return; - } - /* First of all, we calculate output image dimensions. - * We clamp them to be 1 pixel at least so we do not generate degenerate - * image. 
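
/* Worked numeric example (standalone arithmetic only, not Cycles code) of the
 * sizing used by util_image_resize_pixels() above and just below: output
 * dimensions are input * scale truncated and clamped to at least 1, and the
 * box kernel used for downscaling spans round(1 / scale) input pixels per
 * axis. */
#include <algorithm>
#include <cstdio>

int main()
{
  const float scale = 0.25f;
  const size_t in_w = 100, in_h = 60, in_d = 1;
  const size_t out_w = std::max((size_t)((float)in_w * scale), (size_t)1); /* 25 */
  const size_t out_h = std::max((size_t)((float)in_h * scale), (size_t)1); /* 15 */
  const size_t out_d = std::max((size_t)((float)in_d * scale), (size_t)1); /* 1 */
  const size_t kernel = (size_t)(1.0f / scale + 0.5f);                     /* 4 */
  printf("%zux%zux%zu, box kernel %zu\n", out_w, out_h, out_d, kernel);
  return 0;
}
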
- */ - *output_width = max((size_t)((float)input_width * scale_factor), (size_t)1); - *output_height = max((size_t)((float)input_height * scale_factor), (size_t)1); - *output_depth = max((size_t)((float)input_depth * scale_factor), (size_t)1); - /* Prepare pixel storage for the result. */ - const size_t num_output_pixels = ((*output_width) * (*output_height) * (*output_depth)) * - components; - output_pixels->resize(num_output_pixels); - if (scale_factor < 1.0f) { - const float inv_scale_factor = 1.0f / scale_factor; - util_image_downscale_pixels(input_pixels, - input_width, - input_height, - input_depth, - components, - inv_scale_factor, - *output_width, - *output_height, - *output_depth, - output_pixels); - } - else { - /* TODO(sergey): Needs implementation. */ - } -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_IMAGE_IMPL_H__ */ diff --git a/intern/cycles/util/util_list.h b/intern/cycles/util/util_list.h deleted file mode 100644 index f555b001186..00000000000 --- a/intern/cycles/util/util_list.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_LIST_H__ -#define __UTIL_LIST_H__ - -#include - -CCL_NAMESPACE_BEGIN - -using std::list; - -CCL_NAMESPACE_END - -#endif /* __UTIL_LIST_H__ */ diff --git a/intern/cycles/util/util_logging.cpp b/intern/cycles/util/util_logging.cpp deleted file mode 100644 index 8272728a7a0..00000000000 --- a/intern/cycles/util/util_logging.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright 2011-2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "util/util_logging.h" - -#include "util/util_math.h" -#include "util/util_string.h" - -#include -#ifdef _MSC_VER -# define snprintf _snprintf -#endif - -CCL_NAMESPACE_BEGIN - -#ifdef WITH_CYCLES_LOGGING -static bool is_verbosity_set() -{ - using CYCLES_GFLAGS_NAMESPACE::GetCommandLineOption; - - std::string verbosity; - if (!GetCommandLineOption("v", &verbosity)) { - return false; - } - return verbosity != "0"; -} -#endif - -void util_logging_init(const char *argv0) -{ -#ifdef WITH_CYCLES_LOGGING - using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; - - google::InitGoogleLogging(argv0); - SetCommandLineOption("logtostderr", "1"); - if (!is_verbosity_set()) { - SetCommandLineOption("v", "0"); - } - SetCommandLineOption("stderrthreshold", "0"); - SetCommandLineOption("minloglevel", "0"); -#else - (void)argv0; -#endif -} - -void util_logging_start() -{ -#ifdef WITH_CYCLES_LOGGING - using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; - SetCommandLineOption("logtostderr", "1"); - if (!is_verbosity_set()) { - SetCommandLineOption("v", "2"); - } - SetCommandLineOption("stderrthreshold", "0"); - SetCommandLineOption("minloglevel", "0"); -#endif -} - -void util_logging_verbosity_set(int verbosity) -{ -#ifdef WITH_CYCLES_LOGGING - using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; - char val[10]; - snprintf(val, sizeof(val), "%d", verbosity); - SetCommandLineOption("v", val); -#else - (void)verbosity; -#endif -} - -std::ostream &operator<<(std::ostream &os, const int2 &value) -{ - os << "(" << value.x << ", " << value.y << ")"; - return os; -} - -std::ostream &operator<<(std::ostream &os, const float3 &value) -{ - os << "(" << value.x << ", " << value.y << ", " << value.z << ")"; - return os; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h deleted file mode 100644 index 35c2d436d09..00000000000 --- a/intern/cycles/util/util_logging.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright 2011-2014 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_LOGGING_H__ -#define __UTIL_LOGGING_H__ - -#if defined(WITH_CYCLES_LOGGING) && !defined(__KERNEL_GPU__) -# include -# include -#endif - -#include - -CCL_NAMESPACE_BEGIN - -#if !defined(WITH_CYCLES_LOGGING) || defined(__KERNEL_GPU__) -class StubStream { - public: - template StubStream &operator<<(const T &) - { - return *this; - } -}; - -class LogMessageVoidify { - public: - LogMessageVoidify() - { - } - void operator&(const StubStream &) - { - } -}; - -# define LOG_SUPPRESS() (true) ? 
((void)0) : LogMessageVoidify() & StubStream() -# define LOG(severity) LOG_SUPPRESS() -# define VLOG(severity) LOG_SUPPRESS() -# define VLOG_IF(severity, condition) LOG_SUPPRESS() -# define VLOG_IS_ON(severity) false - -# define CHECK(expression) LOG_SUPPRESS() - -# define CHECK_NOTNULL(expression) LOG_SUPPRESS() -# define CHECK_NULL(expression) LOG_SUPPRESS() - -# define CHECK_NEAR(actual, expected, eps) LOG_SUPPRESS() - -# define CHECK_GE(a, b) LOG_SUPPRESS() -# define CHECK_NE(a, b) LOG_SUPPRESS() -# define CHECK_EQ(a, b) LOG_SUPPRESS() -# define CHECK_GT(a, b) LOG_SUPPRESS() -# define CHECK_LT(a, b) LOG_SUPPRESS() -# define CHECK_LE(a, b) LOG_SUPPRESS() - -# define DCHECK(expression) LOG_SUPPRESS() - -# define DCHECK_NOTNULL(expression) LOG_SUPPRESS() -# define DCHECK_NULL(expression) LOG_SUPPRESS() - -# define DCHECK_NEAR(actual, expected, eps) LOG_SUPPRESS() - -# define DCHECK_GE(a, b) LOG_SUPPRESS() -# define DCHECK_NE(a, b) LOG_SUPPRESS() -# define DCHECK_EQ(a, b) LOG_SUPPRESS() -# define DCHECK_GT(a, b) LOG_SUPPRESS() -# define DCHECK_LT(a, b) LOG_SUPPRESS() -# define DCHECK_LE(a, b) LOG_SUPPRESS() - -# define LOG_ASSERT(expression) LOG_SUPPRESS() -#endif - -#define VLOG_ONCE(level, flag) \ - if (!flag) \ - flag = true, VLOG(level) - -struct int2; -struct float3; - -void util_logging_init(const char *argv0); -void util_logging_start(); -void util_logging_verbosity_set(int verbosity); - -std::ostream &operator<<(std::ostream &os, const int2 &value); -std::ostream &operator<<(std::ostream &os, const float3 &value); - -CCL_NAMESPACE_END - -#endif /* __UTIL_LOGGING_H__ */ diff --git a/intern/cycles/util/util_map.h b/intern/cycles/util/util_map.h deleted file mode 100644 index f1b2522362f..00000000000 --- a/intern/cycles/util/util_map.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MAP_H__ -#define __UTIL_MAP_H__ - -#include -#include - -CCL_NAMESPACE_BEGIN - -using std::map; -using std::pair; -using std::unordered_map; -using std::unordered_multimap; - -template static void map_free_memory(T &data) -{ - /* Use swap() trick to actually free all internal memory. */ - T empty_data; - data.swap(empty_data); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MAP_H__ */ diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h deleted file mode 100644 index 535b6881d3f..00000000000 --- a/intern/cycles/util/util_math.h +++ /dev/null @@ -1,870 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_H__ -#define __UTIL_MATH_H__ - -/* Math - * - * Basic math functions on scalar and vector types. This header is used by - * both the kernel code when compiled as C++, and other C++ non-kernel code. */ - -#ifndef __KERNEL_GPU__ -# include -#endif - -#ifdef __HIP__ -# include -#endif - -#include -#include -#include - -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* Float Pi variations */ - -/* Division */ -#ifndef M_PI_F -# define M_PI_F (3.1415926535897932f) /* pi */ -#endif -#ifndef M_PI_2_F -# define M_PI_2_F (1.5707963267948966f) /* pi/2 */ -#endif -#ifndef M_PI_4_F -# define M_PI_4_F (0.7853981633974830f) /* pi/4 */ -#endif -#ifndef M_1_PI_F -# define M_1_PI_F (0.3183098861837067f) /* 1/pi */ -#endif -#ifndef M_2_PI_F -# define M_2_PI_F (0.6366197723675813f) /* 2/pi */ -#endif -#ifndef M_1_2PI_F -# define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */ -#endif -#ifndef M_SQRT_PI_8_F -# define M_SQRT_PI_8_F (0.6266570686577501f) /* sqrt(pi/8) */ -#endif -#ifndef M_LN_2PI_F -# define M_LN_2PI_F (1.8378770664093454f) /* ln(2*pi) */ -#endif - -/* Multiplication */ -#ifndef M_2PI_F -# define M_2PI_F (6.2831853071795864f) /* 2*pi */ -#endif -#ifndef M_4PI_F -# define M_4PI_F (12.566370614359172f) /* 4*pi */ -#endif - -/* Float sqrt variations */ -#ifndef M_SQRT2_F -# define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ -#endif -#ifndef M_LN2_F -# define M_LN2_F (0.6931471805599453f) /* ln(2) */ -#endif -#ifndef M_LN10_F -# define M_LN10_F (2.3025850929940457f) /* ln(10) */ -#endif - -/* Scalar */ - -#ifndef __HIP__ -# ifdef _WIN32 -ccl_device_inline float fmaxf(float a, float b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline float fminf(float a, float b) -{ - return (a < b) ? a : b; -} - -# endif /* _WIN32 */ -#endif /* __HIP__ */ - -#ifndef __KERNEL_GPU__ -using std::isfinite; -using std::isnan; -using std::sqrt; - -ccl_device_inline int abs(int x) -{ - return (x > 0) ? x : -x; -} - -ccl_device_inline int max(int a, int b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline int min(int a, int b) -{ - return (a < b) ? a : b; -} - -ccl_device_inline uint min(uint a, uint b) -{ - return (a < b) ? a : b; -} - -ccl_device_inline float max(float a, float b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline float min(float a, float b) -{ - return (a < b) ? a : b; -} - -ccl_device_inline double max(double a, double b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline double min(double a, double b) -{ - return (a < b) ? a : b; -} - -/* These 2 guys are templated for usage with registers data. - * - * NOTE: Since this is CPU-only functions it is ok to use references here. - * But for other devices we'll need to be careful about this. 
- */ - -template ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d) -{ - return min(min(a, b), min(c, d)); -} - -template ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d) -{ - return max(max(a, b), max(c, d)); -} -#endif /* __KERNEL_GPU__ */ - -ccl_device_inline float min4(float a, float b, float c, float d) -{ - return min(min(a, b), min(c, d)); -} - -ccl_device_inline float max4(float a, float b, float c, float d) -{ - return max(max(a, b), max(c, d)); -} - -/* Int/Float conversion */ - -ccl_device_inline int as_int(uint i) -{ - union { - uint ui; - int i; - } u; - u.ui = i; - return u.i; -} - -ccl_device_inline uint as_uint(int i) -{ - union { - uint ui; - int i; - } u; - u.i = i; - return u.ui; -} - -ccl_device_inline uint as_uint(float f) -{ - union { - uint i; - float f; - } u; - u.f = f; - return u.i; -} - -#ifndef __HIP__ -ccl_device_inline int __float_as_int(float f) -{ - union { - int i; - float f; - } u; - u.f = f; - return u.i; -} - -ccl_device_inline float __int_as_float(int i) -{ - union { - int i; - float f; - } u; - u.i = i; - return u.f; -} - -ccl_device_inline uint __float_as_uint(float f) -{ - union { - uint i; - float f; - } u; - u.f = f; - return u.i; -} - -ccl_device_inline float __uint_as_float(uint i) -{ - union { - uint i; - float f; - } u; - u.i = i; - return u.f; -} -#endif - -ccl_device_inline int4 __float4_as_int4(float4 f) -{ -#ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(f.m128)); -#else - return make_int4( - __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w)); -#endif -} - -ccl_device_inline float4 __int4_as_float4(int4 i) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_castsi128_ps(i.m128)); -#else - return make_float4( - __int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w)); -#endif -} - -template ccl_device_inline uint pointer_pack_to_uint_0(T *ptr) -{ - return ((uint64_t)ptr) & 0xFFFFFFFF; -} - -template ccl_device_inline uint pointer_pack_to_uint_1(T *ptr) -{ - return (((uint64_t)ptr) >> 32) & 0xFFFFFFFF; -} - -template ccl_device_inline T *pointer_unpack_from_uint(const uint a, const uint b) -{ - return (T *)(((uint64_t)b << 32) | a); -} - -ccl_device_inline uint uint16_pack_to_uint(const uint a, const uint b) -{ - return (a << 16) | b; -} - -ccl_device_inline uint uint16_unpack_from_uint_0(const uint i) -{ - return i >> 16; -} - -ccl_device_inline uint uint16_unpack_from_uint_1(const uint i) -{ - return i & 0xFFFF; -} - -/* Versions of functions which are safe for fast math. */ -ccl_device_inline bool isnan_safe(float f) -{ - unsigned int x = __float_as_uint(f); - return (x << 1) > 0xff000000u; -} - -ccl_device_inline bool isfinite_safe(float f) -{ - /* By IEEE 754 rule, 2*Inf equals Inf */ - unsigned int x = __float_as_uint(f); - return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u); -} - -ccl_device_inline float ensure_finite(float v) -{ - return isfinite_safe(v) ? 
v : 0.0f; -} - -ccl_device_inline int clamp(int a, int mn, int mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float clamp(float a, float mn, float mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float mix(float a, float b, float t) -{ - return a + t * (b - a); -} - -ccl_device_inline float smoothstep(float edge0, float edge1, float x) -{ - float result; - if (x < edge0) - result = 0.0f; - else if (x >= edge1) - result = 1.0f; - else { - float t = (x - edge0) / (edge1 - edge0); - result = (3.0f - 2.0f * t) * (t * t); - } - return result; -} - -#ifndef __KERNEL_CUDA__ -ccl_device_inline float saturate(float a) -{ - return clamp(a, 0.0f, 1.0f); -} -#endif /* __KERNEL_CUDA__ */ - -ccl_device_inline int float_to_int(float f) -{ - return (int)f; -} - -ccl_device_inline int floor_to_int(float f) -{ - return float_to_int(floorf(f)); -} - -ccl_device_inline int quick_floor_to_int(float x) -{ - return float_to_int(x) - ((x < 0) ? 1 : 0); -} - -ccl_device_inline float floorfrac(float x, ccl_private int *i) -{ - *i = quick_floor_to_int(x); - return x - *i; -} - -ccl_device_inline int ceil_to_int(float f) -{ - return float_to_int(ceilf(f)); -} - -ccl_device_inline float fractf(float x) -{ - return x - floorf(x); -} - -/* Adapted from godot-engine math_funcs.h. */ -ccl_device_inline float wrapf(float value, float max, float min) -{ - float range = max - min; - return (range != 0.0f) ? value - (range * floorf((value - min) / range)) : min; -} - -ccl_device_inline float pingpongf(float a, float b) -{ - return (b != 0.0f) ? fabsf(fractf((a - b) / (b * 2.0f)) * b * 2.0f - b) : 0.0f; -} - -ccl_device_inline float smoothminf(float a, float b, float k) -{ - if (k != 0.0f) { - float h = fmaxf(k - fabsf(a - b), 0.0f) / k; - return fminf(a, b) - h * h * h * k * (1.0f / 6.0f); - } - else { - return fminf(a, b); - } -} - -ccl_device_inline float signf(float f) -{ - return (f < 0.0f) ? -1.0f : 1.0f; -} - -ccl_device_inline float nonzerof(float f, float eps) -{ - if (fabsf(f) < eps) - return signf(f) * eps; - else - return f; -} - -/* `signum` function testing for zero. Matches GLSL and OSL functions. */ -ccl_device_inline float compatible_signf(float f) -{ - if (f == 0.0f) { - return 0.0f; - } - else { - return signf(f); - } -} - -ccl_device_inline float smoothstepf(float f) -{ - float ff = f * f; - return (3.0f * ff - 2.0f * ff * f); -} - -ccl_device_inline int mod(int x, int m) -{ - return (x % m + m) % m; -} - -ccl_device_inline float3 float2_to_float3(const float2 a) -{ - return make_float3(a.x, a.y, 0.0f); -} - -ccl_device_inline float3 float4_to_float3(const float4 a) -{ - return make_float3(a.x, a.y, a.z); -} - -ccl_device_inline float4 float3_to_float4(const float3 a) -{ - return make_float4(a.x, a.y, a.z, 1.0f); -} - -ccl_device_inline float inverse_lerp(float a, float b, float x) -{ - return (x - a) / (b - a); -} - -/* Cubic interpolation between b and c, a and d are the previous and next point. 
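
/* Quick standalone check (not from the Cycles sources) of the cubic blend
 * defined just below: it is a Catmull-Rom style interpolant, so it must return
 * exactly b at x = 0 and c at x = 1, with a and d only shaping the tangents.
 * cubic_ref replicates the same polynomial for the check. */
#include <cassert>
#include <cstdio>

static float cubic_ref(float a, float b, float c, float d, float x)
{
  return 0.5f *
             (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x +
              (c - a)) *
             x +
         b;
}

int main()
{
  const float a = 0.0f, b = 1.0f, c = 3.0f, d = 2.0f;
  assert(cubic_ref(a, b, c, d, 0.0f) == b);
  assert(cubic_ref(a, b, c, d, 1.0f) == c);
  printf("midpoint: %f\n", cubic_ref(a, b, c, d, 0.5f));
  return 0;
}
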
*/ -ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x) -{ - return 0.5f * - (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x + - (c - a)) * - x + - b; -} - -CCL_NAMESPACE_END - -#include "util/util_math_int2.h" -#include "util/util_math_int3.h" -#include "util/util_math_int4.h" - -#include "util/util_math_float2.h" -#include "util/util_math_float3.h" -#include "util/util_math_float4.h" - -#include "util/util_rect.h" - -CCL_NAMESPACE_BEGIN - -/* Interpolation */ - -template A lerp(const A &a, const A &b, const B &t) -{ - return (A)(a * ((B)1 - t) + b * t); -} - -/* Triangle */ - -ccl_device_inline float triangle_area(ccl_private const float3 &v1, - ccl_private const float3 &v2, - ccl_private const float3 &v3) -{ - return len(cross(v3 - v2, v1 - v2)) * 0.5f; -} - -/* Orthonormal vectors */ - -ccl_device_inline void make_orthonormals(const float3 N, - ccl_private float3 *a, - ccl_private float3 *b) -{ -#if 0 - if (fabsf(N.y) >= 0.999f) { - *a = make_float3(1, 0, 0); - *b = make_float3(0, 0, 1); - return; - } - if (fabsf(N.z) >= 0.999f) { - *a = make_float3(1, 0, 0); - *b = make_float3(0, 1, 0); - return; - } -#endif - - if (N.x != N.y || N.x != N.z) - *a = make_float3(N.z - N.y, N.x - N.z, N.y - N.x); //(1,1,1)x N - else - *a = make_float3(N.z - N.y, N.x + N.z, -N.y - N.x); //(-1,1,1)x N - - *a = normalize(*a); - *b = cross(N, *a); -} - -/* Color division */ - -ccl_device_inline float3 safe_invert_color(float3 a) -{ - float x, y, z; - - x = (a.x != 0.0f) ? 1.0f / a.x : 0.0f; - y = (a.y != 0.0f) ? 1.0f / a.y : 0.0f; - z = (a.z != 0.0f) ? 1.0f / a.z : 0.0f; - - return make_float3(x, y, z); -} - -ccl_device_inline float3 safe_divide_color(float3 a, float3 b) -{ - float x, y, z; - - x = (b.x != 0.0f) ? a.x / b.x : 0.0f; - y = (b.y != 0.0f) ? a.y / b.y : 0.0f; - z = (b.z != 0.0f) ? a.z / b.z : 0.0f; - - return make_float3(x, y, z); -} - -ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b) -{ - float x, y, z; - - x = (b.x != 0.0f) ? a.x / b.x : 0.0f; - y = (b.y != 0.0f) ? a.y / b.y : 0.0f; - z = (b.z != 0.0f) ? a.z / b.z : 0.0f; - - /* try to get gray even if b is zero */ - if (b.x == 0.0f) { - if (b.y == 0.0f) { - x = z; - y = z; - } - else if (b.z == 0.0f) { - x = y; - z = y; - } - else - x = 0.5f * (y + z); - } - else if (b.y == 0.0f) { - if (b.z == 0.0f) { - y = x; - z = x; - } - else - y = 0.5f * (x + z); - } - else if (b.z == 0.0f) { - z = 0.5f * (x + y); - } - - return make_float3(x, y, z); -} - -/* Rotation of point around axis and angle */ - -ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) -{ - float costheta = cosf(angle); - float sintheta = sinf(angle); - float3 r; - - r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + - (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + - (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); - - r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + - ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + - (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); - - r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + - (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + - ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); - - return r; -} - -/* NaN-safe math ops */ - -ccl_device_inline float safe_sqrtf(float f) -{ - return sqrtf(max(f, 0.0f)); -} - -ccl_device_inline float inversesqrtf(float f) -{ - return (f > 0.0f) ? 
1.0f / sqrtf(f) : 0.0f; -} - -ccl_device float safe_asinf(float a) -{ - return asinf(clamp(a, -1.0f, 1.0f)); -} - -ccl_device float safe_acosf(float a) -{ - return acosf(clamp(a, -1.0f, 1.0f)); -} - -ccl_device float compatible_powf(float x, float y) -{ -#ifdef __KERNEL_GPU__ - if (y == 0.0f) /* x^0 -> 1, including 0^0 */ - return 1.0f; - - /* GPU pow doesn't accept negative x, do manual checks here */ - if (x < 0.0f) { - if (fmodf(-y, 2.0f) == 0.0f) - return powf(-x, y); - else - return -powf(-x, y); - } - else if (x == 0.0f) - return 0.0f; -#endif - return powf(x, y); -} - -ccl_device float safe_powf(float a, float b) -{ - if (UNLIKELY(a < 0.0f && b != float_to_int(b))) - return 0.0f; - - return compatible_powf(a, b); -} - -ccl_device float safe_divide(float a, float b) -{ - return (b != 0.0f) ? a / b : 0.0f; -} - -ccl_device float safe_logf(float a, float b) -{ - if (UNLIKELY(a <= 0.0f || b <= 0.0f)) - return 0.0f; - - return safe_divide(logf(a), logf(b)); -} - -ccl_device float safe_modulo(float a, float b) -{ - return (b != 0.0f) ? fmodf(a, b) : 0.0f; -} - -ccl_device_inline float sqr(float a) -{ - return a * a; -} - -ccl_device_inline float pow20(float a) -{ - return sqr(sqr(sqr(sqr(a)) * a)); -} - -ccl_device_inline float pow22(float a) -{ - return sqr(a * sqr(sqr(sqr(a)) * a)); -} - -ccl_device_inline float beta(float x, float y) -{ - return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y)); -} - -ccl_device_inline float xor_signmask(float x, int y) -{ - return __int_as_float(__float_as_int(x) ^ y); -} - -ccl_device float bits_to_01(uint bits) -{ - return bits * (1.0f / (float)0xFFFFFFFF); -} - -ccl_device_inline uint count_leading_zeros(uint x) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) - return __clz(x); -#else - assert(x != 0); -# ifdef _MSC_VER - unsigned long leading_zero = 0; - _BitScanReverse(&leading_zero, x); - return (31 - leading_zero); -# else - return __builtin_clz(x); -# endif -#endif -} - -ccl_device_inline uint count_trailing_zeros(uint x) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) - return (__ffs(x) - 1); -#else - assert(x != 0); -# ifdef _MSC_VER - unsigned long ctz = 0; - _BitScanForward(&ctz, x); - return ctz; -# else - return __builtin_ctz(x); -# endif -#endif -} - -ccl_device_inline uint find_first_set(uint x) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) - return __ffs(x); -#else -# ifdef _MSC_VER - return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0; -# else - return __builtin_ffs(x); -# endif -#endif -} - -/* projections */ -ccl_device_inline float2 map_to_tube(const float3 co) -{ - float len, u, v; - len = sqrtf(co.x * co.x + co.y * co.y); - if (len > 0.0f) { - u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f; - v = (co.z + 1.0f) * 0.5f; - } - else { - u = v = 0.0f; - } - return make_float2(u, v); -} - -ccl_device_inline float2 map_to_sphere(const float3 co) -{ - float l = len(co); - float u, v; - if (l > 0.0f) { - if (UNLIKELY(co.x == 0.0f && co.y == 0.0f)) { - u = 0.0f; /* Otherwise domain error. */ - } - else { - u = (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f; - } - v = 1.0f - safe_acosf(co.z / l) / M_PI_F; - } - else { - u = v = 0.0f; - } - return make_float2(u, v); -} - -/* Compares two floats. - * Returns true if their absolute difference is smaller than abs_diff (for numbers near zero) - * or their relative difference is less than ulp_diff ULPs. 
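
/* Standalone illustration of the ULP distance used by compare_floats() below:
 * reinterpreting two same-sign floats as ints and subtracting gives the number
 * of representable values between them, so adjacent floats are exactly 1
 * apart. Uses memcpy for the bit cast instead of the kernel's
 * __float_as_int(); float_bits is an illustrative name. */
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

static int32_t float_bits(float f)
{
  int32_t i;
  std::memcpy(&i, &f, sizeof(i));
  return i;
}

int main()
{
  const float a = 1.0f;
  const float b = std::nextafter(a, 2.0f); /* smallest float above 1.0f */
  assert(float_bits(b) - float_bits(a) == 1);
  printf("1.0f and nextafter(1.0f) differ by %d ULP\n",
         float_bits(b) - float_bits(a));
  return 0;
}
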
- * Based on - * https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ - */ - -ccl_device_inline float compare_floats(float a, float b, float abs_diff, int ulp_diff) -{ - if (fabsf(a - b) < abs_diff) { - return true; - } - - if ((a < 0.0f) != (b < 0.0f)) { - return false; - } - - return (abs(__float_as_int(a) - __float_as_int(b)) < ulp_diff); -} - -/* Calculate the angle between the two vectors a and b. - * The usual approach `acos(dot(a, b))` has severe precision issues for small angles, - * which are avoided by this method. - * Based on "Mangled Angles" from https://people.eecs.berkeley.edu/~wkahan/Mindless.pdf - */ -ccl_device_inline float precise_angle(float3 a, float3 b) -{ - return 2.0f * atan2f(len(a - b), len(a + b)); -} - -/* Return value which is greater than the given one and is a power of two. */ -ccl_device_inline uint next_power_of_two(uint x) -{ - return x == 0 ? 1 : 1 << (32 - count_leading_zeros(x)); -} - -/* Return value which is lower than the given one and is a power of two. */ -ccl_device_inline uint prev_power_of_two(uint x) -{ - return x < 2 ? x : 1 << (31 - count_leading_zeros(x - 1)); -} - -#ifndef __has_builtin -# define __has_builtin(v) 0 -#endif - -/* Reverses the bits of a 32 bit integer. */ -ccl_device_inline uint32_t reverse_integer_bits(uint32_t x) -{ - /* Use a native instruction if it exists. */ -#if defined(__arm__) || defined(__aarch64__) - __asm__("rbit %w0, %w1" : "=r"(x) : "r"(x)); - return x; -#elif defined(__KERNEL_CUDA__) - return __brev(x); -#elif __has_builtin(__builtin_bitreverse32) - return __builtin_bitreverse32(x); -#else - /* Flip pairwise. */ - x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); - /* Flip pairs. */ - x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); - /* Flip nibbles. */ - x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); - /* Flip bytes. CPUs have an instruction for that, pretty fast one. */ -# ifdef _MSC_VER - return _byteswap_ulong(x); -# elif defined(__INTEL_COMPILER) - return (uint32_t)_bswap((int)x); -# else - /* Assuming gcc or clang. */ - return __builtin_bswap32(x); -# endif -#endif -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_H__ */ diff --git a/intern/cycles/util/util_math_cdf.cpp b/intern/cycles/util/util_math_cdf.cpp deleted file mode 100644 index a58bab188ef..00000000000 --- a/intern/cycles/util/util_math_cdf.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_math_cdf.h" - -#include "util/util_algorithm.h" -#include "util/util_math.h" - -CCL_NAMESPACE_BEGIN - -/* Invert pre-calculated CDF function. 
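
/* Standalone check of the branch-free bit reversal used as the portable
 * fallback in reverse_integer_bits() above, compared against a naive
 * one-bit-at-a-time loop. The final byte and half-word flips are written with
 * plain shifts here instead of a byteswap intrinsic; reverse_swaps and
 * reverse_naive are illustrative names. */
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint32_t reverse_swaps(uint32_t x)
{
  x = ((x & 0x55555555u) << 1) | ((x & 0xAAAAAAAAu) >> 1); /* flip pairwise */
  x = ((x & 0x33333333u) << 2) | ((x & 0xCCCCCCCCu) >> 2); /* flip pairs */
  x = ((x & 0x0F0F0F0Fu) << 4) | ((x & 0xF0F0F0F0u) >> 4); /* flip nibbles */
  x = ((x & 0x00FF00FFu) << 8) | ((x & 0xFF00FF00u) >> 8); /* flip bytes */
  return (x << 16) | (x >> 16);                            /* flip halves */
}

static uint32_t reverse_naive(uint32_t x)
{
  uint32_t r = 0;
  for (int i = 0; i < 32; i++, x >>= 1)
    r = (r << 1) | (x & 1u);
  return r;
}

int main()
{
  const uint32_t vals[] = {0u, 1u, 0x12345678u, 0xDEADBEEFu};
  for (uint32_t v : vals)
    assert(reverse_swaps(v) == reverse_naive(v));
  printf("rev(0x00000001) = 0x%08x\n", (unsigned)reverse_swaps(1u)); /* 0x80000000 */
  return 0;
}
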
*/ -void util_cdf_invert(const int resolution, - const float from, - const float to, - const vector &cdf, - const bool make_symmetric, - vector &inv_cdf) -{ - const float inv_resolution = 1.0f / (float)resolution; - const float range = to - from; - inv_cdf.resize(resolution); - if (make_symmetric) { - const int half_size = (resolution - 1) / 2; - for (int i = 0; i <= half_size; i++) { - float x = i / (float)half_size; - int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); - float t; - if (index < cdf.size() - 1) { - t = (x - cdf[index]) / (cdf[index + 1] - cdf[index]); - } - else { - t = 0.0f; - index = cdf.size() - 1; - } - float y = ((index + t) / (resolution - 1)) * (2.0f * range); - inv_cdf[half_size + i] = 0.5f * (1.0f + y); - inv_cdf[half_size - i] = 0.5f * (1.0f - y); - } - } - else { - for (int i = 0; i < resolution; i++) { - float x = from + range * (float)i * inv_resolution; - int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); - float t; - if (index < cdf.size() - 1) { - t = (x - cdf[index]) / (cdf[index + 1] - cdf[index]); - } - else { - t = 0.0f; - index = resolution; - } - inv_cdf[i] = (index + t) * inv_resolution; - } - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_math_cdf.h b/intern/cycles/util/util_math_cdf.h deleted file mode 100644 index 43995204263..00000000000 --- a/intern/cycles/util/util_math_cdf.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_CDF_H__ -#define __UTIL_MATH_CDF_H__ - -#include "util/util_algorithm.h" -#include "util/util_math.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -/* Evaluate CDF of a given functor with given range and resolution. */ -template -void util_cdf_evaluate( - const int resolution, const float from, const float to, Functor functor, vector &cdf) -{ - const int cdf_count = resolution + 1; - const float range = to - from; - cdf.resize(cdf_count); - cdf[0] = 0.0f; - /* Actual CDF evaluation. */ - for (int i = 0; i < resolution; ++i) { - float x = from + range * (float)i / (resolution - 1); - float y = functor(x); - cdf[i + 1] = cdf[i] + fabsf(y); - } - /* Normalize the CDF. */ - for (int i = 0; i <= resolution; i++) { - cdf[i] /= cdf[resolution]; - } -} - -/* Invert pre-calculated CDF function. */ -void util_cdf_invert(const int resolution, - const float from, - const float to, - const vector &cdf, - const bool make_symmetric, - vector &inv_cdf); - -/* Evaluate inverted CDF of a given functor with given range and resolution. */ -template -void util_cdf_inverted(const int resolution, - const float from, - const float to, - Functor functor, - const bool make_symmetric, - vector &inv_cdf) -{ - vector cdf; - /* There is no much smartness going around lower resolution for the CDF table, - * this just to match the old code from pixel filter so it all stays exactly - * the same and no regression tests are failed. 
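
/* Minimal standalone sketch of the idea behind util_cdf_invert() above:
 * for a target value x in [0, 1], locate the CDF segment containing it with
 * upper_bound and interpolate linearly inside that segment. This is the
 * textbook form of the search, simplified to a unit range with no symmetric
 * variant, not a line-for-line copy of the Cycles function. */
#include <algorithm>
#include <cstdio>
#include <vector>

static float invert_cdf(const std::vector<float> &cdf, float x)
{
  /* First entry strictly greater than x; the bracketing segment is [i-1, i]. */
  size_t i = std::upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin();
  if (i == 0)
    return 0.0f;
  if (i >= cdf.size())
    return 1.0f;
  const float t = (x - cdf[i - 1]) / (cdf[i] - cdf[i - 1]);
  return ((float)(i - 1) + t) / (float)(cdf.size() - 1);
}

int main()
{
  /* CDF of a uniform density over [0, 1], discretized at 5 points. */
  const std::vector<float> cdf = {0.0f, 0.25f, 0.5f, 0.75f, 1.0f};
  printf("invert(0.5) = %f\n", invert_cdf(cdf, 0.5f)); /* 0.5 */
  printf("invert(0.1) = %f\n", invert_cdf(cdf, 0.1f)); /* 0.1 */
  return 0;
}
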
- */ - util_cdf_evaluate(resolution - 1, from, to, functor, cdf); - util_cdf_invert(resolution, from, to, cdf, make_symmetric, inv_cdf); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_H_CDF__ */ diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h deleted file mode 100644 index cc924f36a71..00000000000 --- a/intern/cycles/util/util_math_fast.h +++ /dev/null @@ -1,652 +0,0 @@ -/* - * Adapted from OpenImageIO library with this license: - * - * Copyright 2008-2014 Larry Gritz and the other authors and contributors. - * All Rights Reserved. - - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the software's owners nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * (This is the Modified BSD License) - * - * A few bits here are based upon code from NVIDIA that was also released - * under the same modified BSD license, and marked as: - * Copyright 2004 NVIDIA Corporation. All Rights Reserved. - * - * Some parts of this file were first open-sourced in Open Shading Language, - * then later moved here. The original copyright notice was: - * Copyright (c) 2009-2014 Sony Pictures Imageworks Inc., et al. - * - * Many of the math functions were copied from or inspired by other - * public domain sources or open source packages with compatible licenses. - * The individual functions give references were applicable. - */ - -#ifndef __UTIL_FAST_MATH__ -#define __UTIL_FAST_MATH__ - -CCL_NAMESPACE_BEGIN - -ccl_device_inline float madd(const float a, const float b, const float c) -{ - /* NOTE: In the future we may want to explicitly ask for a fused - * multiply-add in a specialized version for float. - * - * NOTE: GCC/ICC will turn this (for float) into a FMA unless - * explicitly asked not to, clang seems to leave the code alone. - */ - return a * b + c; -} - -ccl_device_inline float4 madd4(const float4 a, const float4 b, const float4 c) -{ - return a * b + c; -} - -/* - * FAST & APPROXIMATE MATH - * - * The functions named "fast_*" provide a set of replacements to libm that - * are much faster at the expense of some accuracy and robust handling of - * extreme values. 
One design goal for these approximation was to avoid - * branches as much as possible and operate on single precision values only - * so that SIMD versions should be straightforward ports We also try to - * implement "safe" semantics (ie: clamp to valid range where possible) - * natively since wrapping these inline calls in another layer would be - * wasteful. - * - * Some functions are fast_safe_*, which is both a faster approximation as - * well as clamped input domain to ensure no NaN, Inf, or divide by zero. - */ - -/* Round to nearest integer, returning as an int. */ -ccl_device_inline int fast_rint(float x) -{ - /* used by sin/cos/tan range reduction. */ -#ifdef __KERNEL_SSE4__ - /* Single `roundps` instruction on SSE4.1+ (for gcc/clang at least). */ - return float_to_int(rintf(x)); -#else - /* emulate rounding by adding/subtracting 0.5. */ - return float_to_int(x + copysignf(0.5f, x)); -#endif -} - -ccl_device float fast_sinf(float x) -{ - /* Very accurate argument reduction from SLEEF, - * starts failing around x=262000 - * - * Results on: [-2pi,2pi]. - * - * Examined 2173837240 values of sin: 0.00662760244 avg ulp diff, 2 max ulp, - * 1.19209e-07 max error - */ - int q = fast_rint(x * M_1_PI_F); - float qf = (float)q; - x = madd(qf, -0.78515625f * 4, x); - x = madd(qf, -0.00024187564849853515625f * 4, x); - x = madd(qf, -3.7747668102383613586e-08f * 4, x); - x = madd(qf, -1.2816720341285448015e-12f * 4, x); - x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals */ - float s = x * x; - if ((q & 1) != 0) - x = -x; - /* This polynomial approximation has very low error on [-pi/2,+pi/2] - * 1.19209e-07 max error in total over [-2pi,+2pi]. */ - float u = 2.6083159809786593541503e-06f; - u = madd(u, s, -0.0001981069071916863322258f); - u = madd(u, s, +0.00833307858556509017944336f); - u = madd(u, s, -0.166666597127914428710938f); - u = madd(s, u * x, x); - /* For large x, the argument reduction can fail and the polynomial can be - * evaluated with arguments outside the valid internal. Just clamp the bad - * values away (setting to 0.0f means no branches need to be generated). */ - if (fabsf(u) > 1.0f) { - u = 0.0f; - } - return u; -} - -ccl_device float fast_cosf(float x) -{ - /* Same argument reduction as fast_sinf(). */ - int q = fast_rint(x * M_1_PI_F); - float qf = (float)q; - x = madd(qf, -0.78515625f * 4, x); - x = madd(qf, -0.00024187564849853515625f * 4, x); - x = madd(qf, -3.7747668102383613586e-08f * 4, x); - x = madd(qf, -1.2816720341285448015e-12f * 4, x); - x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals. */ - float s = x * x; - /* Polynomial from SLEEF's sincosf, max error is - * 4.33127e-07 over [-2pi,2pi] (98% of values are "exact"). */ - float u = -2.71811842367242206819355e-07f; - u = madd(u, s, +2.47990446951007470488548e-05f); - u = madd(u, s, -0.00138888787478208541870117f); - u = madd(u, s, +0.0416666641831398010253906f); - u = madd(u, s, -0.5f); - u = madd(u, s, +1.0f); - if ((q & 1) != 0) { - u = -u; - } - if (fabsf(u) > 1.0f) { - u = 0.0f; - } - return u; -} - -ccl_device void fast_sincosf(float x, ccl_private float *sine, ccl_private float *cosine) -{ - /* Same argument reduction as fast_sin. 
*/ - int q = fast_rint(x * M_1_PI_F); - float qf = (float)q; - x = madd(qf, -0.78515625f * 4, x); - x = madd(qf, -0.00024187564849853515625f * 4, x); - x = madd(qf, -3.7747668102383613586e-08f * 4, x); - x = madd(qf, -1.2816720341285448015e-12f * 4, x); - x = M_PI_2_F - (M_PI_2_F - x); // crush denormals - float s = x * x; - /* NOTE: same exact polynomials as fast_sinf() and fast_cosf() above. */ - if ((q & 1) != 0) { - x = -x; - } - float su = 2.6083159809786593541503e-06f; - su = madd(su, s, -0.0001981069071916863322258f); - su = madd(su, s, +0.00833307858556509017944336f); - su = madd(su, s, -0.166666597127914428710938f); - su = madd(s, su * x, x); - float cu = -2.71811842367242206819355e-07f; - cu = madd(cu, s, +2.47990446951007470488548e-05f); - cu = madd(cu, s, -0.00138888787478208541870117f); - cu = madd(cu, s, +0.0416666641831398010253906f); - cu = madd(cu, s, -0.5f); - cu = madd(cu, s, +1.0f); - if ((q & 1) != 0) { - cu = -cu; - } - if (fabsf(su) > 1.0f) { - su = 0.0f; - } - if (fabsf(cu) > 1.0f) { - cu = 0.0f; - } - *sine = su; - *cosine = cu; -} - -/* NOTE: this approximation is only valid on [-8192.0,+8192.0], it starts - * becoming really poor outside of this range because the reciprocal amplifies - * errors. - */ -ccl_device float fast_tanf(float x) -{ - /* Derived from SLEEF implementation. - * - * Note that we cannot apply the "denormal crush" trick everywhere because - * we sometimes need to take the reciprocal of the polynomial - */ - int q = fast_rint(x * 2.0f * M_1_PI_F); - float qf = (float)q; - x = madd(qf, -0.78515625f * 2, x); - x = madd(qf, -0.00024187564849853515625f * 2, x); - x = madd(qf, -3.7747668102383613586e-08f * 2, x); - x = madd(qf, -1.2816720341285448015e-12f * 2, x); - if ((q & 1) == 0) { - /* Crush denormals (only if we aren't inverting the result later). */ - x = M_PI_4_F - (M_PI_4_F - x); - } - float s = x * x; - float u = 0.00927245803177356719970703f; - u = madd(u, s, 0.00331984995864331722259521f); - u = madd(u, s, 0.0242998078465461730957031f); - u = madd(u, s, 0.0534495301544666290283203f); - u = madd(u, s, 0.133383005857467651367188f); - u = madd(u, s, 0.333331853151321411132812f); - u = madd(s, u * x, x); - if ((q & 1) != 0) { - u = -1.0f / u; - } - return u; -} - -/* Fast, approximate sin(x*M_PI) with maximum absolute error of 0.000918954611. - * - * Adapted from http://devmaster.net/posts/9648/fast-and-accurate-sine-cosine#comment-76773 - */ -ccl_device float fast_sinpif(float x) -{ - /* Fast trick to strip the integral part off, so our domain is [-1, 1]. */ - const float z = x - ((x + 25165824.0f) - 25165824.0f); - const float y = z - z * fabsf(z); - const float Q = 3.10396624f; - const float P = 3.584135056f; /* P = 16-4*Q */ - return y * (Q + P * fabsf(y)); - - /* The original article used inferior constants for Q and P and - * so had max error 1.091e-3. - * - * The optimal value for Q was determined by exhaustive search, minimizing - * the absolute numerical error relative to float(std::sin(double(phi*M_PI))) - * over the interval [0,2] (which is where most of the invocations happen). - * - * The basic idea of this approximation starts with the coarse approximation: - * sin(pi*x) ~= f(x) = 4 * (x - x * abs(x)) - * - * This approximation always _over_ estimates the target. On the other hand, - * the curve: - * sin(pi*x) ~= f(x) * abs(f(x)) / 4 - * - * always lies _under_ the target. Thus we can simply numerically search for - * the optimal constant to LERP these curves into a more precise - * approximation. 
- * - * After folding the constants together and simplifying the resulting math, - * we end up with the compact implementation above. - * - * NOTE: this function actually computes sin(x * pi) which avoids one or two - * mults in many cases and guarantees exact values at integer periods. - */ -} - -/* Fast approximate cos(x*M_PI) with ~0.1% absolute error. */ -ccl_device_inline float fast_cospif(float x) -{ - return fast_sinpif(x + 0.5f); -} - -ccl_device float fast_acosf(float x) -{ - const float f = fabsf(x); - /* clamp and crush denormals. */ - const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; - /* Based on http://www.pouet.net/topic.php?which=9132&page=2 - * 85% accurate (ulp 0) - * Examined 2130706434 values of acos: - * 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush" - * Examined 2130706434 values of acos: - * 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush" - */ - const float a = sqrtf(1.0f - m) * - (1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f))); - return x < 0 ? M_PI_F - a : a; -} - -ccl_device float fast_asinf(float x) -{ - /* Based on acosf approximation above. - * Max error is 4.51133e-05 (ulps are higher because we are consistently off - * by a little amount). - */ - const float f = fabsf(x); - /* Clamp and crush denormals. */ - const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; - const float a = M_PI_2_F - - sqrtf(1.0f - m) * (1.5707963267f + - m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f))); - return copysignf(a, x); -} - -ccl_device float fast_atanf(float x) -{ - const float a = fabsf(x); - const float k = a > 1.0f ? 1 / a : a; - const float s = 1.0f - (1.0f - k); /* Crush denormals. */ - const float t = s * s; - /* http://mathforum.org/library/drmath/view/62672.html - * Examined 4278190080 values of atan: - * 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals) - * Examined 4278190080 values of atan: - * 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals) - */ - float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); - if (a > 1.0f) { - r = M_PI_2_F - r; - } - return copysignf(r, x); -} - -ccl_device float fast_atan2f(float y, float x) -{ - /* Based on atan approximation above. - * - * The special cases around 0 and infinity were tested explicitly. - * - * The only case not handled correctly is x=NaN,y=0 which returns 0 instead - * of nan. - */ - const float a = fabsf(x); - const float b = fabsf(y); - - const float k = (b == 0) ? 0.0f : ((a == b) ? 1.0f : (b > a ? a / b : b / a)); - const float s = 1.0f - (1.0f - k); /* Crush denormals */ - const float t = s * s; - - float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); - - if (b > a) { - /* Account for arg reduction. */ - r = M_PI_2_F - r; - } - /* Test sign bit of x. */ - if (__float_as_uint(x) & 0x80000000u) { - r = M_PI_F - r; - } - return copysignf(r, y); -} - -/* Based on: - * - * https://github.com/LiraNuna/glsl-sse2/blob/master/source/vec4.h - */ -ccl_device float fast_log2f(float x) -{ - /* NOTE: clamp to avoid special cases and make result "safe" from large - * negative values/NAN's. 
*/ - x = clamp(x, FLT_MIN, FLT_MAX); - unsigned bits = __float_as_uint(x); - int exponent = (int)(bits >> 23) - 127; - float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f; - /* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]: - * 0.0797524457 avg ulp diff, 3713596 max ulp, 7.62939e-06 max error. - * ulp histogram: - * 0 = 97.46% - * 1 = 2.29% - * 2 = 0.11% - */ - float f2 = f * f; - float f4 = f2 * f2; - float hi = madd(f, -0.00931049621349f, 0.05206469089414f); - float lo = madd(f, 0.47868480909345f, -0.72116591947498f); - hi = madd(f, hi, -0.13753123777116f); - hi = madd(f, hi, 0.24187369696082f); - hi = madd(f, hi, -0.34730547155299f); - lo = madd(f, lo, 1.442689881667200f); - return ((f4 * hi) + (f * lo)) + exponent; -} - -ccl_device_inline float fast_logf(float x) -{ - /* Examined 2130706432 values of logf on [1.17549435e-38,3.40282347e+38]: - * 0.313865375 avg ulp diff, 5148137 max ulp, 7.62939e-06 max error. - */ - return fast_log2f(x) * M_LN2_F; -} - -ccl_device_inline float fast_log10(float x) -{ - /* Examined 2130706432 values of log10f on [1.17549435e-38,3.40282347e+38]: - * 0.631237033 avg ulp diff, 4471615 max ulp, 3.8147e-06 max error. - */ - return fast_log2f(x) * M_LN2_F / M_LN10_F; -} - -ccl_device float fast_logb(float x) -{ - /* Don't bother with denormals. */ - x = fabsf(x); - x = clamp(x, FLT_MIN, FLT_MAX); - unsigned bits = __float_as_uint(x); - return (float)((int)(bits >> 23) - 127); -} - -ccl_device float fast_exp2f(float x) -{ - /* Clamp to safe range for final addition. */ - x = clamp(x, -126.0f, 126.0f); - /* Range reduction. */ - int m = (int)x; - x -= m; - x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ulps!). */ - /* 5th degree polynomial generated with sollya - * Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ulp diff, - * 232 max ulp. - * - * ulp histogram: - * 0 = 87.81% - * 1 = 4.18% - */ - float r = 1.33336498402e-3f; - r = madd(x, r, 9.810352697968e-3f); - r = madd(x, r, 5.551834031939e-2f); - r = madd(x, r, 0.2401793301105f); - r = madd(x, r, 0.693144857883f); - r = madd(x, r, 1.0f); - /* Multiply by 2 ^ m by adding in the exponent. */ - /* NOTE: left-shift of negative number is undefined behavior. */ - return __uint_as_float(__float_as_uint(r) + ((unsigned)m << 23)); -} - -ccl_device_inline float fast_expf(float x) -{ - /* Examined 2237485550 values of exp on [-87.3300018,87.3300018]: - * 2.6666452 avg ulp diff, 230 max ulp. - */ - return fast_exp2f(x / M_LN2_F); -} - -#if defined(__KERNEL_CPU__) && !defined(_MSC_VER) -/* MSVC seems to have a code-gen bug here in at least SSE41/AVX, see - * T78047 and T78869 for details. Just disable for now, it only makes - * a small difference in denoising performance. 
*/ -ccl_device float4 fast_exp2f4(float4 x) -{ - const float4 one = make_float4(1.0f); - const float4 limit = make_float4(126.0f); - x = clamp(x, -limit, limit); - int4 m = make_int4(x); - x = one - (one - (x - make_float4(m))); - float4 r = make_float4(1.33336498402e-3f); - r = madd4(x, r, make_float4(9.810352697968e-3f)); - r = madd4(x, r, make_float4(5.551834031939e-2f)); - r = madd4(x, r, make_float4(0.2401793301105f)); - r = madd4(x, r, make_float4(0.693144857883f)); - r = madd4(x, r, make_float4(1.0f)); - return __int4_as_float4(__float4_as_int4(r) + (m << 23)); -} - -ccl_device_inline float4 fast_expf4(float4 x) -{ - return fast_exp2f4(x / M_LN2_F); -} -#else -ccl_device_inline float4 fast_expf4(float4 x) -{ - return make_float4(fast_expf(x.x), fast_expf(x.y), fast_expf(x.z), fast_expf(x.w)); -} -#endif - -ccl_device_inline float fast_exp10(float x) -{ - /* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]: - * 2.71732409 avg ulp diff, 232 max ulp. - */ - return fast_exp2f(x * M_LN10_F / M_LN2_F); -} - -ccl_device_inline float fast_expm1f(float x) -{ - if (fabsf(x) < 1e-5f) { - x = 1.0f - (1.0f - x); /* Crush denormals. */ - return madd(0.5f, x * x, x); - } - else { - return fast_expf(x) - 1.0f; - } -} - -ccl_device float fast_sinhf(float x) -{ - float a = fabsf(x); - if (a > 1.0f) { - /* Examined 53389559 values of sinh on [1,87.3300018]: - * 33.6886442 avg ulp diff, 178 max ulp. */ - float e = fast_expf(a); - return copysignf(0.5f * e - 0.5f / e, x); - } - else { - a = 1.0f - (1.0f - a); /* Crush denorms. */ - float a2 = a * a; - /* Degree 7 polynomial generated with sollya. */ - /* Examined 2130706434 values of sinh on [-1,1]: 1.19209e-07 max error. */ - float r = 2.03945513931e-4f; - r = madd(r, a2, 8.32990277558e-3f); - r = madd(r, a2, 0.1666673421859f); - r = madd(r * a, a2, a); - return copysignf(r, x); - } -} - -ccl_device_inline float fast_coshf(float x) -{ - /* Examined 2237485550 values of cosh on [-87.3300018,87.3300018]: - * 1.78256726 avg ulp diff, 178 max ulp. - */ - float e = fast_expf(fabsf(x)); - return 0.5f * e + 0.5f / e; -} - -ccl_device_inline float fast_tanhf(float x) -{ - /* Examined 4278190080 values of tanh on [-3.40282347e+38,3.40282347e+38]: - * 3.12924e-06 max error. - */ - /* NOTE: ulp error is high because of sub-optimal handling around the origin. */ - float e = fast_expf(2.0f * fabsf(x)); - return copysignf(1.0f - 2.0f / (1.0f + e), x); -} - -ccl_device float fast_safe_powf(float x, float y) -{ - if (y == 0) - return 1.0f; /* x^1=1 */ - if (x == 0) - return 0.0f; /* 0^y=0 */ - float sign = 1.0f; - if (x < 0.0f) { - /* if x is negative, only deal with integer powers - * powf returns NaN for non-integers, we will return 0 instead. - */ - int ybits = __float_as_int(y) & 0x7fffffff; - if (ybits >= 0x4b800000) { - // always even int, keep positive - } - else if (ybits >= 0x3f800000) { - /* Bigger than 1, check. */ - int k = (ybits >> 23) - 127; /* Get exponent. */ - int j = ybits >> (23 - k); /* Shift out possible fractional bits. */ - if ((j << (23 - k)) == ybits) { /* rebuild number and check for a match. */ - /* +1 for even, -1 for odd. */ - sign = __int_as_float(0x3f800000 | (j << 31)); - } - else { - /* Not an integer. */ - return 0.0f; - } - } - else { - /* Not an integer. */ - return 0.0f; - } - } - return sign * fast_exp2f(y * fast_log2f(fabsf(x))); -} - -/* TODO(sergey): Check speed with our erf functions implementation from - * bsdf_microfacet.h. 
- */ - -ccl_device_inline float fast_erff(float x) -{ - /* Examined 1082130433 values of erff on [0,4]: 1.93715e-06 max error. */ - /* Abramowitz and Stegun, 7.1.28. */ - const float a1 = 0.0705230784f; - const float a2 = 0.0422820123f; - const float a3 = 0.0092705272f; - const float a4 = 0.0001520143f; - const float a5 = 0.0002765672f; - const float a6 = 0.0000430638f; - const float a = fabsf(x); - if (a >= 12.3f) { - return copysignf(1.0f, x); - } - const float b = 1.0f - (1.0f - a); /* Crush denormals. */ - const float r = madd( - madd(madd(madd(madd(madd(a6, b, a5), b, a4), b, a3), b, a2), b, a1), b, 1.0f); - const float s = r * r; /* ^2 */ - const float t = s * s; /* ^4 */ - const float u = t * t; /* ^8 */ - const float v = u * u; /* ^16 */ - return copysignf(1.0f - 1.0f / v, x); -} - -ccl_device_inline float fast_erfcf(float x) -{ - /* Examined 2164260866 values of erfcf on [-4,4]: 1.90735e-06 max error. - * - * ulp histogram: - * - * 0 = 80.30% - */ - return 1.0f - fast_erff(x); -} - -ccl_device_inline float fast_ierff(float x) -{ - /* From: Approximating the `erfinv` function by Mike Giles. */ - /* To avoid trouble at the limit, clamp input to 1-eps. */ - float a = fabsf(x); - if (a > 0.99999994f) { - a = 0.99999994f; - } - float w = -fast_logf((1.0f - a) * (1.0f + a)), p; - if (w < 5.0f) { - w = w - 2.5f; - p = 2.81022636e-08f; - p = madd(p, w, 3.43273939e-07f); - p = madd(p, w, -3.5233877e-06f); - p = madd(p, w, -4.39150654e-06f); - p = madd(p, w, 0.00021858087f); - p = madd(p, w, -0.00125372503f); - p = madd(p, w, -0.00417768164f); - p = madd(p, w, 0.246640727f); - p = madd(p, w, 1.50140941f); - } - else { - w = sqrtf(w) - 3.0f; - p = -0.000200214257f; - p = madd(p, w, 0.000100950558f); - p = madd(p, w, 0.00134934322f); - p = madd(p, w, -0.00367342844f); - p = madd(p, w, 0.00573950773f); - p = madd(p, w, -0.0076224613f); - p = madd(p, w, 0.00943887047f); - p = madd(p, w, 1.00167406f); - p = madd(p, w, 2.83297682f); - } - return p * x; -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_FAST_MATH__ */ diff --git a/intern/cycles/util/util_math_float2.h b/intern/cycles/util/util_math_float2.h deleted file mode 100644 index 25eda840214..00000000000 --- a/intern/cycles/util/util_math_float2.h +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_FLOAT2_H__ -#define __UTIL_MATH_FLOAT2_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. 
- */ - -ccl_device_inline float2 operator-(const float2 &a); -ccl_device_inline float2 operator*(const float2 &a, const float2 &b); -ccl_device_inline float2 operator*(const float2 &a, float f); -ccl_device_inline float2 operator*(float f, const float2 &a); -ccl_device_inline float2 operator/(float f, const float2 &a); -ccl_device_inline float2 operator/(const float2 &a, float f); -ccl_device_inline float2 operator/(const float2 &a, const float2 &b); -ccl_device_inline float2 operator+(const float2 &a, const float f); -ccl_device_inline float2 operator+(const float2 &a, const float2 &b); -ccl_device_inline float2 operator-(const float2 &a, const float f); -ccl_device_inline float2 operator-(const float2 &a, const float2 &b); -ccl_device_inline float2 operator+=(float2 &a, const float2 &b); -ccl_device_inline float2 operator*=(float2 &a, const float2 &b); -ccl_device_inline float2 operator*=(float2 &a, float f); -ccl_device_inline float2 operator/=(float2 &a, const float2 &b); -ccl_device_inline float2 operator/=(float2 &a, float f); - -ccl_device_inline bool operator==(const float2 &a, const float2 &b); -ccl_device_inline bool operator!=(const float2 &a, const float2 &b); - -ccl_device_inline bool is_zero(const float2 &a); -ccl_device_inline float average(const float2 &a); -ccl_device_inline float distance(const float2 &a, const float2 &b); -ccl_device_inline float dot(const float2 &a, const float2 &b); -ccl_device_inline float cross(const float2 &a, const float2 &b); -ccl_device_inline float len(const float2 &a); -ccl_device_inline float2 normalize(const float2 &a); -ccl_device_inline float2 normalize_len(const float2 &a, float *t); -ccl_device_inline float2 safe_normalize(const float2 &a); -ccl_device_inline float2 min(const float2 &a, const float2 &b); -ccl_device_inline float2 max(const float2 &a, const float2 &b); -ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx); -ccl_device_inline float2 fabs(const float2 &a); -ccl_device_inline float2 as_float2(const float4 &a); -ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t); -ccl_device_inline float2 floor(const float2 &a); - -ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b); - -/******************************************************************************* - * Definition. 
- */ - -ccl_device_inline float2 zero_float2() -{ - return make_float2(0.0f, 0.0f); -} - -ccl_device_inline float2 one_float2() -{ - return make_float2(1.0f, 1.0f); -} - -ccl_device_inline float2 operator-(const float2 &a) -{ - return make_float2(-a.x, -a.y); -} - -ccl_device_inline float2 operator*(const float2 &a, const float2 &b) -{ - return make_float2(a.x * b.x, a.y * b.y); -} - -ccl_device_inline float2 operator*(const float2 &a, float f) -{ - return make_float2(a.x * f, a.y * f); -} - -ccl_device_inline float2 operator*(float f, const float2 &a) -{ - return make_float2(a.x * f, a.y * f); -} - -ccl_device_inline float2 operator/(float f, const float2 &a) -{ - return make_float2(f / a.x, f / a.y); -} - -ccl_device_inline float2 operator/(const float2 &a, float f) -{ - float invf = 1.0f / f; - return make_float2(a.x * invf, a.y * invf); -} - -ccl_device_inline float2 operator/(const float2 &a, const float2 &b) -{ - return make_float2(a.x / b.x, a.y / b.y); -} - -ccl_device_inline float2 operator+(const float2 &a, const float f) -{ - return a + make_float2(f, f); -} - -ccl_device_inline float2 operator+(const float2 &a, const float2 &b) -{ - return make_float2(a.x + b.x, a.y + b.y); -} - -ccl_device_inline float2 operator-(const float2 &a, const float f) -{ - return a - make_float2(f, f); -} - -ccl_device_inline float2 operator-(const float2 &a, const float2 &b) -{ - return make_float2(a.x - b.x, a.y - b.y); -} - -ccl_device_inline float2 operator+=(float2 &a, const float2 &b) -{ - return a = a + b; -} - -ccl_device_inline float2 operator*=(float2 &a, const float2 &b) -{ - return a = a * b; -} - -ccl_device_inline float2 operator*=(float2 &a, float f) -{ - return a = a * f; -} - -ccl_device_inline float2 operator/=(float2 &a, const float2 &b) -{ - return a = a / b; -} - -ccl_device_inline float2 operator/=(float2 &a, float f) -{ - float invf = 1.0f / f; - return a = a * invf; -} - -ccl_device_inline bool operator==(const float2 &a, const float2 &b) -{ - return (a.x == b.x && a.y == b.y); -} - -ccl_device_inline bool operator!=(const float2 &a, const float2 &b) -{ - return !(a == b); -} - -ccl_device_inline bool is_zero(const float2 &a) -{ - return (a.x == 0.0f && a.y == 0.0f); -} - -ccl_device_inline float average(const float2 &a) -{ - return (a.x + a.y) * (1.0f / 2.0f); -} - -ccl_device_inline float distance(const float2 &a, const float2 &b) -{ - return len(a - b); -} - -ccl_device_inline float dot(const float2 &a, const float2 &b) -{ - return a.x * b.x + a.y * b.y; -} - -ccl_device_inline float cross(const float2 &a, const float2 &b) -{ - return (a.x * b.y - a.y * b.x); -} - -ccl_device_inline float len(const float2 &a) -{ - return sqrtf(dot(a, a)); -} - -ccl_device_inline float2 normalize(const float2 &a) -{ - return a / len(a); -} - -ccl_device_inline float2 normalize_len(const float2 &a, ccl_private float *t) -{ - *t = len(a); - return a / (*t); -} - -ccl_device_inline float2 safe_normalize(const float2 &a) -{ - float t = len(a); - return (t != 0.0f) ? 
a / t : a; -} - -ccl_device_inline float2 min(const float2 &a, const float2 &b) -{ - return make_float2(min(a.x, b.x), min(a.y, b.y)); -} - -ccl_device_inline float2 max(const float2 &a, const float2 &b) -{ - return make_float2(max(a.x, b.x), max(a.y, b.y)); -} - -ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float2 fabs(const float2 &a) -{ - return make_float2(fabsf(a.x), fabsf(a.y)); -} - -ccl_device_inline float2 as_float2(const float4 &a) -{ - return make_float2(a.x, a.y); -} - -ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t) -{ - return a + t * (b - a); -} - -ccl_device_inline float2 mix(const float2 &a, const float2 &b, float t) -{ - return a + t * (b - a); -} - -ccl_device_inline float2 floor(const float2 &a) -{ - return make_float2(floorf(a.x), floorf(a.y)); -} - -ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b) -{ - return (b != 0.0f) ? a / b : zero_float2(); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_FLOAT2_H__ */ diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h deleted file mode 100644 index c3230a8068c..00000000000 --- a/intern/cycles/util/util_math_float3.h +++ /dev/null @@ -1,530 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_FLOAT3_H__ -#define __UTIL_MATH_FLOAT3_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. 
- */ - -ccl_device_inline float3 operator-(const float3 &a); -ccl_device_inline float3 operator*(const float3 &a, const float3 &b); -ccl_device_inline float3 operator*(const float3 &a, const float f); -ccl_device_inline float3 operator*(const float f, const float3 &a); -ccl_device_inline float3 operator/(const float f, const float3 &a); -ccl_device_inline float3 operator/(const float3 &a, const float f); -ccl_device_inline float3 operator/(const float3 &a, const float3 &b); -ccl_device_inline float3 operator+(const float3 &a, const float f); -ccl_device_inline float3 operator+(const float3 &a, const float3 &b); -ccl_device_inline float3 operator-(const float3 &a, const float f); -ccl_device_inline float3 operator-(const float3 &a, const float3 &b); -ccl_device_inline float3 operator+=(float3 &a, const float3 &b); -ccl_device_inline float3 operator-=(float3 &a, const float3 &b); -ccl_device_inline float3 operator*=(float3 &a, const float3 &b); -ccl_device_inline float3 operator*=(float3 &a, float f); -ccl_device_inline float3 operator/=(float3 &a, const float3 &b); -ccl_device_inline float3 operator/=(float3 &a, float f); - -ccl_device_inline bool operator==(const float3 &a, const float3 &b); -ccl_device_inline bool operator!=(const float3 &a, const float3 &b); - -ccl_device_inline float distance(const float3 &a, const float3 &b); -ccl_device_inline float dot(const float3 &a, const float3 &b); -ccl_device_inline float dot_xy(const float3 &a, const float3 &b); -ccl_device_inline float3 cross(const float3 &a, const float3 &b); -ccl_device_inline float3 normalize(const float3 &a); -ccl_device_inline float3 min(const float3 &a, const float3 &b); -ccl_device_inline float3 max(const float3 &a, const float3 &b); -ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx); -ccl_device_inline float3 fabs(const float3 &a); -ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t); -ccl_device_inline float3 rcp(const float3 &a); -ccl_device_inline float3 sqrt(const float3 &a); -ccl_device_inline float3 floor(const float3 &a); -ccl_device_inline float3 ceil(const float3 &a); - -ccl_device_inline float min3(float3 a); -ccl_device_inline float max3(float3 a); -ccl_device_inline float len(const float3 a); -ccl_device_inline float len_squared(const float3 a); - -ccl_device_inline float3 reflect(const float3 incident, const float3 normal); -ccl_device_inline float3 project(const float3 v, const float3 v_proj); - -ccl_device_inline float3 saturate3(float3 a); -ccl_device_inline float3 safe_normalize(const float3 a); -ccl_device_inline float3 normalize_len(const float3 a, float *t); -ccl_device_inline float3 safe_normalize_len(const float3 a, float *t); -ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b); -ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b); -ccl_device_inline float3 interp(float3 a, float3 b, float t); -ccl_device_inline float3 sqr3(float3 a); - -ccl_device_inline bool is_zero(const float3 a); -ccl_device_inline float reduce_add(const float3 a); -ccl_device_inline float average(const float3 a); -ccl_device_inline bool isequal_float3(const float3 a, const float3 b); - -/******************************************************************************* - * Definition. 
- */ - -ccl_device_inline float3 zero_float3() -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_setzero_ps()); -#else - return make_float3(0.0f, 0.0f, 0.0f); -#endif -} - -ccl_device_inline float3 one_float3() -{ - return make_float3(1.0f, 1.0f, 1.0f); -} - -ccl_device_inline float3 operator-(const float3 &a) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); -#else - return make_float3(-a.x, -a.y, -a.z); -#endif -} - -ccl_device_inline float3 operator*(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_mul_ps(a.m128, b.m128)); -#else - return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); -#endif -} - -ccl_device_inline float3 operator*(const float3 &a, const float f) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f))); -#else - return make_float3(a.x * f, a.y * f, a.z * f); -#endif -} - -ccl_device_inline float3 operator*(const float f, const float3 &a) -{ -#if defined(__KERNEL_SSE__) - return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128)); -#else - return make_float3(a.x * f, a.y * f, a.z * f); -#endif -} - -ccl_device_inline float3 operator/(const float f, const float3 &a) -{ -#if defined(__KERNEL_SSE__) - return float3(_mm_div_ps(_mm_set1_ps(f), a.m128)); -#else - return make_float3(f / a.x, f / a.y, f / a.z); -#endif -} - -ccl_device_inline float3 operator/(const float3 &a, const float f) -{ - float invf = 1.0f / f; - return a * invf; -} - -ccl_device_inline float3 operator/(const float3 &a, const float3 &b) -{ -#if defined(__KERNEL_SSE__) - return float3(_mm_div_ps(a.m128, b.m128)); -#else - return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); -#endif -} - -ccl_device_inline float3 operator+(const float3 &a, const float f) -{ - return a + make_float3(f, f, f); -} - -ccl_device_inline float3 operator+(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_add_ps(a.m128, b.m128)); -#else - return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); -#endif -} - -ccl_device_inline float3 operator-(const float3 &a, const float f) -{ - return a - make_float3(f, f, f); -} - -ccl_device_inline float3 operator-(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_sub_ps(a.m128, b.m128)); -#else - return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -#endif -} - -ccl_device_inline float3 operator+=(float3 &a, const float3 &b) -{ - return a = a + b; -} - -ccl_device_inline float3 operator-=(float3 &a, const float3 &b) -{ - return a = a - b; -} - -ccl_device_inline float3 operator*=(float3 &a, const float3 &b) -{ - return a = a * b; -} - -ccl_device_inline float3 operator*=(float3 &a, float f) -{ - return a = a * f; -} - -ccl_device_inline float3 operator/=(float3 &a, const float3 &b) -{ - return a = a / b; -} - -ccl_device_inline float3 operator/=(float3 &a, float f) -{ - float invf = 1.0f / f; - return a = a * invf; -} - -ccl_device_inline bool operator==(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; -#else - return (a.x == b.x && a.y == b.y && a.z == b.z); -#endif -} - -ccl_device_inline bool operator!=(const float3 &a, const float3 &b) -{ - return !(a == b); -} - -ccl_device_inline float distance(const float3 &a, const float3 &b) -{ - return len(a - b); -} - -ccl_device_inline float dot(const float3 &a, const float3 &b) -{ -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); -#else - return a.x * b.x + a.y * b.y 
+ a.z * b.z; -#endif -} - -ccl_device_inline float dot_xy(const float3 &a, const float3 &b) -{ -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a, b), b)); -#else - return a.x * b.x + a.y * b.y; -#endif -} - -ccl_device_inline float3 cross(const float3 &a, const float3 &b) -{ - float3 r = make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); - return r; -} - -ccl_device_inline float3 normalize(const float3 &a) -{ -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); - return float3(_mm_div_ps(a.m128, norm)); -#else - return a / len(a); -#endif -} - -ccl_device_inline float3 min(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_min_ps(a.m128, b.m128)); -#else - return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); -#endif -} - -ccl_device_inline float3 max(const float3 &a, const float3 &b) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_max_ps(a.m128, b.m128)); -#else - return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); -#endif -} - -ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float3 fabs(const float3 &a) -{ -#ifdef __KERNEL_SSE__ -# ifdef __KERNEL_NEON__ - return float3(vabsq_f32(a.m128)); -# else - __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); - return float3(_mm_and_ps(a.m128, mask)); -# endif -#else - return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z)); -#endif -} - -ccl_device_inline float3 sqrt(const float3 &a) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_sqrt_ps(a)); -#else - return make_float3(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z)); -#endif -} - -ccl_device_inline float3 floor(const float3 &a) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_floor_ps(a)); -#else - return make_float3(floorf(a.x), floorf(a.y), floorf(a.z)); -#endif -} - -ccl_device_inline float3 ceil(const float3 &a) -{ -#ifdef __KERNEL_SSE__ - return float3(_mm_ceil_ps(a)); -#else - return make_float3(ceilf(a.x), ceilf(a.y), ceilf(a.z)); -#endif -} - -ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t) -{ - return a + t * (b - a); -} - -ccl_device_inline float3 rcp(const float3 &a) -{ -#ifdef __KERNEL_SSE__ - /* Don't use _mm_rcp_ps due to poor precision. 
*/ - return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); -#else - return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z); -#endif -} - -ccl_device_inline float min3(float3 a) -{ - return min(min(a.x, a.y), a.z); -} - -ccl_device_inline float max3(float3 a) -{ - return max(max(a.x, a.y), a.z); -} - -ccl_device_inline float len(const float3 a) -{ -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F))); -#else - return sqrtf(dot(a, a)); -#endif -} - -ccl_device_inline float len_squared(const float3 a) -{ - return dot(a, a); -} - -ccl_device_inline float3 reflect(const float3 incident, const float3 normal) -{ - float3 unit_normal = normalize(normal); - return incident - 2.0f * unit_normal * dot(incident, unit_normal); -} - -ccl_device_inline float3 refract(const float3 incident, const float3 normal, const float eta) -{ - float k = 1.0f - eta * eta * (1.0f - dot(normal, incident) * dot(normal, incident)); - if (k < 0.0f) - return zero_float3(); - else - return eta * incident - (eta * dot(normal, incident) + sqrt(k)) * normal; -} - -ccl_device_inline float3 faceforward(const float3 vector, - const float3 incident, - const float3 reference) -{ - return (dot(reference, incident) < 0.0f) ? vector : -vector; -} - -ccl_device_inline float3 project(const float3 v, const float3 v_proj) -{ - float len_squared = dot(v_proj, v_proj); - return (len_squared != 0.0f) ? (dot(v, v_proj) / len_squared) * v_proj : zero_float3(); -} - -ccl_device_inline float3 saturate3(float3 a) -{ - return make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); -} - -ccl_device_inline float3 normalize_len(const float3 a, ccl_private float *t) -{ - *t = len(a); - float x = 1.0f / *t; - return a * x; -} - -ccl_device_inline float3 safe_normalize(const float3 a) -{ - float t = len(a); - return (t != 0.0f) ? a * (1.0f / t) : a; -} - -ccl_device_inline float3 safe_normalize_len(const float3 a, ccl_private float *t) -{ - *t = len(a); - return (*t != 0.0f) ? a / (*t) : a; -} - -ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b) -{ - return make_float3((b.x != 0.0f) ? a.x / b.x : 0.0f, - (b.y != 0.0f) ? a.y / b.y : 0.0f, - (b.z != 0.0f) ? a.z / b.z : 0.0f); -} - -ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b) -{ - return (b != 0.0f) ? 
a / b : zero_float3(); -} - -ccl_device_inline float3 interp(float3 a, float3 b, float t) -{ - return a + t * (b - a); -} - -ccl_device_inline float3 sqr3(float3 a) -{ - return a * a; -} - -ccl_device_inline bool is_zero(const float3 a) -{ -#ifdef __KERNEL_SSE__ - return a == make_float3(0.0f); -#else - return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); -#endif -} - -ccl_device_inline float reduce_add(const float3 a) -{ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_NEON__) - __m128 t = a.m128; - t[3] = 0.0f; - return vaddvq_f32(t); -#else - return (a.x + a.y + a.z); -#endif -} - -ccl_device_inline float average(const float3 a) -{ - return reduce_add(a) * (1.0f / 3.0f); -} - -ccl_device_inline bool isequal_float3(const float3 a, const float3 b) -{ - return a == b; -} - -ccl_device_inline float3 pow3(float3 v, float e) -{ - return make_float3(powf(v.x, e), powf(v.y, e), powf(v.z, e)); -} - -ccl_device_inline float3 exp3(float3 v) -{ - return make_float3(expf(v.x), expf(v.y), expf(v.z)); -} - -ccl_device_inline float3 log3(float3 v) -{ - return make_float3(logf(v.x), logf(v.y), logf(v.z)); -} - -ccl_device_inline int3 quick_floor_to_int3(const float3 a) -{ -#ifdef __KERNEL_SSE__ - int3 b = int3(_mm_cvttps_epi32(a.m128)); - int3 isneg = int3(_mm_castps_si128(_mm_cmplt_ps(a.m128, _mm_set_ps1(0.0f)))); - /* Unsaturated add 0xffffffff is the same as subtract -1. */ - return b + isneg; -#else - return make_int3(quick_floor_to_int(a.x), quick_floor_to_int(a.y), quick_floor_to_int(a.z)); -#endif -} - -ccl_device_inline bool isfinite3_safe(float3 v) -{ - return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z); -} - -ccl_device_inline float3 ensure_finite3(float3 v) -{ - if (!isfinite_safe(v.x)) - v.x = 0.0f; - if (!isfinite_safe(v.y)) - v.y = 0.0f; - if (!isfinite_safe(v.z)) - v.z = 0.0f; - return v; -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_FLOAT3_H__ */ diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h deleted file mode 100644 index f30a78cfc69..00000000000 --- a/intern/cycles/util/util_math_float4.h +++ /dev/null @@ -1,536 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_FLOAT4_H__ -#define __UTIL_MATH_FLOAT4_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. 
- */ - -ccl_device_inline float4 operator-(const float4 &a); -ccl_device_inline float4 operator*(const float4 &a, const float4 &b); -ccl_device_inline float4 operator*(const float4 &a, float f); -ccl_device_inline float4 operator*(float f, const float4 &a); -ccl_device_inline float4 operator/(const float4 &a, float f); -ccl_device_inline float4 operator/(const float4 &a, const float4 &b); -ccl_device_inline float4 operator+(const float4 &a, const float f); -ccl_device_inline float4 operator+(const float4 &a, const float4 &b); -ccl_device_inline float4 operator-(const float4 &a, const float f); -ccl_device_inline float4 operator-(const float4 &a, const float4 &b); -ccl_device_inline float4 operator+=(float4 &a, const float4 &b); -ccl_device_inline float4 operator*=(float4 &a, const float4 &b); -ccl_device_inline float4 operator*=(float4 &a, float f); -ccl_device_inline float4 operator/=(float4 &a, float f); - -ccl_device_inline int4 operator<(const float4 &a, const float4 &b); -ccl_device_inline int4 operator>=(const float4 &a, const float4 &b); -ccl_device_inline int4 operator<=(const float4 &a, const float4 &b); -ccl_device_inline bool operator==(const float4 &a, const float4 &b); - -ccl_device_inline float distance(const float4 &a, const float4 &b); -ccl_device_inline float dot(const float4 &a, const float4 &b); -ccl_device_inline float len_squared(const float4 &a); -ccl_device_inline float4 rcp(const float4 &a); -ccl_device_inline float4 sqrt(const float4 &a); -ccl_device_inline float4 sqr(const float4 &a); -ccl_device_inline float4 cross(const float4 &a, const float4 &b); -ccl_device_inline bool is_zero(const float4 &a); -ccl_device_inline float average(const float4 &a); -ccl_device_inline float len(const float4 &a); -ccl_device_inline float4 normalize(const float4 &a); -ccl_device_inline float4 safe_normalize(const float4 &a); -ccl_device_inline float4 min(const float4 &a, const float4 &b); -ccl_device_inline float4 max(const float4 &a, const float4 &b); -ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx); -ccl_device_inline float4 fabs(const float4 &a); -ccl_device_inline float4 floor(const float4 &a); -ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t); - -ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b); - -#ifdef __KERNEL_SSE__ -template -__forceinline const float4 shuffle(const float4 &b); -template -__forceinline const float4 shuffle(const float4 &a, const float4 &b); - -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &b); - -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b); -template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b); - -# ifdef __KERNEL_SSE3__ -template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b); -template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b); -# endif -#endif /* __KERNEL_SSE__ */ - -#ifndef __KERNEL_GPU__ -ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b); -ccl_device_inline float4 reduce_min(const float4 &a); -ccl_device_inline float4 reduce_max(const float4 &a); -ccl_device_inline float4 reduce_add(const float4 &a); -#endif /* !__KERNEL_GPU__ */ - -/******************************************************************************* - * Definition. 
- */ - -ccl_device_inline float4 zero_float4() -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_setzero_ps()); -#else - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); -#endif -} - -ccl_device_inline float4 one_float4() -{ - return make_float4(1.0f, 1.0f, 1.0f, 1.0f); -} - -ccl_device_inline float4 operator-(const float4 &a) -{ -#ifdef __KERNEL_SSE__ - __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); - return float4(_mm_xor_ps(a.m128, mask)); -#else - return make_float4(-a.x, -a.y, -a.z, -a.w); -#endif -} - -ccl_device_inline float4 operator*(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_mul_ps(a.m128, b.m128)); -#else - return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -#endif -} - -ccl_device_inline float4 operator*(const float4 &a, float f) -{ -#if defined(__KERNEL_SSE__) - return a * make_float4(f); -#else - return make_float4(a.x * f, a.y * f, a.z * f, a.w * f); -#endif -} - -ccl_device_inline float4 operator*(float f, const float4 &a) -{ - return a * f; -} - -ccl_device_inline float4 operator/(const float4 &a, float f) -{ - return a * (1.0f / f); -} - -ccl_device_inline float4 operator/(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_div_ps(a.m128, b.m128)); -#else - return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); -#endif -} - -ccl_device_inline float4 operator+(const float4 &a, const float f) -{ - return a + make_float4(f, f, f, f); -} - -ccl_device_inline float4 operator+(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_add_ps(a.m128, b.m128)); -#else - return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -#endif -} - -ccl_device_inline float4 operator-(const float4 &a, const float f) -{ - return a - make_float4(f, f, f, f); -} - -ccl_device_inline float4 operator-(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_sub_ps(a.m128, b.m128)); -#else - return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); -#endif -} - -ccl_device_inline float4 operator+=(float4 &a, const float4 &b) -{ - return a = a + b; -} - -ccl_device_inline float4 operator-=(float4 &a, const float4 &b) -{ - return a = a - b; -} - -ccl_device_inline float4 operator*=(float4 &a, const float4 &b) -{ - return a = a * b; -} - -ccl_device_inline float4 operator*=(float4 &a, float f) -{ - return a = a * f; -} - -ccl_device_inline float4 operator/=(float4 &a, float f) -{ - return a = a / f; -} - -ccl_device_inline int4 operator<(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(_mm_cmplt_ps(a.m128, b.m128))); -#else - return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); -#endif -} - -ccl_device_inline int4 operator>=(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(_mm_cmpge_ps(a.m128, b.m128))); -#else - return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); -#endif -} - -ccl_device_inline int4 operator<=(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(_mm_cmple_ps(a.m128, b.m128))); -#else - return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); -#endif -} - -ccl_device_inline bool operator==(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; -#else - return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); -#endif -} - -ccl_device_inline float distance(const float4 &a, const float4 &b) -{ - return len(a - 
b); -} - -ccl_device_inline float dot(const float4 &a, const float4 &b) -{ -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) -# if defined(__KERNEL_NEON__) - __m128 t = vmulq_f32(a, b); - return vaddvq_f32(t); -# else - return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); -# endif -#else - return (a.x * b.x + a.y * b.y) + (a.z * b.z + a.w * b.w); -#endif -} - -ccl_device_inline float len_squared(const float4 &a) -{ - return dot(a, a); -} - -ccl_device_inline float4 rcp(const float4 &a) -{ -#ifdef __KERNEL_SSE__ - /* Don't use _mm_rcp_ps due to poor precision. */ - return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); -#else - return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w); -#endif -} - -ccl_device_inline float4 sqrt(const float4 &a) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_sqrt_ps(a.m128)); -#else - return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w)); -#endif -} - -ccl_device_inline float4 sqr(const float4 &a) -{ - return a * a; -} - -ccl_device_inline float4 cross(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return (shuffle<1, 2, 0, 0>(a) * shuffle<2, 0, 1, 0>(b)) - - (shuffle<2, 0, 1, 0>(a) * shuffle<1, 2, 0, 0>(b)); -#else - return make_float4(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0.0f); -#endif -} - -ccl_device_inline bool is_zero(const float4 &a) -{ -#ifdef __KERNEL_SSE__ - return a == make_float4(0.0f); -#else - return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); -#endif -} - -ccl_device_inline float4 reduce_add(const float4 &a) -{ -#if defined(__KERNEL_SSE__) -# if defined(__KERNEL_NEON__) - return float4(vdupq_n_f32(vaddvq_f32(a))); -# elif defined(__KERNEL_SSE3__) - float4 h(_mm_hadd_ps(a.m128, a.m128)); - return float4(_mm_hadd_ps(h.m128, h.m128)); -# else - float4 h(shuffle<1, 0, 3, 2>(a) + a); - return shuffle<2, 3, 0, 1>(h) + h; -# endif -#else - float sum = (a.x + a.y) + (a.z + a.w); - return make_float4(sum, sum, sum, sum); -#endif -} - -ccl_device_inline float average(const float4 &a) -{ - return reduce_add(a).x * 0.25f; -} - -ccl_device_inline float len(const float4 &a) -{ - return sqrtf(dot(a, a)); -} - -ccl_device_inline float4 normalize(const float4 &a) -{ - return a / len(a); -} - -ccl_device_inline float4 safe_normalize(const float4 &a) -{ - float t = len(a); - return (t != 0.0f) ? 
a / t : a; -} - -ccl_device_inline float4 min(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_min_ps(a.m128, b.m128)); -#else - return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); -#endif -} - -ccl_device_inline float4 max(const float4 &a, const float4 &b) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_max_ps(a.m128, b.m128)); -#else - return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); -#endif -} - -ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float4 fabs(const float4 &a) -{ -#if defined(__KERNEL_SSE__) -# if defined(__KERNEL_NEON__) - return float4(vabsq_f32(a)); -# else - return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); -# endif -#else - return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); -#endif -} - -ccl_device_inline float4 floor(const float4 &a) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_floor_ps(a)); -#else - return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w)); -#endif -} - -ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t) -{ - return a + t * (b - a); -} - -#ifdef __KERNEL_SSE__ -template -__forceinline const float4 shuffle(const float4 &b) -{ -# if defined(__KERNEL_NEON__) - return float4(shuffle_neon<__m128, index_0, index_1, index_2, index_3>(b.m128)); -# else - return float4(_mm_castsi128_ps( - _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0)))); -# endif -} - -template -__forceinline const float4 shuffle(const float4 &a, const float4 &b) -{ -# if defined(__KERNEL_NEON__) - return float4(shuffle_neon<__m128, index_0, index_1, index_2, index_3>(a.m128, b.m128)); -# else - return float4(_mm_shuffle_ps(a.m128, b.m128, _MM_SHUFFLE(index_3, index_2, index_1, index_0))); -# endif -} - -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &b) -{ - return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b)))); -} - -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b) -{ - return float4(_mm_movelh_ps(a.m128, b.m128)); -} - -template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b) -{ - return float4(_mm_movehl_ps(b.m128, a.m128)); -} - -# ifdef __KERNEL_SSE3__ -template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b) -{ - return float4(_mm_moveldup_ps(b)); -} - -template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b) -{ - return float4(_mm_movehdup_ps(b)); -} -# endif /* __KERNEL_SSE3__ */ -#endif /* __KERNEL_SSE__ */ - -#ifndef __KERNEL_GPU__ -ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b) -{ -# ifdef __KERNEL_SSE__ - return float4(_mm_blendv_ps(b.m128, a.m128, _mm_castsi128_ps(mask.m128))); -# else - return make_float4( - (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w); -# endif -} - -ccl_device_inline float4 mask(const int4 &mask, const float4 &a) -{ - /* Replace elements of x with zero where mask isn't set. 
*/ - return select(mask, a, make_float4(0.0f)); -} - -ccl_device_inline float4 reduce_min(const float4 &a) -{ -# if defined(__KERNEL_SSE__) -# if defined(__KERNEL_NEON__) - return float4(vdupq_n_f32(vminvq_f32(a))); -# else - float4 h = min(shuffle<1, 0, 3, 2>(a), a); - return min(shuffle<2, 3, 0, 1>(h), h); -# endif -# else - return make_float4(min(min(a.x, a.y), min(a.z, a.w))); -# endif -} - -ccl_device_inline float4 reduce_max(const float4 &a) -{ -# if defined(__KERNEL_SSE__) -# if defined(__KERNEL_NEON__) - return float4(vdupq_n_f32(vmaxvq_f32(a))); -# else - float4 h = max(shuffle<1, 0, 3, 2>(a), a); - return max(shuffle<2, 3, 0, 1>(h), h); -# endif -# else - return make_float4(max(max(a.x, a.y), max(a.z, a.w))); -# endif -} - -ccl_device_inline float4 load_float4(ccl_private const float *v) -{ -# ifdef __KERNEL_SSE__ - return float4(_mm_loadu_ps(v)); -# else - return make_float4(v[0], v[1], v[2], v[3]); -# endif -} - -#endif /* !__KERNEL_GPU__ */ - -ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b) -{ - return (b != 0.0f) ? a / b : zero_float4(); -} - -ccl_device_inline bool isfinite4_safe(float4 v) -{ - return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z) && isfinite_safe(v.w); -} - -ccl_device_inline float4 ensure_finite4(float4 v) -{ - if (!isfinite_safe(v.x)) - v.x = 0.0f; - if (!isfinite_safe(v.y)) - v.y = 0.0f; - if (!isfinite_safe(v.z)) - v.z = 0.0f; - if (!isfinite_safe(v.w)) - v.w = 0.0f; - return v; -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_FLOAT4_H__ */ diff --git a/intern/cycles/util/util_math_int2.h b/intern/cycles/util/util_math_int2.h deleted file mode 100644 index 5782b878801..00000000000 --- a/intern/cycles/util/util_math_int2.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_INT2_H__ -#define __UTIL_MATH_INT2_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. - */ - -ccl_device_inline bool operator==(const int2 a, const int2 b); -ccl_device_inline int2 operator+(const int2 &a, const int2 &b); -ccl_device_inline int2 operator+=(int2 &a, const int2 &b); -ccl_device_inline int2 operator-(const int2 &a, const int2 &b); -ccl_device_inline int2 operator*(const int2 &a, const int2 &b); -ccl_device_inline int2 operator/(const int2 &a, const int2 &b); - -/******************************************************************************* - * Definition. 
- */ - -ccl_device_inline bool operator==(const int2 a, const int2 b) -{ - return (a.x == b.x && a.y == b.y); -} - -ccl_device_inline int2 operator+(const int2 &a, const int2 &b) -{ - return make_int2(a.x + b.x, a.y + b.y); -} - -ccl_device_inline int2 operator+=(int2 &a, const int2 &b) -{ - return a = a + b; -} - -ccl_device_inline int2 operator-(const int2 &a, const int2 &b) -{ - return make_int2(a.x - b.x, a.y - b.y); -} - -ccl_device_inline int2 operator*(const int2 &a, const int2 &b) -{ - return make_int2(a.x * b.x, a.y * b.y); -} - -ccl_device_inline int2 operator/(const int2 &a, const int2 &b) -{ - return make_int2(a.x / b.x, a.y / b.y); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_INT2_H__ */ diff --git a/intern/cycles/util/util_math_int3.h b/intern/cycles/util/util_math_int3.h deleted file mode 100644 index e0dfae7c015..00000000000 --- a/intern/cycles/util/util_math_int3.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_INT3_H__ -#define __UTIL_MATH_INT3_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. - */ - -ccl_device_inline int3 min(int3 a, int3 b); -ccl_device_inline int3 max(int3 a, int3 b); -ccl_device_inline int3 clamp(const int3 &a, int mn, int mx); -ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx); - -/******************************************************************************* - * Definition. 
- */ - -ccl_device_inline int3 min(int3 a, int3 b) -{ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int3(_mm_min_epi32(a.m128, b.m128)); -#else - return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); -#endif -} - -ccl_device_inline int3 max(int3 a, int3 b) -{ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int3(_mm_max_epi32(a.m128, b.m128)); -#else - return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); -#endif -} - -ccl_device_inline int3 clamp(const int3 &a, int mn, int mx) -{ -#ifdef __KERNEL_SSE__ - return min(max(a, make_int3(mn)), make_int3(mx)); -#else - return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); -#endif -} - -ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx) -{ -#ifdef __KERNEL_SSE__ - return min(max(a, mn), make_int3(mx)); -#else - return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)); -#endif -} - -ccl_device_inline bool operator==(const int3 &a, const int3 &b) -{ - return a.x == b.x && a.y == b.y && a.z == b.z; -} - -ccl_device_inline bool operator!=(const int3 &a, const int3 &b) -{ - return !(a == b); -} - -ccl_device_inline bool operator<(const int3 &a, const int3 &b) -{ - return a.x < b.x && a.y < b.y && a.z < b.z; -} - -ccl_device_inline int3 operator+(const int3 &a, const int3 &b) -{ -#ifdef __KERNEL_SSE__ - return int3(_mm_add_epi32(a.m128, b.m128)); -#else - return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); -#endif -} - -ccl_device_inline int3 operator-(const int3 &a, const int3 &b) -{ -#ifdef __KERNEL_SSE__ - return int3(_mm_sub_epi32(a.m128, b.m128)); -#else - return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); -#endif -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_INT3_H__ */ diff --git a/intern/cycles/util/util_math_int4.h b/intern/cycles/util/util_math_int4.h deleted file mode 100644 index 186cc58489b..00000000000 --- a/intern/cycles/util/util_math_int4.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_INT4_H__ -#define __UTIL_MATH_INT4_H__ - -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -/******************************************************************************* - * Declaration. 
- */ - -#ifndef __KERNEL_GPU__ -ccl_device_inline int4 operator+(const int4 &a, const int4 &b); -ccl_device_inline int4 operator+=(int4 &a, const int4 &b); -ccl_device_inline int4 operator>>(const int4 &a, int i); -ccl_device_inline int4 operator<<(const int4 &a, int i); -ccl_device_inline int4 operator<(const int4 &a, const int4 &b); -ccl_device_inline int4 operator>=(const int4 &a, const int4 &b); -ccl_device_inline int4 operator&(const int4 &a, const int4 &b); -ccl_device_inline int4 min(int4 a, int4 b); -ccl_device_inline int4 max(int4 a, int4 b); -ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx); -ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b); -#endif /* __KERNEL_GPU__ */ - -/******************************************************************************* - * Definition. - */ - -#ifndef __KERNEL_GPU__ -ccl_device_inline int4 operator+(const int4 &a, const int4 &b) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_add_epi32(a.m128, b.m128)); -# else - return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -# endif -} - -ccl_device_inline int4 operator+=(int4 &a, const int4 &b) -{ - return a = a + b; -} - -ccl_device_inline int4 operator>>(const int4 &a, int i) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_srai_epi32(a.m128, i)); -# else - return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); -# endif -} - -ccl_device_inline int4 operator<<(const int4 &a, int i) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_slli_epi32(a.m128, i)); -# else - return make_int4(a.x << i, a.y << i, a.z << i, a.w << i); -# endif -} - -ccl_device_inline int4 operator<(const int4 &a, const int4 &b) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_cmplt_epi32(a.m128, b.m128)); -# else - return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); -# endif -} - -ccl_device_inline int4 operator>=(const int4 &a, const int4 &b) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, b.m128))); -# else - return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); -# endif -} - -ccl_device_inline int4 operator&(const int4 &a, const int4 &b) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_and_si128(a.m128, b.m128)); -# else - return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w); -# endif -} - -ccl_device_inline int4 min(int4 a, int4 b) -{ -# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int4(_mm_min_epi32(a.m128, b.m128)); -# else - return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); -# endif -} - -ccl_device_inline int4 max(int4 a, int4 b) -{ -# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int4(_mm_max_epi32(a.m128, b.m128)); -# else - return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); -# endif -} - -ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b) -{ -# ifdef __KERNEL_SSE__ - const __m128 m = _mm_cvtepi32_ps(mask); - /* TODO(sergey): avoid cvt. */ - return int4(_mm_castps_si128( - _mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b))))); -# else - return make_int4( - (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? 
a.w : b.w); -# endif -} - -ccl_device_inline int4 load_int4(const int *v) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_loadu_si128((__m128i *)v)); -# else - return make_int4(v[0], v[1], v[2], v[3]); -# endif -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_INT4_H__ */ diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h deleted file mode 100644 index 0c431a36afb..00000000000 --- a/intern/cycles/util/util_math_intersect.h +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_INTERSECT_H__ -#define __UTIL_MATH_INTERSECT_H__ - -CCL_NAMESPACE_BEGIN - -/* Ray Intersection */ - -ccl_device bool ray_sphere_intersect(float3 ray_P, - float3 ray_D, - float ray_t, - float3 sphere_P, - float sphere_radius, - ccl_private float3 *isect_P, - ccl_private float *isect_t) -{ - const float3 d = sphere_P - ray_P; - const float radiussq = sphere_radius * sphere_radius; - const float tsq = dot(d, d); - - if (tsq > radiussq) { - /* Ray origin outside sphere. */ - const float tp = dot(d, ray_D); - if (tp < 0.0f) { - /* Ray points away from sphere. */ - return false; - } - const float dsq = tsq - tp * tp; /* Pythagoras. */ - if (dsq > radiussq) { - /* Closest point on ray outside sphere. */ - return false; - } - const float t = tp - sqrtf(radiussq - dsq); /* pythagoras */ - if (t < ray_t) { - *isect_t = t; - *isect_P = ray_P + ray_D * t; - return true; - } - } - return false; -} - -ccl_device bool ray_aligned_disk_intersect(float3 ray_P, - float3 ray_D, - float ray_t, - float3 disk_P, - float disk_radius, - ccl_private float3 *isect_P, - ccl_private float *isect_t) -{ - /* Aligned disk normal. */ - float disk_t; - const float3 disk_N = normalize_len(ray_P - disk_P, &disk_t); - const float div = dot(ray_D, disk_N); - if (UNLIKELY(div == 0.0f)) { - return false; - } - /* Compute t to intersection point. */ - const float t = -disk_t / div; - if (t < 0.0f || t > ray_t) { - return false; - } - /* Test if within radius. */ - float3 P = ray_P + ray_D * t; - if (len_squared(P - disk_P) > disk_radius * disk_radius) { - return false; - } - *isect_P = P; - *isect_t = t; - return true; -} - -ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P, - float3 ray_dir, - float ray_t, -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts, -#else - const float3 tri_a, - const float3 tri_b, - const float3 tri_c, -#endif - ccl_private float *isect_u, - ccl_private float *isect_v, - ccl_private float *isect_t) -{ -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - typedef ssef float3; - const float3 tri_a(ssef_verts[0]); - const float3 tri_b(ssef_verts[1]); - const float3 tri_c(ssef_verts[2]); - const float3 P(ray_P); - const float3 dir(ray_dir); -#else -# define dot3(a, b) dot(a, b) - const float3 P = ray_P; - const float3 dir = ray_dir; -#endif - - /* Calculate vertices relative to ray origin. 
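The sphere test above rejects rays whose closest approach to the center lies outside the radius, then steps back from that closest point by sqrt(radius^2 - dsq) (Pythagoras) to get the entry distance. Below is a minimal standalone check of that math on a simple configuration; it is illustrative only and assumes the float3 type and make_float3 from the surrounding Cycles util headers.

#include <cassert>
#include <cmath>

/* Illustrative check of ray_sphere_intersect's math: a ray from the origin
 * along +Z against a unit sphere centered at (0, 0, 5). Here tp = 5, dsq = 0,
 * so the expected hit distance is t = 5 - sqrt(1 - 0) = 4. */
static void ray_sphere_example()
{
  const float3 ray_P = make_float3(0.0f, 0.0f, 0.0f);
  const float3 ray_D = make_float3(0.0f, 0.0f, 1.0f); /* must be normalized */
  const float3 sphere_P = make_float3(0.0f, 0.0f, 5.0f);
  const float sphere_radius = 1.0f;

  float3 isect_P;
  float isect_t;
  if (ray_sphere_intersect(ray_P, ray_D, 100.0f, sphere_P, sphere_radius, &isect_P, &isect_t)) {
    assert(fabsf(isect_t - 4.0f) < 1e-5f);
  }
}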
*/ - const float3 v0 = tri_c - P; - const float3 v1 = tri_a - P; - const float3 v2 = tri_b - P; - - /* Calculate triangle edges. */ - const float3 e0 = v2 - v0; - const float3 e1 = v0 - v1; - const float3 e2 = v1 - v2; - - /* Perform edge tests. */ -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const float3 crossU = cross(v2 + v0, e0); - const float3 crossV = cross(v0 + v1, e1); - const float3 crossW = cross(v1 + v2, e2); - - ssef crossX(crossU); - ssef crossY(crossV); - ssef crossZ(crossW); - ssef zero = _mm_setzero_ps(); - _MM_TRANSPOSE4_PS(crossX, crossY, crossZ, zero); - - const ssef dirX(ray_dir.x); - const ssef dirY(ray_dir.y); - const ssef dirZ(ray_dir.z); - - ssef UVWW = madd(crossX, dirX, madd(crossY, dirY, crossZ * dirZ)); -#else /* __KERNEL_SSE2__ */ - const float U = dot(cross(v2 + v0, e0), ray_dir); - const float V = dot(cross(v0 + v1, e1), ray_dir); - const float W = dot(cross(v1 + v2, e2), ray_dir); -#endif /* __KERNEL_SSE2__ */ - -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - int uvw_sign = movemask(UVWW) & 0x7; - if (uvw_sign != 0) { - if (uvw_sign != 0x7) { - return false; - } - } -#else - const float minUVW = min(U, min(V, W)); - const float maxUVW = max(U, max(V, W)); - - if (minUVW < 0.0f && maxUVW > 0.0f) { - return false; - } -#endif - - /* Calculate geometry normal and denominator. */ - const float3 Ng1 = cross(e1, e0); - // const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0); - const float3 Ng = Ng1 + Ng1; - const float den = dot3(Ng, dir); - /* Avoid division by 0. */ - if (UNLIKELY(den == 0.0f)) { - return false; - } - - /* Perform depth test. */ - const float T = dot3(v0, Ng); - const int sign_den = (__float_as_int(den) & 0x80000000); - const float sign_T = xor_signmask(T, sign_den); - if ((sign_T < 0.0f) || (sign_T > ray_t * xor_signmask(den, sign_den))) { - return false; - } - - const float inv_den = 1.0f / den; -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - UVWW *= inv_den; - _mm_store_ss(isect_u, UVWW); - _mm_store_ss(isect_v, shuffle<1, 1, 3, 3>(UVWW)); -#else - *isect_u = U * inv_den; - *isect_v = V * inv_den; -#endif - *isect_t = T * inv_den; - return true; - -#undef dot3 -} - -/* Tests for an intersection between a ray and a quad defined by - * its midpoint, normal and sides. - * If ellipse is true, hits outside the ellipse that's enclosed by the - * quad are rejected. - */ -ccl_device bool ray_quad_intersect(float3 ray_P, - float3 ray_D, - float ray_mint, - float ray_maxt, - float3 quad_P, - float3 quad_u, - float3 quad_v, - float3 quad_n, - ccl_private float3 *isect_P, - ccl_private float *isect_t, - ccl_private float *isect_u, - ccl_private float *isect_v, - bool ellipse) -{ - /* Perform intersection test. */ - float t = -(dot(ray_P, quad_n) - dot(quad_P, quad_n)) / dot(ray_D, quad_n); - if (t < ray_mint || t > ray_maxt) { - return false; - } - const float3 hit = ray_P + t * ray_D; - const float3 inplane = hit - quad_P; - const float u = dot(inplane, quad_u) / dot(quad_u, quad_u); - if (u < -0.5f || u > 0.5f) { - return false; - } - const float v = dot(inplane, quad_v) / dot(quad_v, quad_v); - if (v < -0.5f || v > 0.5f) { - return false; - } - if (ellipse && (u * u + v * v > 0.25f)) { - return false; - } - /* Store the result. */ - /* TODO(sergey): Check whether we can avoid some checks here. 
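The scalar path above computes the signed test values U, V and W from triple products; a hit requires all three to share a sign, and dividing by den then yields the barycentric coordinates and the hit distance. The sketch below exercises the non-SSE signature on a known configuration; it is illustrative only, and the expected values were worked out by hand from the formulas above.

#include <cassert>
#include <cmath>

/* Ray through (0.25, 0.25) of the unit right triangle in the z = 0 plane.
 * Tracing the formulas above gives U = V = -0.5, W = -1 and den = -2,
 * so u = v = 0.25 and t = 1. */
static void ray_triangle_example()
{
  const float3 ray_P = make_float3(0.25f, 0.25f, -1.0f);
  const float3 ray_D = make_float3(0.0f, 0.0f, 1.0f);
  const float3 tri_a = make_float3(1.0f, 0.0f, 0.0f);
  const float3 tri_b = make_float3(0.0f, 1.0f, 0.0f);
  const float3 tri_c = make_float3(0.0f, 0.0f, 0.0f);

  float u, v, t;
  const bool hit = ray_triangle_intersect(ray_P, ray_D, 10.0f, tri_a, tri_b, tri_c, &u, &v, &t);
  assert(hit);
  assert(fabsf(t - 1.0f) < 1e-5f);
  assert(fabsf(u - 0.25f) < 1e-5f && fabsf(v - 0.25f) < 1e-5f);
}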
*/ - if (isect_P != NULL) - *isect_P = hit; - if (isect_t != NULL) - *isect_t = t; - if (isect_u != NULL) - *isect_u = u + 0.5f; - if (isect_v != NULL) - *isect_v = v + 0.5f; - return true; -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_INTERSECT_H__ */ diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h deleted file mode 100644 index bff7ddb4cee..00000000000 --- a/intern/cycles/util/util_math_matrix.h +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_MATH_MATRIX_H__ -#define __UTIL_MATH_MATRIX_H__ - -CCL_NAMESPACE_BEGIN - -#define MAT(A, size, row, col) A[(row) * (size) + (col)] - -/* Variants that use a constant stride on GPUS. */ -#ifdef __KERNEL_GPU__ -# define MATS(A, n, r, c, s) A[((r) * (n) + (c)) * (s)] -/* Element access when only the lower-triangular elements are stored. */ -# define MATHS(A, r, c, s) A[((r) * ((r) + 1) / 2 + (c)) * (s)] -# define VECS(V, i, s) V[(i) * (s)] -#else -# define MATS(A, n, r, c, s) MAT(A, n, r, c) -# define MATHS(A, r, c, s) A[(r) * ((r) + 1) / 2 + (c)] -# define VECS(V, i, s) V[i] -#endif - -/* Zeroing helpers. */ - -ccl_device_inline void math_vector_zero(ccl_private float *v, int n) -{ - for (int i = 0; i < n; i++) { - v[i] = 0.0f; - } -} - -ccl_device_inline void math_matrix_zero(ccl_private float *A, int n) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MAT(A, n, row, col) = 0.0f; - } - } -} - -/* Elementary vector operations. */ - -ccl_device_inline void math_vector_add(ccl_private float *a, - ccl_private const float *ccl_restrict b, - int n) -{ - for (int i = 0; i < n; i++) { - a[i] += b[i]; - } -} - -ccl_device_inline void math_vector_mul(ccl_private float *a, - ccl_private const float *ccl_restrict b, - int n) -{ - for (int i = 0; i < n; i++) { - a[i] *= b[i]; - } -} - -ccl_device_inline void math_vector_mul_strided(ccl_global float *a, - ccl_private const float *ccl_restrict b, - int astride, - int n) -{ - for (int i = 0; i < n; i++) { - a[i * astride] *= b[i]; - } -} - -ccl_device_inline void math_vector_scale(ccl_private float *a, float b, int n) -{ - for (int i = 0; i < n; i++) { - a[i] *= b; - } -} - -ccl_device_inline void math_vector_max(ccl_private float *a, - ccl_private const float *ccl_restrict b, - int n) -{ - for (int i = 0; i < n; i++) { - a[i] = max(a[i], b[i]); - } -} - -ccl_device_inline void math_vec3_add(ccl_private float3 *v, int n, ccl_private float *x, float3 w) -{ - for (int i = 0; i < n; i++) { - v[i] += w * x[i]; - } -} - -ccl_device_inline void math_vec3_add_strided( - ccl_global float3 *v, int n, ccl_private float *x, float3 w, int stride) -{ - for (int i = 0; i < n; i++) { - ccl_global float *elem = (ccl_global float *)(v + i * stride); - atomic_add_and_fetch_float(elem + 0, w.x * x[i]); - atomic_add_and_fetch_float(elem + 1, w.y * x[i]); - atomic_add_and_fetch_float(elem + 2, w.z * x[i]); - } -} - -/* Elementary matrix operations. 
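The MATHS macro above stores only the lower triangle of a symmetric matrix, packing row r at offset r*(r+1)/2 so element (r, c) with c <= r lands at index r*(r+1)/2 + c (scaled by the stride on GPUs). A small sketch of that layout, illustrative only:

/* Packed lower-triangular layout used by MATHS for a symmetric 3x3 matrix:
 *
 *   storage index: 0     1     2     3     4     5
 *   element      : (0,0) (1,0) (1,1) (2,0) (2,1) (2,2)
 *
 * e.g. element (2,1) maps to 2*(2+1)/2 + 1 = 4. */
static inline int trimatrix_index(int row, int col)
{
  /* Caller is expected to pass col <= row; (r, c) with c > r is read as (c, r)
   * by symmetry. */
  return row * (row + 1) / 2 + col;
}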
- * Note: TriMatrix refers to a square matrix that is symmetric, - * and therefore its upper-triangular part isn't stored. */ - -ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A, - int n, - float val, - int stride) -{ - for (int row = 0; row < n; row++) { - MATHS(A, row, row, stride) += val; - } -} - -/* Add Gramian matrix of v to A. - * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ -ccl_device_inline void math_matrix_add_gramian(ccl_private float *A, - int n, - ccl_private const float *ccl_restrict v, - float weight) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MAT(A, n, row, col) += v[row] * v[col] * weight; - } - } -} - -/* Add Gramian matrix of v to A. - * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ -ccl_device_inline void math_trimatrix_add_gramian_strided( - ccl_global float *A, int n, ccl_private const float *ccl_restrict v, float weight, int stride) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - atomic_add_and_fetch_float(&MATHS(A, row, col, stride), v[row] * v[col] * weight); - } - } -} - -ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A, - int n, - ccl_private const float *ccl_restrict v, - float weight) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MATHS(A, row, col, 1) += v[row] * v[col] * weight; - } - } -} - -/* Transpose matrix A in place. */ -ccl_device_inline void math_matrix_transpose(ccl_global float *A, int n, int stride) -{ - for (int i = 0; i < n; i++) { - for (int j = 0; j < i; j++) { - float temp = MATS(A, n, i, j, stride); - MATS(A, n, i, j, stride) = MATS(A, n, j, i, stride); - MATS(A, n, j, i, stride) = temp; - } - } -} - -/* Solvers for matrix problems */ - -/* In-place Cholesky-Banachiewicz decomposition of the square, positive-definite matrix A - * into a lower triangular matrix L so that A = L*L^T. A is being overwritten by L. - * Also, only the lower triangular part of A is ever accessed. */ -ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - float sum_col = MATHS(A, row, col, stride); - for (int k = 0; k < col; k++) { - sum_col -= MATHS(A, row, k, stride) * MATHS(A, col, k, stride); - } - if (row == col) { - sum_col = sqrtf(max(sum_col, 0.0f)); - } - else { - sum_col /= MATHS(A, col, col, stride); - } - MATHS(A, row, col, stride) = sum_col; - } - } -} - -/* Solve A*S=y for S given A and y, - * where A is symmetrical positive-semi-definite and both inputs are destroyed in the process. - * - * We can apply Cholesky decomposition to find a lower triangular L so that L*Lt = A. - * With that we get (L*Lt)*S = L*(Lt*S) = L*b = y, defining b as Lt*S. - * Since L is lower triangular, finding b is relatively easy since y is known. - * Then, the remaining problem is Lt*S = b, which again can be solved easily. - * - * This is useful for solving the normal equation S=inv(Xt*W*X)*Xt*W*y, since Xt*W*X is - * symmetrical positive-semidefinite by construction, - * so we can just use this function with A=Xt*W*X and y=Xt*W*y. 
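As a worked illustration of the scheme described above (factor A = L*L^T, forward-substitute L*b = y, then back-substitute L^T*S = b), here is a standalone scalar sketch on a 2x2 positive-definite system. It mirrors math_trimatrix_cholesky and the two substitution passes but uses plain double storage for clarity, so it is illustrative only.

#include <cassert>
#include <cmath>

/* Solve A*x = y for A = [[4, 2], [2, 3]] and y = [10, 8].
 * The Cholesky factor is L = [[2, 0], [1, sqrt(2)]] and the exact
 * solution is x = [1.75, 1.5]. */
static void cholesky_solve_2x2_example()
{
  const double A[2][2] = {{4.0, 2.0}, {2.0, 3.0}};
  const double y[2] = {10.0, 8.0};

  /* Factor A = L*L^T (only the lower triangle is needed). */
  double L[2][2] = {{0.0, 0.0}, {0.0, 0.0}};
  L[0][0] = sqrt(A[0][0]);
  L[1][0] = A[1][0] / L[0][0];
  L[1][1] = sqrt(A[1][1] - L[1][0] * L[1][0]);

  /* Forward substitution: L*b = y. */
  double b[2];
  b[0] = y[0] / L[0][0];
  b[1] = (y[1] - L[1][0] * b[0]) / L[1][1];

  /* Back substitution: L^T*x = b. */
  double x[2];
  x[1] = b[1] / L[1][1];
  x[0] = (b[0] - L[1][0] * x[1]) / L[0][0];

  assert(fabs(x[0] - 1.75) < 1e-12 && fabs(x[1] - 1.5) < 1e-12);
}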
*/ -ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A, - ccl_global float3 *y, - int n, - int stride) -{ - /* Since the first entry of the design row is always 1, the upper-left element of XtWX is a good - * heuristic for the amount of pixels considered (with weighting), - * therefore the amount of correction is scaled based on it. */ - math_trimatrix_add_diagonal(A, n, 3e-7f * A[0], stride); /* Improve the numerical stability. */ - math_trimatrix_cholesky(A, n, stride); /* Replace A with L so that L*Lt = A. */ - - /* Use forward substitution to solve L*b = y, replacing y by b. */ - for (int row = 0; row < n; row++) { - float3 sum = VECS(y, row, stride); - for (int col = 0; col < row; col++) - sum -= MATHS(A, row, col, stride) * VECS(y, col, stride); - VECS(y, row, stride) = sum / MATHS(A, row, row, stride); - } - - /* Use backward substitution to solve Lt*S = b, replacing b by S. */ - for (int row = n - 1; row >= 0; row--) { - float3 sum = VECS(y, row, stride); - for (int col = row + 1; col < n; col++) - sum -= MATHS(A, col, row, stride) * VECS(y, col, stride); - VECS(y, row, stride) = sum / MATHS(A, row, row, stride); - } -} - -/* Perform the Jacobi Eigenvalue Method on matrix A. - * A is assumed to be a symmetrical matrix, therefore only the lower-triangular part is ever - * accessed. The algorithm overwrites the contents of A. - * - * After returning, A will be overwritten with D, which is (almost) diagonal, - * and V will contain the eigenvectors of the original A in its rows (!), - * so that A = V^T*D*V. Therefore, the diagonal elements of D are the (sorted) eigenvalues of A. - */ -ccl_device void math_matrix_jacobi_eigendecomposition(ccl_private float *A, - ccl_global float *V, - int n, - int v_stride) -{ - const float singular_epsilon = 1e-9f; - - for (int row = 0; row < n; row++) { - for (int col = 0; col < n; col++) { - MATS(V, n, row, col, v_stride) = (col == row) ? 1.0f : 0.0f; - } - } - - for (int sweep = 0; sweep < 8; sweep++) { - float off_diagonal = 0.0f; - for (int row = 1; row < n; row++) { - for (int col = 0; col < row; col++) { - off_diagonal += fabsf(MAT(A, n, row, col)); - } - } - if (off_diagonal < 1e-7f) { - /* The matrix has nearly reached diagonal form. - * Since the eigenvalues are only used to determine truncation, their exact values aren't - * required - a relative error of a few ULPs won't matter at all. */ - break; - } - - /* Set the threshold for the small element rotation skip in the first sweep: - * Skip all elements that are less than a tenth of the average off-diagonal element. */ - float threshold = 0.2f * off_diagonal / (n * n); - - for (int row = 1; row < n; row++) { - for (int col = 0; col < row; col++) { - /* Perform a Jacobi rotation on this element that reduces it to zero. */ - float element = MAT(A, n, row, col); - float abs_element = fabsf(element); - - /* If we're in a later sweep and the element already is very small, - * just set it to zero and skip the rotation. */ - if (sweep > 3 && abs_element <= singular_epsilon * fabsf(MAT(A, n, row, row)) && - abs_element <= singular_epsilon * fabsf(MAT(A, n, col, col))) { - MAT(A, n, row, col) = 0.0f; - continue; - } - - if (element == 0.0f) { - continue; - } - - /* If we're in one of the first sweeps and the element is smaller than the threshold, - * skip it. */ - if (sweep < 3 && (abs_element < threshold)) { - continue; - } - - /* Determine rotation: The rotation is characterized by its angle phi - or, - * in the actual implementation, sin(phi) and cos(phi). 
- * To find those, we first compute their ratio - that might be unstable if the angle - * approaches 90°, so there's a fallback for that case. - * Then, we compute sin(phi) and cos(phi) themselves. */ - float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col); - float ratio; - if (abs_element > singular_epsilon * fabsf(singular_diff)) { - float cot_2phi = 0.5f * singular_diff / element; - ratio = 1.0f / (fabsf(cot_2phi) + sqrtf(1.0f + cot_2phi * cot_2phi)); - if (cot_2phi < 0.0f) - ratio = -ratio; /* Copy sign. */ - } - else { - ratio = element / singular_diff; - } - - float c = 1.0f / sqrtf(1.0f + ratio * ratio); - float s = ratio * c; - /* To improve numerical stability by avoiding cancellation, the update equations are - * reformulized to use sin(phi) and tan(phi/2) instead. */ - float tan_phi_2 = s / (1.0f + c); - - /* Update the singular values in the diagonal. */ - float singular_delta = ratio * element; - MAT(A, n, row, row) += singular_delta; - MAT(A, n, col, col) -= singular_delta; - - /* Set the element itself to zero. */ - MAT(A, n, row, col) = 0.0f; - - /* Perform the actual rotations on the matrices. */ -#define ROT(M, r1, c1, r2, c2, stride) \ - { \ - float M1 = MATS(M, n, r1, c1, stride); \ - float M2 = MATS(M, n, r2, c2, stride); \ - MATS(M, n, r1, c1, stride) -= s * (M2 + tan_phi_2 * M1); \ - MATS(M, n, r2, c2, stride) += s * (M1 - tan_phi_2 * M2); \ - } - - /* Split into three parts to ensure correct accesses since we only store the - * lower-triangular part of A. */ - for (int i = 0; i < col; i++) - ROT(A, col, i, row, i, 1); - for (int i = col + 1; i < row; i++) - ROT(A, i, col, row, i, 1); - for (int i = row + 1; i < n; i++) - ROT(A, i, col, i, row, 1); - - for (int i = 0; i < n; i++) - ROT(V, col, i, row, i, v_stride); -#undef ROT - } - } - } - - /* Sort eigenvalues and the associated eigenvectors. */ - for (int i = 0; i < n - 1; i++) { - float v = MAT(A, n, i, i); - int k = i; - for (int j = i; j < n; j++) { - if (MAT(A, n, j, j) >= v) { - v = MAT(A, n, j, j); - k = j; - } - } - if (k != i) { - /* Swap eigenvalues. */ - MAT(A, n, k, k) = MAT(A, n, i, i); - MAT(A, n, i, i) = v; - /* Swap eigenvectors. */ - for (int j = 0; j < n; j++) { - float v = MATS(V, n, i, j, v_stride); - MATS(V, n, i, j, v_stride) = MATS(V, n, k, j, v_stride); - MATS(V, n, k, j, v_stride) = v; - } - } - } -} - -#ifdef __KERNEL_SSE3__ -ccl_device_inline void math_vector_zero_sse(float4 *A, int n) -{ - for (int i = 0; i < n; i++) { - A[i] = make_float4(0.0f); - } -} - -ccl_device_inline void math_matrix_zero_sse(float4 *A, int n) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MAT(A, n, row, col) = make_float4(0.0f); - } - } -} - -/* Add Gramian matrix of v to A. - * The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j]. 
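The rotation formulas above are easiest to follow on a 2x2 case, where a single Jacobi rotation already diagonalizes the matrix. The sketch below applies the same ratio/singular_delta update to [[2, 1], [1, 2]], whose eigenvalues are exactly 1 and 3; it is illustrative only.

#include <cassert>
#include <cmath>

/* One Jacobi rotation on the symmetric matrix [[2, 1], [1, 2]]. */
static void jacobi_2x2_example()
{
  double a00 = 2.0, a11 = 2.0, a10 = 1.0;

  const double singular_diff = a11 - a00;            /* 0 */
  const double cot_2phi = 0.5 * singular_diff / a10; /* 0, i.e. phi = 45 degrees */
  double ratio = 1.0 / (fabs(cot_2phi) + sqrt(1.0 + cot_2phi * cot_2phi)); /* tan(phi) = 1 */
  if (cot_2phi < 0.0) {
    ratio = -ratio;
  }

  const double singular_delta = ratio * a10; /* 1 */
  a11 += singular_delta;                     /* 3 */
  a00 -= singular_delta;                     /* 1 */
  a10 = 0.0;

  assert(fabs(a00 - 1.0) < 1e-12 && fabs(a11 - 3.0) < 1e-12);
}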
*/ -ccl_device_inline void math_matrix_add_gramian_sse(float4 *A, - int n, - const float4 *ccl_restrict v, - float4 weight) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MAT(A, n, row, col) = MAT(A, n, row, col) + v[row] * v[col] * weight; - } - } -} - -ccl_device_inline void math_vector_add_sse(float4 *V, int n, const float4 *ccl_restrict a) -{ - for (int i = 0; i < n; i++) { - V[i] += a[i]; - } -} - -ccl_device_inline void math_vector_mul_sse(float4 *V, int n, const float4 *ccl_restrict a) -{ - for (int i = 0; i < n; i++) { - V[i] *= a[i]; - } -} - -ccl_device_inline void math_vector_max_sse(float4 *a, const float4 *ccl_restrict b, int n) -{ - for (int i = 0; i < n; i++) { - a[i] = max(a[i], b[i]); - } -} - -ccl_device_inline void math_matrix_hsum(float *A, int n, const float4 *ccl_restrict B) -{ - for (int row = 0; row < n; row++) { - for (int col = 0; col <= row; col++) { - MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col))[0]; - } - } -} -#endif - -#undef MAT - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_MATRIX_H__ */ diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp deleted file mode 100644 index 0df521c2b58..00000000000 --- a/intern/cycles/util/util_md5.cpp +++ /dev/null @@ -1,387 +0,0 @@ -/* - * Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - * - * L. Peter Deutsch - * ghost@aladdin.com - */ - -/* Minor modifications done to remove some code and change style. 
*/ - -#include "util_md5.h" -#include "util_path.h" - -#include -#include - -CCL_NAMESPACE_BEGIN - -#define T_MASK ((uint32_t)~0) -#define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87) -#define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9) -#define T3 0x242070db -#define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111) -#define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050) -#define T6 0x4787c62a -#define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec) -#define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe) -#define T9 0x698098d8 -#define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850) -#define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e) -#define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841) -#define T13 0x6b901122 -#define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c) -#define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71) -#define T16 0x49b40821 -#define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d) -#define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf) -#define T19 0x265e5a51 -#define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855) -#define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2) -#define T22 0x02441453 -#define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e) -#define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437) -#define T25 0x21e1cde6 -#define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829) -#define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278) -#define T28 0x455a14ed -#define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa) -#define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07) -#define T31 0x676f02d9 -#define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375) -#define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd) -#define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e) -#define T35 0x6d9d6122 -#define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3) -#define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb) -#define T38 0x4bdecfa9 -#define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f) -#define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f) -#define T41 0x289b7ec6 -#define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805) -#define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a) -#define T44 0x04881d05 -#define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6) -#define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a) -#define T47 0x1fa27cf8 -#define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a) -#define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb) -#define T50 0x432aff97 -#define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58) -#define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6) -#define T53 0x655b59c3 -#define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d) -#define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82) -#define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e) -#define T57 0x6fa87e4f -#define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f) -#define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb) -#define T60 0x4e0811a1 -#define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d) -#define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca) -#define T63 0x2ad7d2bb -#define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e) - -void MD5Hash::process(const uint8_t *data /*[64]*/) -{ - uint32_t a = abcd[0], b = abcd[1], c = abcd[2], d = abcd[3]; - uint32_t t; - /* Define storage for little-endian or both types of CPUs. */ - uint32_t xbuf[16]; - const uint32_t *X; - - { - /* - * Determine dynamically whether this is a big-endian or - * little-endian machine, since we can use a more efficient - * algorithm on the latter. - */ - static const int w = 1; - - if (*((const uint8_t *)&w)) /* dynamic little-endian */ - { - /* - * On little-endian machines, we can process properly aligned - * data without copying it. 
- */ - if (!((data - (const uint8_t *)0) & 3)) { - /* data are properly aligned */ - X = (const uint32_t *)data; - } - else { - /* not aligned */ - memcpy(xbuf, data, 64); - X = xbuf; - } - } - else { /* dynamic big-endian */ - /* - * On big-endian machines, we must arrange the bytes in the - * right order. - */ - const uint8_t *xp = data; - int i; - - X = xbuf; /* (dynamic only) */ - for (i = 0; i < 16; ++i, xp += 4) - xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24); - } - } - -#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) - - /* Round 1. */ - /* Let [abcd k s i] denote the operation - * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ -#define F(x, y, z) (((x) & (y)) | (~(x) & (z))) -#define SET(a, b, c, d, k, s, Ti) \ - t = a + F(b, c, d) + X[k] + Ti; \ - a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. */ - SET(a, b, c, d, 0, 7, T1); - SET(d, a, b, c, 1, 12, T2); - SET(c, d, a, b, 2, 17, T3); - SET(b, c, d, a, 3, 22, T4); - SET(a, b, c, d, 4, 7, T5); - SET(d, a, b, c, 5, 12, T6); - SET(c, d, a, b, 6, 17, T7); - SET(b, c, d, a, 7, 22, T8); - SET(a, b, c, d, 8, 7, T9); - SET(d, a, b, c, 9, 12, T10); - SET(c, d, a, b, 10, 17, T11); - SET(b, c, d, a, 11, 22, T12); - SET(a, b, c, d, 12, 7, T13); - SET(d, a, b, c, 13, 12, T14); - SET(c, d, a, b, 14, 17, T15); - SET(b, c, d, a, 15, 22, T16); -#undef SET - - /* Round 2. */ - /* Let [abcd k s i] denote the operation - * a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ -#define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) -#define SET(a, b, c, d, k, s, Ti) \ - t = a + G(b, c, d) + X[k] + Ti; \ - a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. */ - SET(a, b, c, d, 1, 5, T17); - SET(d, a, b, c, 6, 9, T18); - SET(c, d, a, b, 11, 14, T19); - SET(b, c, d, a, 0, 20, T20); - SET(a, b, c, d, 5, 5, T21); - SET(d, a, b, c, 10, 9, T22); - SET(c, d, a, b, 15, 14, T23); - SET(b, c, d, a, 4, 20, T24); - SET(a, b, c, d, 9, 5, T25); - SET(d, a, b, c, 14, 9, T26); - SET(c, d, a, b, 3, 14, T27); - SET(b, c, d, a, 8, 20, T28); - SET(a, b, c, d, 13, 5, T29); - SET(d, a, b, c, 2, 9, T30); - SET(c, d, a, b, 7, 14, T31); - SET(b, c, d, a, 12, 20, T32); -#undef SET - - /* Round 3. */ - /* Let [abcd k s t] denote the operation - * a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */ -#define H(x, y, z) ((x) ^ (y) ^ (z)) -#define SET(a, b, c, d, k, s, Ti) \ - t = a + H(b, c, d) + X[k] + Ti; \ - a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. */ - SET(a, b, c, d, 5, 4, T33); - SET(d, a, b, c, 8, 11, T34); - SET(c, d, a, b, 11, 16, T35); - SET(b, c, d, a, 14, 23, T36); - SET(a, b, c, d, 1, 4, T37); - SET(d, a, b, c, 4, 11, T38); - SET(c, d, a, b, 7, 16, T39); - SET(b, c, d, a, 10, 23, T40); - SET(a, b, c, d, 13, 4, T41); - SET(d, a, b, c, 0, 11, T42); - SET(c, d, a, b, 3, 16, T43); - SET(b, c, d, a, 6, 23, T44); - SET(a, b, c, d, 9, 4, T45); - SET(d, a, b, c, 12, 11, T46); - SET(c, d, a, b, 15, 16, T47); - SET(b, c, d, a, 2, 23, T48); -#undef SET - - /* Round 4. */ - /* Let [abcd k s t] denote the operation - * a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ -#define I(x, y, z) ((y) ^ ((x) | ~(z))) -#define SET(a, b, c, d, k, s, Ti) \ - t = a + I(b, c, d) + X[k] + Ti; \ - a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. 
*/ - SET(a, b, c, d, 0, 6, T49); - SET(d, a, b, c, 7, 10, T50); - SET(c, d, a, b, 14, 15, T51); - SET(b, c, d, a, 5, 21, T52); - SET(a, b, c, d, 12, 6, T53); - SET(d, a, b, c, 3, 10, T54); - SET(c, d, a, b, 10, 15, T55); - SET(b, c, d, a, 1, 21, T56); - SET(a, b, c, d, 8, 6, T57); - SET(d, a, b, c, 15, 10, T58); - SET(c, d, a, b, 6, 15, T59); - SET(b, c, d, a, 13, 21, T60); - SET(a, b, c, d, 4, 6, T61); - SET(d, a, b, c, 11, 10, T62); - SET(c, d, a, b, 2, 15, T63); - SET(b, c, d, a, 9, 21, T64); -#undef SET - - /* Then perform the following additions. (That is increment each - * of the four registers by the value it had before this block - * was started.) */ - abcd[0] += a; - abcd[1] += b; - abcd[2] += c; - abcd[3] += d; -} - -MD5Hash::MD5Hash() -{ - count[0] = count[1] = 0; - abcd[0] = 0x67452301; - abcd[1] = /*0xefcdab89*/ T_MASK ^ 0x10325476; - abcd[2] = /*0x98badcfe*/ T_MASK ^ 0x67452301; - abcd[3] = 0x10325476; -} - -MD5Hash::~MD5Hash() -{ -} - -void MD5Hash::append(const uint8_t *data, int nbytes) -{ - const uint8_t *p = data; - int left = nbytes; - int offset = (count[0] >> 3) & 63; - uint32_t nbits = (uint32_t)(nbytes << 3); - - if (nbytes <= 0) - return; - - /* Update the message length. */ - count[1] += nbytes >> 29; - count[0] += nbits; - if (count[0] < nbits) - count[1]++; - - /* Process an initial partial block. */ - if (offset) { - int copy = (offset + nbytes > 64 ? 64 - offset : nbytes); - - memcpy(buf + offset, p, copy); - if (offset + copy < 64) - return; - p += copy; - left -= copy; - process(buf); - } - - /* Process full blocks. */ - for (; left >= 64; p += 64, left -= 64) - process(p); - - /* Process a final partial block. */ - if (left) - memcpy(buf, p, left); -} - -void MD5Hash::append(const string &str) -{ - if (str.size()) { - append((const uint8_t *)str.c_str(), str.size()); - } -} - -bool MD5Hash::append_file(const string &filepath) -{ - FILE *f = path_fopen(filepath, "rb"); - - if (!f) { - fprintf(stderr, "MD5: failed to open file %s\n", filepath.c_str()); - return false; - } - - const size_t buffer_size = 1024; - uint8_t buffer[buffer_size]; - size_t n; - - do { - n = fread(buffer, 1, buffer_size, f); - append(buffer, n); - } while (n == buffer_size); - - bool success = (ferror(f) == 0); - - fclose(f); - - return success; -} - -void MD5Hash::finish(uint8_t digest[16]) -{ - static const uint8_t pad[64] = {0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - - uint8_t data[8]; - int i; - - /* Save the length before padding. */ - for (i = 0; i < 8; ++i) - data[i] = (uint8_t)(count[i >> 2] >> ((i & 3) << 3)); - - /* Pad to 56 bytes mod 64. */ - append(pad, ((55 - (count[0] >> 3)) & 63) + 1); - /* Append the length. 
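The padding step above always leaves room for the 8-byte length: the message is padded out to 56 bytes mod 64, and `((55 - (count[0] >> 3)) & 63) + 1` computes exactly how many pad bytes that takes. A small worked sketch of that arithmetic, illustrative only:

/* Pad-length rule used by MD5Hash::finish(): for a 3-byte message,
 * ((55 - 3) & 63) + 1 == 53 pad bytes, and 3 + 53 + 8 == 64 (one block).
 * For a 56-byte message the formula gives 64 pad bytes, pushing the total
 * to 128, i.e. two blocks. */
static inline int md5_pad_bytes(int message_bytes)
{
  return ((55 - message_bytes) & 63) + 1;
}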
*/ - append(data, 8); - - for (i = 0; i < 16; ++i) - digest[i] = (uint8_t)(abcd[i >> 2] >> ((i & 3) << 3)); -} - -string MD5Hash::get_hex() -{ - uint8_t digest[16]; - char buf[16 * 2 + 1]; - - finish(digest); - - for (int i = 0; i < 16; i++) - sprintf(buf + i * 2, "%02X", (unsigned int)digest[i]); - buf[sizeof(buf) - 1] = '\0'; - - return string(buf); -} - -string util_md5_string(const string &str) -{ - MD5Hash md5; - md5.append((uint8_t *)str.c_str(), str.size()); - return md5.get_hex(); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_md5.h b/intern/cycles/util/util_md5.h deleted file mode 100644 index 3102a0f4bad..00000000000 --- a/intern/cycles/util/util_md5.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - * - * L. Peter Deutsch - * ghost@aladdin.com - */ - -/* MD5 - * - * Simply MD5 hash computation, used by disk cache. Adapted from external - * code, with minor code modifications done to remove some unused code and - * change code style. */ - -#ifndef __UTIL_MD5_H__ -#define __UTIL_MD5_H__ - -#include "util/util_string.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -class MD5Hash { - public: - MD5Hash(); - ~MD5Hash(); - - void append(const uint8_t *data, int size); - void append(const string &str); - bool append_file(const string &filepath); - string get_hex(); - - protected: - void process(const uint8_t *data); - void finish(uint8_t digest[16]); - - uint32_t count[2]; /* message length in bits, LSW first. */ - uint32_t abcd[4]; /* digest buffer */ - uint8_t buf[64]; /* accumulate block */ -}; - -string util_md5_string(const string &str); - -CCL_NAMESPACE_END - -#endif /* __UTIL_MD5_H__ */ diff --git a/intern/cycles/util/util_murmurhash.cpp b/intern/cycles/util/util_murmurhash.cpp deleted file mode 100644 index 5d728769fe9..00000000000 --- a/intern/cycles/util/util_murmurhash.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright 2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
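util_md5_string above covers the plain string case; the same class can fold several inputs into a single digest. A hedged usage sketch follows (the cache-key framing is just an example, not something this file prescribes):

/* Illustrative only: derive a hex digest from a path plus the file contents. */
static string md5_cache_key_example(const string &filepath)
{
  MD5Hash md5;
  md5.append(filepath);      /* include the path itself */
  md5.append_file(filepath); /* and the bytes of the file */
  return md5.get_hex();      /* 32-character uppercase hex string */
}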
- */ - -/* This is taken from alShaders/Cryptomatte/MurmurHash3.h: - * - * MurmurHash3 was written by Austin Appleby, and is placed in the public - * domain. The author hereby disclaims copyright to this source code. - */ - -#include -#include - -#include "util/util_algorithm.h" -#include "util/util_murmurhash.h" - -#if defined(_MSC_VER) -# define ROTL32(x, y) _rotl(x, y) -# define ROTL64(x, y) _rotl64(x, y) -# define BIG_CONSTANT(x) (x) -#else -ccl_device_inline uint32_t rotl32(uint32_t x, int8_t r) -{ - return (x << r) | (x >> (32 - r)); -} -# define ROTL32(x, y) rotl32(x, y) -# define BIG_CONSTANT(x) (x##LLU) -#endif - -CCL_NAMESPACE_BEGIN - -/* Block read - if your platform needs to do endian-swapping or can only - * handle aligned reads, do the conversion here. */ -ccl_device_inline uint32_t mm_hash_getblock32(const uint32_t *p, int i) -{ - return p[i]; -} - -/* Finalization mix - force all bits of a hash block to avalanche */ -ccl_device_inline uint32_t mm_hash_fmix32(uint32_t h) -{ - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - return h; -} - -uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed) -{ - const uint8_t *data = (const uint8_t *)key; - const int nblocks = len / 4; - - uint32_t h1 = seed; - - const uint32_t c1 = 0xcc9e2d51; - const uint32_t c2 = 0x1b873593; - - const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); - - for (int i = -nblocks; i; i++) { - uint32_t k1 = mm_hash_getblock32(blocks, i); - - k1 *= c1; - k1 = ROTL32(k1, 15); - k1 *= c2; - - h1 ^= k1; - h1 = ROTL32(h1, 13); - h1 = h1 * 5 + 0xe6546b64; - } - - const uint8_t *tail = (const uint8_t *)(data + nblocks * 4); - - uint32_t k1 = 0; - - switch (len & 3) { - case 3: - k1 ^= tail[2] << 16; - ATTR_FALLTHROUGH; - case 2: - k1 ^= tail[1] << 8; - ATTR_FALLTHROUGH; - case 1: - k1 ^= tail[0]; - k1 *= c1; - k1 = ROTL32(k1, 15); - k1 *= c2; - h1 ^= k1; - } - - h1 ^= len; - h1 = mm_hash_fmix32(h1); - return h1; -} - -/* This is taken from the cryptomatte specification 1.0 */ -float util_hash_to_float(uint32_t hash) -{ - uint32_t mantissa = hash & ((1 << 23) - 1); - uint32_t exponent = (hash >> 23) & ((1 << 8) - 1); - exponent = max(exponent, (uint32_t)1); - exponent = min(exponent, (uint32_t)254); - exponent = exponent << 23; - uint32_t sign = (hash >> 31); - sign = sign << 31; - uint32_t float_bits = sign | exponent | mantissa; - float f; - memcpy(&f, &float_bits, sizeof(uint32_t)); - return f; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_murmurhash.h b/intern/cycles/util/util_murmurhash.h deleted file mode 100644 index 2ec87efd87a..00000000000 --- a/intern/cycles/util/util_murmurhash.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
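util_hash_to_float above clamps the exponent to [1, 254] so the resulting float is always finite and non-zero, as the Cryptomatte specification requires. A minimal usage sketch hashing a name into such an ID (the seed of 0 is just for the example):

/* Illustrative only: Cryptomatte-style float ID for a name. */
static float cryptomatte_id_example(const char *name, size_t name_len)
{
  const uint32_t hash = util_murmur_hash3(name, (int)name_len, 0);
  return util_hash_to_float(hash);
}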
- */ - -#ifndef __UTIL_MURMURHASH_H__ -#define __UTIL_MURMURHASH_H__ - -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed); -float util_hash_to_float(uint32_t hash); - -CCL_NAMESPACE_END - -#endif /* __UTIL_MURMURHASH_H__ */ diff --git a/intern/cycles/util/util_opengl.h b/intern/cycles/util/util_opengl.h deleted file mode 100644 index 7a8d5eec1f9..00000000000 --- a/intern/cycles/util/util_opengl.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_OPENGL_H__ -#define __UTIL_OPENGL_H__ - -/* OpenGL header includes, used everywhere we use OpenGL, to deal with - * platform differences in one central place. */ - -#include - -#endif /* __UTIL_OPENGL_H__ */ diff --git a/intern/cycles/util/util_openimagedenoise.h b/intern/cycles/util/util_openimagedenoise.h deleted file mode 100644 index 898c634141e..00000000000 --- a/intern/cycles/util/util_openimagedenoise.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_OPENIMAGEDENOISE_H__ -#define __UTIL_OPENIMAGEDENOISE_H__ - -#ifdef WITH_OPENIMAGEDENOISE -# include -#endif - -#include "util_system.h" - -CCL_NAMESPACE_BEGIN - -static inline bool openimagedenoise_supported() -{ -#ifdef WITH_OPENIMAGEDENOISE -# ifdef __APPLE__ - /* Always supported through Accelerate framework BNNS. */ - return true; -# else - return system_cpu_support_sse41(); -# endif -#else - return false; -#endif -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_OPENIMAGEDENOISE_H__ */ diff --git a/intern/cycles/util/util_openvdb.h b/intern/cycles/util/util_openvdb.h deleted file mode 100644 index ae5326e3199..00000000000 --- a/intern/cycles/util/util_openvdb.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2011-2020 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
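openimagedenoise_supported() above only reports whether the CPU path is usable (always on Apple via the BNNS backend, otherwise only with SSE 4.1). A hedged sketch of a caller-side gate; the fallback notion here is hypothetical and not an API from this file:

/* Hypothetical caller: prefer OpenImageDenoise when the CPU supports it,
 * otherwise report that another denoiser must be chosen. */
static bool choose_cpu_denoiser_example()
{
  if (!openimagedenoise_supported()) {
    return false; /* e.g. fall back to a GPU denoiser or skip denoising */
  }
  /* ... create the OIDN device and filter here ... */
  return true;
}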
- */ - -#ifndef __UTIL_OPENVDB_H__ -#define __UTIL_OPENVDB_H__ - -#ifdef WITH_OPENVDB -# include - -namespace openvdb { - -using Vec4fTree = tree::Tree4::Type; -using Vec4fGrid = Grid; - -/* Apply operation to known grid types. */ -template -bool grid_type_operation(const openvdb::GridBase::ConstPtr &grid, OpType &&op) -{ - if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else if (grid->isType()) { - return op.template operator()(grid); - } - else { - return false; - } -} - -}; // namespace openvdb - -#endif - -#endif /* __UTIL_OPENVDB_H__ */ diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h deleted file mode 100644 index 7ecd3893cf4..00000000000 --- a/intern/cycles/util/util_optimization.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_OPTIMIZATION_H__ -#define __UTIL_OPTIMIZATION_H__ - -#ifndef __KERNEL_GPU__ - -/* x86 - * - * Compile a regular, SSE2 and SSE3 kernel. */ - -# if defined(i386) || defined(_M_IX86) - -/* We require minimum SSE2 support on x86, so auto enable. */ -# define __KERNEL_SSE2__ -# ifdef WITH_KERNEL_SSE2 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 -# endif -# ifdef WITH_KERNEL_SSE3 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -# endif - -/* x86-64 - * - * Compile a regular (includes SSE2), SSE3, SSE 4.1, AVX and AVX2 kernel. */ - -# elif defined(__x86_64__) || defined(_M_X64) - -/* SSE2 is always available on x86-64 CPUs, so auto enable */ -# define __KERNEL_SSE2__ -/* no SSE2 kernel on x86-64, part of regular kernel */ -# ifdef WITH_KERNEL_SSE3 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -# endif -# ifdef WITH_KERNEL_SSE41 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 -# endif -# ifdef WITH_KERNEL_AVX -# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX -# endif -# ifdef WITH_KERNEL_AVX2 -# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 -# endif - -/* Arm Neon - * - * Compile a SSE4 kernel emulated with Neon. Most code is shared with - * SSE, some specializations for performance and compatibility are made - * made testing for __KERNEL_NEON__. 
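grid_type_operation above resolves the concrete grid type and invokes the functor's operator() templated on that type. A minimal functor written in that style is sketched below; it is illustrative only and assumes WITH_OPENVDB plus standard OpenVDB calls such as gridConstPtrCast and activeVoxelCount.

#ifdef WITH_OPENVDB
/* Illustrative functor: count active voxels of whatever grid type is passed.
 * grid_type_operation() instantiates operator() with the concrete grid type
 * (FloatGrid, Vec3fGrid, ...). */
struct CountActiveVoxelsOp {
  openvdb::Index64 count = 0;

  template<typename GridType> bool operator()(const openvdb::GridBase::ConstPtr &grid)
  {
    const typename GridType::ConstPtr typed_grid = openvdb::gridConstPtrCast<GridType>(grid);
    count = typed_grid->activeVoxelCount();
    return true;
  }
};

/* Usage: CountActiveVoxelsOp op; grid_type_operation(grid, op); */
#endif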
*/ - -# elif defined(__ARM_NEON) && defined(WITH_SSE2NEON) - -# define __KERNEL_NEON__ -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSE41__ - -# endif - -#endif - -#endif /* __UTIL_OPTIMIZATION_H__ */ diff --git a/intern/cycles/util/util_param.h b/intern/cycles/util/util_param.h deleted file mode 100644 index 3f8e2d6d700..00000000000 --- a/intern/cycles/util/util_param.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_PARAM_H__ -#define __UTIL_PARAM_H__ - -/* Parameter value lists from OpenImageIO are used to store custom properties - * on various data, which can then later be used in shaders. */ - -#include -#include -#include - -CCL_NAMESPACE_BEGIN - -OIIO_NAMESPACE_USING - -static constexpr TypeDesc TypeFloat2(TypeDesc::FLOAT, TypeDesc::VEC2); -static constexpr TypeDesc TypeRGBA(TypeDesc::FLOAT, TypeDesc::VEC4, TypeDesc::COLOR); -static constexpr TypeDesc TypeFloatArray4(TypeDesc::FLOAT, - TypeDesc::SCALAR, - TypeDesc::NOSEMANTICS, - 4); - -CCL_NAMESPACE_END - -#endif /* __UTIL_PARAM_H__ */ diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp deleted file mode 100644 index c78f4615013..00000000000 --- a/intern/cycles/util/util_path.cpp +++ /dev/null @@ -1,781 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
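The TypeDesc constants above describe value layouts for OpenImageIO parameter lists; TypeFloat2, for instance, tags a two-component float. A hedged sketch of wrapping such a value with OIIO's ParamValue follows (the attribute name is made up for the example):

/* Illustrative only: wrap a float2 value in an OIIO ParamValue using the
 * TypeFloat2 descriptor defined above. ParamValue copies the data. */
static ParamValue make_float2_param_example()
{
  const float uv[2] = {0.25f, 0.75f};
  return ParamValue("example_uv", TypeFloat2, 1, uv);
}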
- */ - -#include "util/util_path.h" -#include "util/util_md5.h" -#include "util/util_string.h" - -#include -#include -#include - -OIIO_NAMESPACE_USING - -#include - -#include - -#if defined(_WIN32) -# define DIR_SEP '\\' -# define DIR_SEP_ALT '/' -# include -#else -# define DIR_SEP '/' -# include -# include -# include -# include -#endif - -#ifdef HAVE_SHLWAPI_H -# include -#endif - -#include "util/util_map.h" -#include "util/util_windows.h" - -CCL_NAMESPACE_BEGIN - -#ifdef _WIN32 -# if defined(_MSC_VER) || defined(__MINGW64__) -typedef struct _stat64 path_stat_t; -# elif defined(__MINGW32__) -typedef struct _stati64 path_stat_t; -# else -typedef struct _stat path_stat_t; -# endif -# ifndef S_ISDIR -# define S_ISDIR(x) (((x)&_S_IFDIR) == _S_IFDIR) -# endif -#else -typedef struct stat path_stat_t; -#endif - -static string cached_path = ""; -static string cached_user_path = ""; -static string cached_temp_path = ""; -static string cached_xdg_cache_path = ""; - -namespace { - -#ifdef _WIN32 -class directory_iterator { - public: - class path_info { - public: - path_info(const string &path, const WIN32_FIND_DATAW &find_data) - : path_(path), find_data_(find_data) - { - } - - string path() - { - return path_join(path_, string_from_wstring(find_data_.cFileName)); - } - - protected: - const string &path_; - const WIN32_FIND_DATAW &find_data_; - }; - - directory_iterator() : path_info_("", find_data_), h_find_(INVALID_HANDLE_VALUE) - { - } - - explicit directory_iterator(const string &path) : path_(path), path_info_(path, find_data_) - { - string wildcard = path; - if (wildcard[wildcard.size() - 1] != DIR_SEP) { - wildcard += DIR_SEP; - } - wildcard += "*"; - h_find_ = FindFirstFileW(string_to_wstring(wildcard).c_str(), &find_data_); - if (h_find_ != INVALID_HANDLE_VALUE) { - skip_dots(); - } - } - - ~directory_iterator() - { - if (h_find_ != INVALID_HANDLE_VALUE) { - FindClose(h_find_); - } - } - - directory_iterator &operator++() - { - step(); - return *this; - } - - path_info *operator->() - { - return &path_info_; - } - - bool operator!=(const directory_iterator &other) - { - return h_find_ != other.h_find_; - } - - protected: - bool step() - { - if (do_step()) { - return skip_dots(); - } - return false; - } - - bool do_step() - { - if (h_find_ != INVALID_HANDLE_VALUE) { - bool result = FindNextFileW(h_find_, &find_data_) == TRUE; - if (!result) { - FindClose(h_find_); - h_find_ = INVALID_HANDLE_VALUE; - } - return result; - } - return false; - } - - bool skip_dots() - { - while (wcscmp(find_data_.cFileName, L".") == 0 || wcscmp(find_data_.cFileName, L"..") == 0) { - if (!do_step()) { - return false; - } - } - return true; - } - - string path_; - path_info path_info_; - WIN32_FIND_DATAW find_data_; - HANDLE h_find_; -}; -#else /* _WIN32 */ - -class directory_iterator { - public: - class path_info { - public: - explicit path_info(const string &path) : path_(path), entry_(NULL) - { - } - - string path() - { - return path_join(path_, entry_->d_name); - } - - void current_entry_set(const struct dirent *entry) - { - entry_ = entry; - } - - protected: - const string &path_; - const struct dirent *entry_; - }; - - directory_iterator() : path_info_(""), name_list_(NULL), num_entries_(-1), cur_entry_(-1) - { - } - - explicit directory_iterator(const string &path) : path_(path), path_info_(path_), cur_entry_(0) - { - num_entries_ = scandir(path.c_str(), &name_list_, NULL, alphasort); - if (num_entries_ < 0) { - perror("scandir"); - } - else { - skip_dots(); - } - } - - ~directory_iterator() - { - 
destroy_name_list(); - } - - directory_iterator &operator++() - { - step(); - return *this; - } - - path_info *operator->() - { - path_info_.current_entry_set(name_list_[cur_entry_]); - return &path_info_; - } - - bool operator!=(const directory_iterator &other) - { - return name_list_ != other.name_list_; - } - - protected: - bool step() - { - if (do_step()) { - return skip_dots(); - } - return false; - } - - bool do_step() - { - ++cur_entry_; - if (cur_entry_ >= num_entries_) { - destroy_name_list(); - return false; - } - return true; - } - - /* Skip . and .. folders. */ - bool skip_dots() - { - while (strcmp(name_list_[cur_entry_]->d_name, ".") == 0 || - strcmp(name_list_[cur_entry_]->d_name, "..") == 0) { - if (!step()) { - return false; - } - } - return true; - } - - void destroy_name_list() - { - if (name_list_ == NULL) { - return; - } - for (int i = 0; i < num_entries_; ++i) { - free(name_list_[i]); - } - free(name_list_); - name_list_ = NULL; - } - - string path_; - path_info path_info_; - struct dirent **name_list_; - int num_entries_, cur_entry_; -}; - -#endif /* _WIN32 */ - -size_t find_last_slash(const string &path) -{ - for (size_t i = 0; i < path.size(); ++i) { - size_t index = path.size() - 1 - i; -#ifdef _WIN32 - if (path[index] == DIR_SEP || path[index] == DIR_SEP_ALT) -#else - if (path[index] == DIR_SEP) -#endif - { - return index; - } - } - return string::npos; -} - -} /* namespace */ - -static char *path_specials(const string &sub) -{ - static bool env_init = false; - static char *env_shader_path; - static char *env_source_path; - if (!env_init) { - env_shader_path = getenv("CYCLES_SHADER_PATH"); - /* NOTE: It is KERNEL in env variable for compatibility reasons. */ - env_source_path = getenv("CYCLES_KERNEL_PATH"); - env_init = true; - } - if (env_shader_path != NULL && sub == "shader") { - return env_shader_path; - } - else if (env_shader_path != NULL && sub == "source") { - return env_source_path; - } - return NULL; -} - -#if defined(__linux__) || defined(__APPLE__) -static string path_xdg_cache_get() -{ - const char *home = getenv("XDG_CACHE_HOME"); - if (home) { - return string(home); - } - else { - home = getenv("HOME"); - if (home == NULL) { - home = getpwuid(getuid())->pw_dir; - } - return path_join(string(home), ".cache"); - } -} -#endif - -void path_init(const string &path, const string &user_path, const string &temp_path) -{ - cached_path = path; - cached_user_path = user_path; - cached_temp_path = temp_path; - -#ifdef _MSC_VER - // workaround for https://svn.boost.org/trac/boost/ticket/6320 - // indirectly init boost codec here since it's not thread safe, and can - // cause crashes when it happens in multithreaded image load - OIIO::Filesystem::exists(path); -#endif -} - -string path_get(const string &sub) -{ - char *special = path_specials(sub); - if (special != NULL) - return special; - - if (cached_path == "") - cached_path = path_dirname(Sysutil::this_program_path()); - - return path_join(cached_path, sub); -} - -string path_user_get(const string &sub) -{ - if (cached_user_path == "") - cached_user_path = path_dirname(Sysutil::this_program_path()); - - return path_join(cached_user_path, sub); -} - -string path_cache_get(const string &sub) -{ -#if defined(__linux__) || defined(__APPLE__) - if (cached_xdg_cache_path == "") { - cached_xdg_cache_path = path_xdg_cache_get(); - } - string result = path_join(cached_xdg_cache_path, "cycles"); - return path_join(result, sub); -#else - /* TODO(sergey): What that should be on Windows? 
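The lookups above give the environment variables priority and, on Linux/macOS, put the cache under the XDG directory. A worked illustration of how a few calls resolve; the paths shown are examples only, not guaranteed locations:

/* Illustrative resolution of the helpers above:
 *
 *   setenv("CYCLES_SHADER_PATH", "/opt/cycles/shader", 1);
 *   path_init("/usr/share/cycles", "/home/user/.config/cycles", "/tmp");
 *
 *   path_get("shader");        // "/opt/cycles/shader" (env override wins)
 *   path_get("source");        // "/usr/share/cycles/source" (CYCLES_KERNEL_PATH unset)
 *   path_user_get("cache");    // "/home/user/.config/cycles/cache"
 *   path_cache_get("kernels"); // "$XDG_CACHE_HOME/cycles/kernels", or
 *                              // "$HOME/.cache/cycles/kernels" when XDG is unset
 */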
*/ - return path_user_get(path_join("cache", sub)); -#endif -} - -string path_temp_get(const string &sub) -{ - if (cached_temp_path == "") { - cached_temp_path = Filesystem::temp_directory_path(); - } - - return path_join(cached_temp_path, sub); -} - -#if defined(__linux__) || defined(__APPLE__) -string path_xdg_home_get(const string &sub = ""); -#endif - -string path_filename(const string &path) -{ - size_t index = find_last_slash(path); - if (index != string::npos) { - /* Corner cases to match boost behavior. */ -#ifndef _WIN32 - if (index == 0 && path.size() == 1) { - return path; - } -#endif - if (index == path.size() - 1) { -#ifdef _WIN32 - if (index == 2) { - return string(1, DIR_SEP); - } -#endif - return "."; - } - return path.substr(index + 1, path.size() - index - 1); - } - return path; -} - -string path_dirname(const string &path) -{ - size_t index = find_last_slash(path); - if (index != string::npos) { -#ifndef _WIN32 - if (index == 0 && path.size() > 1) { - return string(1, DIR_SEP); - } -#endif - return path.substr(0, index); - } - return ""; -} - -string path_join(const string &dir, const string &file) -{ - if (dir.size() == 0) { - return file; - } - if (file.size() == 0) { - return dir; - } - string result = dir; -#ifndef _WIN32 - if (result[result.size() - 1] != DIR_SEP && file[0] != DIR_SEP) -#else - if (result[result.size() - 1] != DIR_SEP && result[result.size() - 1] != DIR_SEP_ALT && - file[0] != DIR_SEP && file[0] != DIR_SEP_ALT) -#endif - { - result += DIR_SEP; - } - result += file; - return result; -} - -string path_escape(const string &path) -{ - string result = path; - string_replace(result, " ", "\\ "); - return result; -} - -bool path_is_relative(const string &path) -{ -#ifdef _WIN32 -# ifdef HAVE_SHLWAPI_H - return PathIsRelative(path.c_str()); -# else /* HAVE_SHLWAPI_H */ - if (path.size() >= 3) { - return !(((path[0] >= 'a' && path[0] <= 'z') || (path[0] >= 'A' && path[0] <= 'Z')) && - path[1] == ':' && path[2] == DIR_SEP); - } - return true; -# endif /* HAVE_SHLWAPI_H */ -#else /* _WIN32 */ - if (path.size() == 0) { - return 1; - } - return path[0] != DIR_SEP; -#endif /* _WIN32 */ -} - -#ifdef _WIN32 -/* Add a slash if the UNC path points to a share. */ -static string path_unc_add_slash_to_share(const string &path) -{ - size_t slash_after_server = path.find(DIR_SEP, 2); - if (slash_after_server != string::npos) { - size_t slash_after_share = path.find(DIR_SEP, slash_after_server + 1); - if (slash_after_share == string::npos) { - return path + DIR_SEP; - } - } - return path; -} - -/* Convert: - * \\?\UNC\server\share\folder\... to \\server\share\folder\... - * \\?\C:\ to C:\ and \\?\C:\folder\... to C:\folder\... - */ -static string path_unc_to_short(const string &path) -{ - size_t len = path.size(); - if ((len > 3) && (path[0] == DIR_SEP) && (path[1] == DIR_SEP) && (path[2] == '?') && - ((path[3] == DIR_SEP) || (path[3] == DIR_SEP_ALT))) { - if ((len > 5) && (path[5] == ':')) { - return path.substr(4, len - 4); - } - else if ((len > 7) && (path.substr(4, 3) == "UNC") && - ((path[7] == DIR_SEP) || (path[7] == DIR_SEP_ALT))) { - return "\\\\" + path.substr(8, len - 8); - } - } - return path; -} - -static string path_cleanup_unc(const string &path) -{ - string result = path_unc_to_short(path); - if (path.size() > 2) { - /* It's possible path is now a non-UNC. */ - if (result[0] == DIR_SEP && result[1] == DIR_SEP) { - return path_unc_add_slash_to_share(result); - } - } - return result; -} - -/* Make path compatible for stat() functions. 
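 *
 * A few illustrative cases of what the cleanup below does (the example paths
 * are assumptions, not taken from the original sources):
 *   "C:\foo\"      -> "C:\foo"   trailing separator stripped for stat()
 *   "\\?\C:\foo"   -> "C:\foo"   extended-length prefix removed
 *   "C:"           -> "C:\"      volume-only path gets a separator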
*/ -static string path_make_compatible(const string &path) -{ - string result = path; - /* In Windows stat() doesn't recognize dir ending on a slash. */ - if (result.size() > 3 && result[result.size() - 1] == DIR_SEP) { - result.resize(result.size() - 1); - } - /* Clean up UNC path. */ - if ((path.size() >= 3) && (path[0] == DIR_SEP) && (path[1] == DIR_SEP)) { - result = path_cleanup_unc(result); - } - /* Make sure volume-only path ends up wit ha directory separator. */ - if (result.size() == 2 && result[1] == ':') { - result += DIR_SEP; - } - return result; -} - -static int path_wstat(const wstring &path_wc, path_stat_t *st) -{ -# if defined(_MSC_VER) || defined(__MINGW64__) - return _wstat64(path_wc.c_str(), st); -# elif defined(__MINGW32__) - return _wstati64(path_wc.c_str(), st); -# else - return _wstat(path_wc.c_str(), st); -# endif -} - -static int path_stat(const string &path, path_stat_t *st) -{ - wstring path_wc = string_to_wstring(path); - return path_wstat(path_wc, st); -} -#else /* _WIN32 */ -static int path_stat(const string &path, path_stat_t *st) -{ - return stat(path.c_str(), st); -} -#endif /* _WIN32 */ - -size_t path_file_size(const string &path) -{ - path_stat_t st; - if (path_stat(path, &st) != 0) { - return -1; - } - return st.st_size; -} - -bool path_exists(const string &path) -{ -#ifdef _WIN32 - string fixed_path = path_make_compatible(path); - wstring path_wc = string_to_wstring(fixed_path); - path_stat_t st; - if (path_wstat(path_wc, &st) != 0) { - return false; - } - return st.st_mode != 0; -#else /* _WIN32 */ - struct stat st; - if (stat(path.c_str(), &st) != 0) { - return 0; - } - return st.st_mode != 0; -#endif /* _WIN32 */ -} - -bool path_is_directory(const string &path) -{ - path_stat_t st; - if (path_stat(path, &st) != 0) { - return false; - } - return S_ISDIR(st.st_mode); -} - -static void path_files_md5_hash_recursive(MD5Hash &hash, const string &dir) -{ - if (path_exists(dir)) { - directory_iterator it(dir), it_end; - - for (; it != it_end; ++it) { - if (path_is_directory(it->path())) { - path_files_md5_hash_recursive(hash, it->path()); - } - else { - string filepath = it->path(); - - hash.append((const uint8_t *)filepath.c_str(), filepath.size()); - hash.append_file(filepath); - } - } - } -} - -string path_files_md5_hash(const string &dir) -{ - /* computes md5 hash of all files in the directory */ - MD5Hash hash; - - path_files_md5_hash_recursive(hash, dir); - - return hash.get_hex(); -} - -static bool create_directories_recursivey(const string &path) -{ - if (path_is_directory(path)) { - /* Directory already exists, nothing to do. */ - return true; - } - if (path_exists(path)) { - /* File exists and it's not a directory. 
*/ - return false; - } - - string parent = path_dirname(path); - if (parent.size() > 0 && parent != path) { - if (!create_directories_recursivey(parent)) { - return false; - } - } - -#ifdef _WIN32 - wstring path_wc = string_to_wstring(path); - return _wmkdir(path_wc.c_str()) == 0; -#else - return mkdir(path.c_str(), 0777) == 0; -#endif -} - -void path_create_directories(const string &filepath) -{ - string path = path_dirname(filepath); - create_directories_recursivey(path); -} - -bool path_write_binary(const string &path, const vector &binary) -{ - path_create_directories(path); - - /* write binary file from memory */ - FILE *f = path_fopen(path, "wb"); - - if (!f) - return false; - - if (binary.size() > 0) - fwrite(&binary[0], sizeof(uint8_t), binary.size(), f); - - fclose(f); - - return true; -} - -bool path_write_text(const string &path, string &text) -{ - vector binary(text.length(), 0); - std::copy(text.begin(), text.end(), binary.begin()); - - return path_write_binary(path, binary); -} - -bool path_read_binary(const string &path, vector &binary) -{ - /* read binary file into memory */ - FILE *f = path_fopen(path, "rb"); - - if (!f) { - binary.resize(0); - return false; - } - - binary.resize(path_file_size(path)); - - if (binary.size() == 0) { - fclose(f); - return false; - } - - if (fread(&binary[0], sizeof(uint8_t), binary.size(), f) != binary.size()) { - fclose(f); - return false; - } - - fclose(f); - - return true; -} - -bool path_read_text(const string &path, string &text) -{ - vector binary; - - if (!path_exists(path) || !path_read_binary(path, binary)) - return false; - - const char *str = (const char *)&binary[0]; - size_t size = binary.size(); - text = string(str, size); - - return true; -} - -uint64_t path_modified_time(const string &path) -{ - path_stat_t st; - if (path_stat(path, &st) != 0) { - return 0; - } - return st.st_mtime; -} - -bool path_remove(const string &path) -{ - return remove(path.c_str()) == 0; -} - -FILE *path_fopen(const string &path, const string &mode) -{ -#ifdef _WIN32 - wstring path_wc = string_to_wstring(path); - wstring mode_wc = string_to_wstring(mode); - return _wfopen(path_wc.c_str(), mode_wc.c_str()); -#else - return fopen(path.c_str(), mode.c_str()); -#endif -} - -void path_cache_clear_except(const string &name, const set &except) -{ - string dir = path_user_get("cache"); - - if (path_exists(dir)) { - directory_iterator it(dir), it_end; - - for (; it != it_end; ++it) { - string filename = path_filename(it->path()); - - if (string_startswith(filename, name.c_str())) - if (except.find(filename) == except.end()) - path_remove(it->path()); - } - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h deleted file mode 100644 index f899bc2e01c..00000000000 --- a/intern/cycles/util/util_path.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_PATH_H__ -#define __UTIL_PATH_H__ - -/* Utility functions to get paths to files distributed with the program. For - * the standalone apps, paths are relative to the executable, for dynamically - * linked libraries, the path to the library may be set with path_init, which - * then makes all paths relative to that. */ - -#include - -#include "util/util_set.h" -#include "util/util_string.h" -#include "util/util_types.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -/* program paths */ -void path_init(const string &path = "", const string &user_path = "", const string &tmp_path = ""); -string path_get(const string &sub = ""); -string path_user_get(const string &sub = ""); -string path_temp_get(const string &sub = ""); -string path_cache_get(const string &sub = ""); - -/* path string manipulation */ -string path_filename(const string &path); -string path_dirname(const string &path); -string path_join(const string &dir, const string &file); -string path_escape(const string &path); -bool path_is_relative(const string &path); - -/* file info */ -size_t path_file_size(const string &path); -bool path_exists(const string &path); -bool path_is_directory(const string &path); -string path_files_md5_hash(const string &dir); -uint64_t path_modified_time(const string &path); - -/* directory utility */ -void path_create_directories(const string &path); - -/* file read/write utilities */ -FILE *path_fopen(const string &path, const string &mode); - -bool path_write_binary(const string &path, const vector &binary); -bool path_write_text(const string &path, string &text); -bool path_read_binary(const string &path, vector &binary); -bool path_read_text(const string &path, string &text); - -/* File manipulation. */ -bool path_remove(const string &path); - -/* cache utility */ -void path_cache_clear_except(const string &name, const set &except); - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_profiling.cpp b/intern/cycles/util/util_profiling.cpp deleted file mode 100644 index 5343f076e22..00000000000 --- a/intern/cycles/util/util_profiling.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_profiling.h" -#include "util/util_algorithm.h" -#include "util/util_foreach.h" -#include "util/util_set.h" - -CCL_NAMESPACE_BEGIN - -Profiler::Profiler() : do_stop_worker(true), worker(NULL) -{ -} - -Profiler::~Profiler() -{ - assert(worker == NULL); -} - -void Profiler::run() -{ - uint64_t updates = 0; - auto start_time = std::chrono::system_clock::now(); - while (!do_stop_worker) { - thread_scoped_lock lock(mutex); - foreach (ProfilingState *state, states) { - uint32_t cur_event = state->event; - int32_t cur_shader = state->shader; - int32_t cur_object = state->object; - - /* The state reads/writes should be atomic, but just to be sure - * check the values for validity anyways. 
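 *
 * Each pass over the registered states is one sample; with the 1 ms period
 * used below, a counter value of N corresponds to roughly N milliseconds
 * spent in that event/shader/object.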
*/ - if (cur_event < PROFILING_NUM_EVENTS) { - event_samples[cur_event]++; - } - - if (cur_shader >= 0 && cur_shader < shader_samples.size()) { - shader_samples[cur_shader]++; - } - - if (cur_object >= 0 && cur_object < object_samples.size()) { - object_samples[cur_object]++; - } - } - lock.unlock(); - - /* Relative waits always overshoot a bit, so just waiting 1ms every - * time would cause the sampling to drift over time. - * By keeping track of the absolute time, the wait times correct themselves - - * if one wait overshoots a lot, the next one will be shorter to compensate. */ - updates++; - std::this_thread::sleep_until(start_time + updates * std::chrono::milliseconds(1)); - } -} - -void Profiler::reset(int num_shaders, int num_objects) -{ - bool running = (worker != NULL); - if (running) { - stop(); - } - - /* Resize and clear the accumulation vectors. */ - shader_hits.assign(num_shaders, 0); - object_hits.assign(num_objects, 0); - - event_samples.assign(PROFILING_NUM_EVENTS, 0); - shader_samples.assign(num_shaders, 0); - object_samples.assign(num_objects, 0); - - if (running) { - start(); - } -} - -void Profiler::start() -{ - assert(worker == NULL); - do_stop_worker = false; - worker = new thread(function_bind(&Profiler::run, this)); -} - -void Profiler::stop() -{ - if (worker != NULL) { - do_stop_worker = true; - - worker->join(); - delete worker; - worker = NULL; - } -} - -void Profiler::add_state(ProfilingState *state) -{ - thread_scoped_lock lock(mutex); - - /* Add the ProfilingState from the list of sampled states. */ - assert(std::find(states.begin(), states.end(), state) == states.end()); - states.push_back(state); - - /* Resize thread-local hit counters. */ - state->shader_hits.assign(shader_hits.size(), 0); - state->object_hits.assign(object_hits.size(), 0); - - /* Initialize the state. */ - state->event = PROFILING_UNKNOWN; - state->shader = -1; - state->object = -1; - state->active = true; -} - -void Profiler::remove_state(ProfilingState *state) -{ - thread_scoped_lock lock(mutex); - - /* Remove the ProfilingState from the list of sampled states. */ - states.erase(std::remove(states.begin(), states.end(), state), states.end()); - state->active = false; - - /* Merge thread-local hit counters. 
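 * The per-state counts are folded into the global shader_hits / object_hits
 * below, so hits recorded by this state stay available after it is removed.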
*/ - assert(shader_hits.size() == state->shader_hits.size()); - for (int i = 0; i < shader_hits.size(); i++) { - shader_hits[i] += state->shader_hits[i]; - } - - assert(object_hits.size() == state->object_hits.size()); - for (int i = 0; i < object_hits.size(); i++) { - object_hits[i] += state->object_hits[i]; - } -} - -uint64_t Profiler::get_event(ProfilingEvent event) -{ - assert(worker == NULL); - return event_samples[event]; -} - -bool Profiler::get_shader(int shader, uint64_t &samples, uint64_t &hits) -{ - assert(worker == NULL); - if (shader_samples[shader] == 0) { - return false; - } - samples = shader_samples[shader]; - hits = shader_hits[shader]; - return true; -} - -bool Profiler::get_object(int object, uint64_t &samples, uint64_t &hits) -{ - assert(worker == NULL); - if (object_samples[object] == 0) { - return false; - } - samples = object_samples[object]; - hits = object_hits[object]; - return true; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_profiling.h b/intern/cycles/util/util_profiling.h deleted file mode 100644 index 96bb682c50e..00000000000 --- a/intern/cycles/util/util_profiling.h +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_PROFILING_H__ -#define __UTIL_PROFILING_H__ - -#include - -#include "util/util_map.h" -#include "util/util_thread.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -enum ProfilingEvent : uint32_t { - PROFILING_UNKNOWN, - PROFILING_RAY_SETUP, - - PROFILING_INTERSECT_CLOSEST, - PROFILING_INTERSECT_SUBSURFACE, - PROFILING_INTERSECT_SHADOW, - PROFILING_INTERSECT_VOLUME_STACK, - - PROFILING_SHADE_SURFACE_SETUP, - PROFILING_SHADE_SURFACE_EVAL, - PROFILING_SHADE_SURFACE_DIRECT_LIGHT, - PROFILING_SHADE_SURFACE_INDIRECT_LIGHT, - PROFILING_SHADE_SURFACE_AO, - PROFILING_SHADE_SURFACE_PASSES, - - PROFILING_SHADE_VOLUME_SETUP, - PROFILING_SHADE_VOLUME_INTEGRATE, - PROFILING_SHADE_VOLUME_DIRECT_LIGHT, - PROFILING_SHADE_VOLUME_INDIRECT_LIGHT, - - PROFILING_SHADE_SHADOW_SETUP, - PROFILING_SHADE_SHADOW_SURFACE, - PROFILING_SHADE_SHADOW_VOLUME, - - PROFILING_SHADE_LIGHT_SETUP, - PROFILING_SHADE_LIGHT_EVAL, - - PROFILING_NUM_EVENTS, -}; - -/* Contains the current execution state of a worker thread. - * These values are constantly updated by the worker. - * Periodically the profiler thread will wake up, read them - * and update its internal counters based on it. - * - * Atomics aren't needed here since we're only doing direct - * writes and reads to (4-byte-aligned) uint32_t, which is - * guaranteed to be atomic on x86 since the 486. - * Memory ordering is not guaranteed but does not matter. - * - * And even on other architectures, the extremely rare corner - * case of reading an intermediate state could at worst result - * in a single incorrect sample. 
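 *
 * Rough usage sketch (illustrative only, not part of the original header;
 * num_shaders / num_objects are placeholders):
 *
 *   Profiler profiler;
 *   profiler.reset(num_shaders, num_objects);
 *   profiler.start();
 *
 *   ProfilingState state;              // one per worker thread
 *   profiler.add_state(&state);
 *   {
 *     ProfilingHelper scope(&state, PROFILING_INTERSECT_CLOSEST);
 *     // ... do work; samples taken now are attributed to this event
 *   }
 *   profiler.remove_state(&state);
 *   profiler.stop();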
*/ -struct ProfilingState { - volatile uint32_t event = PROFILING_UNKNOWN; - volatile int32_t shader = -1; - volatile int32_t object = -1; - volatile bool active = false; - - vector shader_hits; - vector object_hits; -}; - -class Profiler { - public: - Profiler(); - ~Profiler(); - - void reset(int num_shaders, int num_objects); - - void start(); - void stop(); - - void add_state(ProfilingState *state); - void remove_state(ProfilingState *state); - - uint64_t get_event(ProfilingEvent event); - bool get_shader(int shader, uint64_t &samples, uint64_t &hits); - bool get_object(int object, uint64_t &samples, uint64_t &hits); - - protected: - void run(); - - /* Tracks how often the worker was in each ProfilingEvent while sampling, - * so multiplying the values by the sample frequency (currently 1ms) - * gives the approximate time spent in each state. */ - vector event_samples; - vector shader_samples; - vector object_samples; - - /* Tracks the total amounts every object/shader was hit. - * Used to evaluate relative cost, written by the render thread. - * Indexed by the shader and object IDs that the kernel also uses - * to index __object_flag and __shaders. */ - vector shader_hits; - vector object_hits; - - volatile bool do_stop_worker; - thread *worker; - - thread_mutex mutex; - vector states; -}; - -class ProfilingHelper { - public: - ProfilingHelper(ProfilingState *state, ProfilingEvent event) : state(state) - { - previous_event = state->event; - state->event = event; - } - - ~ProfilingHelper() - { - state->event = previous_event; - } - - inline void set_event(ProfilingEvent event) - { - state->event = event; - } - - protected: - ProfilingState *state; - uint32_t previous_event; -}; - -class ProfilingWithShaderHelper : public ProfilingHelper { - public: - ProfilingWithShaderHelper(ProfilingState *state, ProfilingEvent event) - : ProfilingHelper(state, event) - { - } - - ~ProfilingWithShaderHelper() - { - state->object = -1; - state->shader = -1; - } - - inline void set_shader(int object, int shader) - { - if (state->active) { - state->shader = shader; - state->object = object; - - if (shader >= 0) { - assert(shader < state->shader_hits.size()); - state->shader_hits[shader]++; - } - - if (object >= 0) { - assert(object < state->object_hits.size()); - state->object_hits[object]++; - } - } - } -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_PROFILING_H__ */ diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h deleted file mode 100644 index 176ee11e1e9..00000000000 --- a/intern/cycles/util/util_progress.h +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_PROGRESS_H__ -#define __UTIL_PROGRESS_H__ - -/* Progress - * - * Simple class to communicate progress status messages, timing information, - * update notifications from a job running in another thread. All methods - * except for the constructor/destructor are thread safe. 
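 *
 * Minimal usage sketch (illustrative only; update_ui is a hypothetical
 * callback, not part of the original header):
 *
 *   Progress progress;
 *   progress.set_update_callback(update_ui);
 *   progress.set_status("Rendering", "Sample 1/128");
 *   if (progress.get_cancel()) {
 *     // abort the render loop
 *   }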
*/ - -#include "util/util_function.h" -#include "util/util_string.h" -#include "util/util_thread.h" -#include "util/util_time.h" - -CCL_NAMESPACE_BEGIN - -class Progress { - public: - Progress() - { - pixel_samples = 0; - total_pixel_samples = 0; - current_tile_sample = 0; - rendered_tiles = 0; - denoised_tiles = 0; - start_time = time_dt(); - render_start_time = time_dt(); - end_time = 0.0; - status = "Initializing"; - substatus = ""; - sync_status = ""; - sync_substatus = ""; - update_cb = function_null; - cancel = false; - cancel_message = ""; - error = false; - error_message = ""; - cancel_cb = function_null; - } - - Progress(Progress &progress) - { - *this = progress; - } - - Progress &operator=(Progress &progress) - { - thread_scoped_lock lock(progress.progress_mutex); - - progress.get_status(status, substatus); - - pixel_samples = progress.pixel_samples; - total_pixel_samples = progress.total_pixel_samples; - current_tile_sample = progress.get_current_sample(); - - return *this; - } - - void reset() - { - pixel_samples = 0; - total_pixel_samples = 0; - current_tile_sample = 0; - rendered_tiles = 0; - denoised_tiles = 0; - start_time = time_dt(); - render_start_time = time_dt(); - end_time = 0.0; - status = "Initializing"; - substatus = ""; - sync_status = ""; - sync_substatus = ""; - cancel = false; - cancel_message = ""; - error = false; - error_message = ""; - } - - /* cancel */ - void set_cancel(const string &cancel_message_) - { - thread_scoped_lock lock(progress_mutex); - cancel_message = cancel_message_; - cancel = true; - } - - bool get_cancel() const - { - if (!cancel && cancel_cb) - cancel_cb(); - - return cancel; - } - - string get_cancel_message() const - { - thread_scoped_lock lock(progress_mutex); - return cancel_message; - } - - void set_cancel_callback(function function) - { - cancel_cb = function; - } - - /* error */ - void set_error(const string &error_message_) - { - thread_scoped_lock lock(progress_mutex); - error_message = error_message_; - error = true; - /* If error happens we also stop rendering. */ - cancel_message = error_message_; - cancel = true; - } - - bool get_error() const - { - return error; - } - - string get_error_message() const - { - thread_scoped_lock lock(progress_mutex); - return error_message; - } - - /* tile and timing information */ - - void set_start_time() - { - thread_scoped_lock lock(progress_mutex); - - start_time = time_dt(); - end_time = 0.0; - } - - void set_render_start_time() - { - thread_scoped_lock lock(progress_mutex); - - render_start_time = time_dt(); - } - - void add_skip_time(const scoped_timer &start_timer, bool only_render) - { - double skip_time = time_dt() - start_timer.get_start(); - - render_start_time += skip_time; - if (!only_render) { - start_time += skip_time; - } - } - - void get_time(double &total_time_, double &render_time_) const - { - thread_scoped_lock lock(progress_mutex); - - double time = (end_time > 0) ? 
end_time : time_dt(); - - total_time_ = time - start_time; - render_time_ = time - render_start_time; - } - - void set_end_time() - { - end_time = time_dt(); - } - - void reset_sample() - { - thread_scoped_lock lock(progress_mutex); - - pixel_samples = 0; - current_tile_sample = 0; - rendered_tiles = 0; - denoised_tiles = 0; - } - - void set_total_pixel_samples(uint64_t total_pixel_samples_) - { - thread_scoped_lock lock(progress_mutex); - - total_pixel_samples = total_pixel_samples_; - } - - float get_progress() const - { - thread_scoped_lock lock(progress_mutex); - - if (total_pixel_samples > 0) { - return ((float)pixel_samples) / total_pixel_samples; - } - return 0.0f; - } - - void add_samples(uint64_t pixel_samples_, int tile_sample) - { - thread_scoped_lock lock(progress_mutex); - - pixel_samples += pixel_samples_; - current_tile_sample = tile_sample; - } - - void add_samples_update(uint64_t pixel_samples_, int tile_sample) - { - add_samples(pixel_samples_, tile_sample); - set_update(); - } - - void add_finished_tile(bool denoised) - { - thread_scoped_lock lock(progress_mutex); - - if (denoised) { - denoised_tiles++; - } - else { - rendered_tiles++; - } - } - - int get_current_sample() const - { - thread_scoped_lock lock(progress_mutex); - /* Note that the value here always belongs to the last tile that updated, - * so it's only useful if there is only one active tile. */ - return current_tile_sample; - } - - int get_rendered_tiles() const - { - thread_scoped_lock lock(progress_mutex); - return rendered_tiles; - } - - int get_denoised_tiles() const - { - thread_scoped_lock lock(progress_mutex); - return denoised_tiles; - } - - /* status messages */ - - void set_status(const string &status_, const string &substatus_ = "") - { - { - thread_scoped_lock lock(progress_mutex); - status = status_; - substatus = substatus_; - } - - set_update(); - } - - void set_substatus(const string &substatus_) - { - { - thread_scoped_lock lock(progress_mutex); - substatus = substatus_; - } - - set_update(); - } - - void set_sync_status(const string &status_, const string &substatus_ = "") - { - { - thread_scoped_lock lock(progress_mutex); - sync_status = status_; - sync_substatus = substatus_; - } - - set_update(); - } - - void set_sync_substatus(const string &substatus_) - { - { - thread_scoped_lock lock(progress_mutex); - sync_substatus = substatus_; - } - - set_update(); - } - - void get_status(string &status_, string &substatus_) const - { - thread_scoped_lock lock(progress_mutex); - - if (sync_status != "") { - status_ = sync_status; - substatus_ = sync_substatus; - } - else { - status_ = status; - substatus_ = substatus; - } - } - - /* callback */ - - void set_update() - { - if (update_cb) { - thread_scoped_lock lock(update_mutex); - update_cb(); - } - } - - void set_update_callback(function function) - { - update_cb = function; - } - - protected: - mutable thread_mutex progress_mutex; - mutable thread_mutex update_mutex; - function update_cb; - function cancel_cb; - - /* pixel_samples counts how many samples have been rendered over all pixel, not just per pixel. - * This makes the progress estimate more accurate when tiles with different sizes are used. - * - * total_pixel_samples is the total amount of pixel samples that will be rendered. */ - uint64_t pixel_samples, total_pixel_samples; - /* Stores the current sample count of the last tile that called the update function. - * It's used to display the sample count if only one tile is active. 
*/ - int current_tile_sample; - /* Stores the number of tiles that's already finished. - * Used to determine whether all but the last tile are finished rendering, - * in which case the current_tile_sample is displayed. */ - int rendered_tiles, denoised_tiles; - - double start_time, render_start_time; - /* End time written when render is done, so it doesn't keep increasing on redraws. */ - double end_time; - - string status; - string substatus; - - string sync_status; - string sync_substatus; - - volatile bool cancel; - string cancel_message; - - volatile bool error; - string error_message; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_PROGRESS_H__ */ diff --git a/intern/cycles/util/util_projection.h b/intern/cycles/util/util_projection.h deleted file mode 100644 index 04b4574d75b..00000000000 --- a/intern/cycles/util/util_projection.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright 2011-2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_PROJECTION_H__ -#define __UTIL_PROJECTION_H__ - -#include "util/util_transform.h" - -CCL_NAMESPACE_BEGIN - -/* 4x4 projection matrix, perspective or orthographic. */ - -typedef struct ProjectionTransform { - float4 x, y, z, w; /* rows */ - -#ifndef __KERNEL_GPU__ - ProjectionTransform() - { - } - - explicit ProjectionTransform(const Transform &tfm) - : x(tfm.x), y(tfm.y), z(tfm.z), w(make_float4(0.0f, 0.0f, 0.0f, 1.0f)) - { - } -#endif -} ProjectionTransform; - -typedef struct PerspectiveMotionTransform { - ProjectionTransform pre; - ProjectionTransform post; -} PerspectiveMotionTransform; - -/* Functions */ - -ccl_device_inline float3 transform_perspective(ccl_private const ProjectionTransform *t, - const float3 a) -{ - float4 b = make_float4(a.x, a.y, a.z, 1.0f); - float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b)); - float w = dot(t->w, b); - - return (w != 0.0f) ? 
c / w : zero_float3(); -} - -ccl_device_inline float3 transform_perspective_direction(ccl_private const ProjectionTransform *t, - const float3 a) -{ - float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, - a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, - a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); - - return c; -} - -#ifndef __KERNEL_GPU__ - -ccl_device_inline Transform projection_to_transform(const ProjectionTransform &a) -{ - Transform tfm = {a.x, a.y, a.z}; - return tfm; -} - -ccl_device_inline ProjectionTransform projection_transpose(const ProjectionTransform &a) -{ - ProjectionTransform t; - - t.x.x = a.x.x; - t.x.y = a.y.x; - t.x.z = a.z.x; - t.x.w = a.w.x; - t.y.x = a.x.y; - t.y.y = a.y.y; - t.y.z = a.z.y; - t.y.w = a.w.y; - t.z.x = a.x.z; - t.z.y = a.y.z; - t.z.z = a.z.z; - t.z.w = a.w.z; - t.w.x = a.x.w; - t.w.y = a.y.w; - t.w.z = a.z.w; - t.w.w = a.w.w; - - return t; -} - -ProjectionTransform projection_inverse(const ProjectionTransform &a); - -ccl_device_inline ProjectionTransform make_projection(float a, - float b, - float c, - float d, - float e, - float f, - float g, - float h, - float i, - float j, - float k, - float l, - float m, - float n, - float o, - float p) -{ - ProjectionTransform t; - - t.x.x = a; - t.x.y = b; - t.x.z = c; - t.x.w = d; - t.y.x = e; - t.y.y = f; - t.y.z = g; - t.y.w = h; - t.z.x = i; - t.z.y = j; - t.z.z = k; - t.z.w = l; - t.w.x = m; - t.w.y = n; - t.w.z = o; - t.w.w = p; - - return t; -} -ccl_device_inline ProjectionTransform projection_identity() -{ - return make_projection(1.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 1.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 1.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 1.0f); -} - -ccl_device_inline ProjectionTransform operator*(const ProjectionTransform &a, - const ProjectionTransform &b) -{ - ProjectionTransform c = projection_transpose(b); - ProjectionTransform t; - - t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w)); - t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w)); - t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w)); - t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w)); - - return t; -} - -ccl_device_inline ProjectionTransform operator*(const ProjectionTransform &a, const Transform &b) -{ - return a * ProjectionTransform(b); -} - -ccl_device_inline ProjectionTransform operator*(const Transform &a, const ProjectionTransform &b) -{ - return ProjectionTransform(a) * b; -} - -ccl_device_inline void print_projection(const char *label, const ProjectionTransform &t) -{ - print_float4(label, t.x); - print_float4(label, t.y); - print_float4(label, t.z); - print_float4(label, t.w); - printf("\n"); -} - -ccl_device_inline ProjectionTransform projection_perspective(float fov, float n, float f) -{ - ProjectionTransform persp = make_projection( - 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, f / (f - n), -f * n / (f - n), 0, 0, 1, 0); - - float inv_angle = 1.0f / tanf(0.5f * fov); - - Transform scale = transform_scale(inv_angle, inv_angle, 1); - - return scale * persp; -} - -ccl_device_inline ProjectionTransform projection_orthographic(float znear, float zfar) -{ - Transform t = transform_scale(1.0f, 1.0f, 1.0f / (zfar - znear)) * - transform_translate(0.0f, 0.0f, -znear); - - return ProjectionTransform(t); -} - -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_PROJECTION_H__ */ diff --git a/intern/cycles/util/util_queue.h b/intern/cycles/util/util_queue.h deleted file mode 100644 index 
622f4fe3e47..00000000000 --- a/intern/cycles/util/util_queue.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_QUEUE_H__ -#define __UTIL_QUEUE_H__ - -#include - -CCL_NAMESPACE_BEGIN - -using std::queue; - -CCL_NAMESPACE_END - -#endif /* __UTIL_LIST_H__ */ diff --git a/intern/cycles/util/util_rect.h b/intern/cycles/util/util_rect.h deleted file mode 100644 index 32df9327cbd..00000000000 --- a/intern/cycles/util/util_rect.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_RECT_H__ -#define __UTIL_RECT_H__ - -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* Rectangles are represented as a int4 containing the coordinates of the lower-left and - * upper-right corners in the order (x0, y0, x1, y1). */ - -ccl_device_inline int4 rect_from_shape(int x0, int y0, int w, int h) -{ - return make_int4(x0, y0, x0 + w, y0 + h); -} - -ccl_device_inline int4 rect_expand(int4 rect, int d) -{ - return make_int4(rect.x - d, rect.y - d, rect.z + d, rect.w + d); -} - -/* Returns the intersection of two rects. */ -ccl_device_inline int4 rect_clip(int4 a, int4 b) -{ - return make_int4(max(a.x, b.x), max(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); -} - -ccl_device_inline bool rect_is_valid(int4 rect) -{ - return (rect.z > rect.x) && (rect.w > rect.y); -} - -/* Returns the local row-major index of the pixel inside the rect. */ -ccl_device_inline int coord_to_local_index(int4 rect, int x, int y) -{ - int w = rect.z - rect.x; - return (y - rect.y) * w + (x - rect.x); -} - -/* Finds the coordinates of a pixel given by its row-major index in the rect, - * and returns whether the pixel is inside it. 
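 * Worked example (illustrative): for rect = (2, 3, 6, 8) the width is 4, so
 * index 5 maps back to x = 2 + (5 % 4) = 3 and y = 3 + (5 / 4) = 4, which is
 * inside the rect since 4 < 8.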
*/ -ccl_device_inline bool local_index_to_coord(int4 rect, - int idx, - ccl_private int *x, - ccl_private int *y) -{ - int w = rect.z - rect.x; - *x = (idx % w) + rect.x; - *y = (idx / w) + rect.y; - return (*y < rect.w); -} - -ccl_device_inline int rect_size(int4 rect) -{ - return (rect.z - rect.x) * (rect.w - rect.y); -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_RECT_H__ */ diff --git a/intern/cycles/util/util_semaphore.h b/intern/cycles/util/util_semaphore.h deleted file mode 100644 index d995b0732b8..00000000000 --- a/intern/cycles/util/util_semaphore.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2011-2020 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SEMAPHORE_H__ -#define __UTIL_SEMAPHORE_H__ - -#include "util/util_thread.h" - -CCL_NAMESPACE_BEGIN - -/* Counting Semaphore - * - * To restrict concurrent access to a resource to a specified number - * of threads. Similar to std::counting_semaphore from C++20. */ - -class thread_counting_semaphore { - public: - explicit thread_counting_semaphore(const int count) : count(count) - { - } - - thread_counting_semaphore(const thread_counting_semaphore &) = delete; - - void acquire() - { - thread_scoped_lock lock(mutex); - while (count == 0) { - condition.wait(lock); - } - count--; - } - - void release() - { - thread_scoped_lock lock(mutex); - count++; - condition.notify_one(); - } - - protected: - thread_mutex mutex; - thread_condition_variable condition; - int count; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_SEMAPHORE_H__ */ diff --git a/intern/cycles/util/util_set.h b/intern/cycles/util/util_set.h deleted file mode 100644 index 298e1f7729a..00000000000 --- a/intern/cycles/util/util_set.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SET_H__ -#define __UTIL_SET_H__ - -#include -#include - -#if defined(_MSC_VER) && (_MSC_VER >= 1900) -# include -#endif - -CCL_NAMESPACE_BEGIN - -using std::set; -using std::unordered_set; - -CCL_NAMESPACE_END - -#endif /* __UTIL_SET_H__ */ diff --git a/intern/cycles/util/util_simd.cpp b/intern/cycles/util/util_simd.cpp deleted file mode 100644 index 861dcf1fe36..00000000000 --- a/intern/cycles/util/util_simd.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if (defined(WITH_KERNEL_SSE2)) || (defined(WITH_KERNEL_NATIVE) && defined(__SSE2__)) - -# define __KERNEL_SSE2__ -# include "util/util_simd.h" - -CCL_NAMESPACE_BEGIN - -const __m128 _mm_lookupmask_ps[16] = {_mm_castsi128_ps(_mm_set_epi32(0, 0, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32(0, 0, 0, -1)), - _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, 0)), - _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1)), - _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, -1)), - _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, 0)), - _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, -1)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, -1)), - _mm_castsi128_ps(_mm_set_epi32(-1, -1, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1, -1, 0, -1)), - _mm_castsi128_ps(_mm_set_epi32(-1, -1, -1, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1, -1, -1, -1))}; - -CCL_NAMESPACE_END - -#endif // WITH_KERNEL_SSE2 diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h deleted file mode 100644 index b4a153c329f..00000000000 --- a/intern/cycles/util/util_simd.h +++ /dev/null @@ -1,572 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SIMD_TYPES_H__ -#define __UTIL_SIMD_TYPES_H__ - -#include -#include - -#include "util/util_defines.h" - -/* SSE Intrinsics includes - * - * We assume __KERNEL_SSEX__ flags to have been defined at this point. - * - * MinGW64 has conflicting declarations for these SSE headers in . - * Since we can't avoid including , better only include that */ -#if defined(FREE_WINDOWS64) -# include "util/util_windows.h" -#elif defined(_MSC_VER) -# include -#elif (defined(__x86_64__) || defined(__i386__)) -# include -#elif defined(__KERNEL_NEON__) -# define SSE2NEON_PRECISE_MINMAX 1 -# include -#endif - -/* Floating Point Control, for Embree. */ -#if defined(__x86_64__) || defined(_M_X64) -# define SIMD_SET_FLUSH_TO_ZERO \ - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \ - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); -#else -# define SIMD_SET_FLUSH_TO_ZERO -#endif - -CCL_NAMESPACE_BEGIN - -/* Data structures used by SSE classes. 
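 *
 * _mm_lookupmask_ps (defined in util_simd.cpp above) maps a 4-bit mask to a
 * per-lane mask: bit i of the index controls lane i, so index 0b0101 yields a
 * vector with lanes 0 and 2 set to all-ones. The small tag types below
 * (True, False, zero, one, inf, ...) let the SSE wrapper classes be
 * constructed from symbolic constants.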
*/ -#ifdef __KERNEL_SSE2__ - -extern const __m128 _mm_lookupmask_ps[16]; - -static struct TrueTy { - __forceinline operator bool() const - { - return true; - } -} True ccl_attr_maybe_unused; - -static struct FalseTy { - __forceinline operator bool() const - { - return false; - } -} False ccl_attr_maybe_unused; - -static struct ZeroTy { - __forceinline operator float() const - { - return 0; - } - __forceinline operator int() const - { - return 0; - } -} zero ccl_attr_maybe_unused; - -static struct OneTy { - __forceinline operator float() const - { - return 1; - } - __forceinline operator int() const - { - return 1; - } -} one ccl_attr_maybe_unused; - -static struct NegInfTy { - __forceinline operator float() const - { - return -std::numeric_limits::infinity(); - } - __forceinline operator int() const - { - return std::numeric_limits::min(); - } -} neg_inf ccl_attr_maybe_unused; - -static struct PosInfTy { - __forceinline operator float() const - { - return std::numeric_limits::infinity(); - } - __forceinline operator int() const - { - return std::numeric_limits::max(); - } -} inf ccl_attr_maybe_unused, pos_inf ccl_attr_maybe_unused; - -static struct StepTy { -} step ccl_attr_maybe_unused; - -#endif - -/* Utilities used by Neon */ -#if defined(__KERNEL_NEON__) -template type shuffle_neon(const type &a) -{ - if (i0 == i1 && i0 == i2 && i0 == i3) { - return type(vdupq_laneq_s32(int32x4_t(a), i0)); - } - static const uint8_t tbl[16] = {(i0 * 4) + 0, - (i0 * 4) + 1, - (i0 * 4) + 2, - (i0 * 4) + 3, - (i1 * 4) + 0, - (i1 * 4) + 1, - (i1 * 4) + 2, - (i1 * 4) + 3, - (i2 * 4) + 0, - (i2 * 4) + 1, - (i2 * 4) + 2, - (i2 * 4) + 3, - (i3 * 4) + 0, - (i3 * 4) + 1, - (i3 * 4) + 2, - (i3 * 4) + 3}; - - return type(vqtbl1q_s8(int8x16_t(a), *(uint8x16_t *)tbl)); -} - -template -type shuffle_neon(const type &a, const type &b) -{ - if (&a == &b) { - static const uint8_t tbl[16] = {(i0 * 4) + 0, - (i0 * 4) + 1, - (i0 * 4) + 2, - (i0 * 4) + 3, - (i1 * 4) + 0, - (i1 * 4) + 1, - (i1 * 4) + 2, - (i1 * 4) + 3, - (i2 * 4) + 0, - (i2 * 4) + 1, - (i2 * 4) + 2, - (i2 * 4) + 3, - (i3 * 4) + 0, - (i3 * 4) + 1, - (i3 * 4) + 2, - (i3 * 4) + 3}; - - return type(vqtbl1q_s8(int8x16_t(b), *(uint8x16_t *)tbl)); - } - else { - - static const uint8_t tbl[16] = {(i0 * 4) + 0, - (i0 * 4) + 1, - (i0 * 4) + 2, - (i0 * 4) + 3, - (i1 * 4) + 0, - (i1 * 4) + 1, - (i1 * 4) + 2, - (i1 * 4) + 3, - (i2 * 4) + 0 + 16, - (i2 * 4) + 1 + 16, - (i2 * 4) + 2 + 16, - (i2 * 4) + 3 + 16, - (i3 * 4) + 0 + 16, - (i3 * 4) + 1 + 16, - (i3 * 4) + 2 + 16, - (i3 * 4) + 3 + 16}; - - return type(vqtbl2q_s8((int8x16x2_t){int8x16_t(a), int8x16_t(b)}, *(uint8x16_t *)tbl)); - } -} -#endif /* __KERNEL_NEON */ - -/* Intrinsics Functions - * - * For fast bit operations. */ - -#if defined(__BMI__) && defined(__GNUC__) -# ifndef _tzcnt_u32 -# define _tzcnt_u32 __tzcnt_u32 -# endif -# ifndef _tzcnt_u64 -# define _tzcnt_u64 __tzcnt_u64 -# endif -#endif - -#if defined(__LZCNT__) -# define _lzcnt_u32 __lzcnt32 -# define _lzcnt_u64 __lzcnt64 -#endif - -#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__clang__) -/* Intrinsic functions on Windows. 
*/ -__forceinline uint32_t __bsf(uint32_t v) -{ -# if defined(__KERNEL_AVX2__) - return _tzcnt_u32(v); -# else - unsigned long r = 0; - _BitScanForward(&r, v); - return r; -# endif -} - -__forceinline uint32_t __bsr(uint32_t v) -{ - unsigned long r = 0; - _BitScanReverse(&r, v); - return r; -} - -__forceinline uint32_t __btc(uint32_t v, uint32_t i) -{ - long r = v; - _bittestandcomplement(&r, i); - return r; -} - -__forceinline uint32_t bitscan(uint32_t v) -{ -# if defined(__KERNEL_AVX2__) - return _tzcnt_u32(v); -# else - return __bsf(v); -# endif -} - -# if defined(__KERNEL_64_BIT__) - -__forceinline uint64_t __bsf(uint64_t v) -{ -# if defined(__KERNEL_AVX2__) - return _tzcnt_u64(v); -# else - unsigned long r = 0; - _BitScanForward64(&r, v); - return r; -# endif -} - -__forceinline uint64_t __bsr(uint64_t v) -{ - unsigned long r = 0; - _BitScanReverse64(&r, v); - return r; -} - -__forceinline uint64_t __btc(uint64_t v, uint64_t i) -{ - uint64_t r = v; - _bittestandcomplement64((__int64 *)&r, i); - return r; -} - -__forceinline uint64_t bitscan(uint64_t v) -{ -# if defined(__KERNEL_AVX2__) -# if defined(__KERNEL_64_BIT__) - return _tzcnt_u64(v); -# else - return _tzcnt_u32(v); -# endif -# else - return __bsf(v); -# endif -} - -# endif /* __KERNEL_64_BIT__ */ - -#elif (defined(__x86_64__) || defined(__i386__)) && defined(__KERNEL_SSE2__) -/* Intrinsic functions with x86 SSE. */ - -__forceinline uint32_t __bsf(const uint32_t v) -{ - uint32_t r = 0; - asm("bsf %1,%0" : "=r"(r) : "r"(v)); - return r; -} - -__forceinline uint32_t __bsr(const uint32_t v) -{ - uint32_t r = 0; - asm("bsr %1,%0" : "=r"(r) : "r"(v)); - return r; -} - -__forceinline uint32_t __btc(const uint32_t v, uint32_t i) -{ - uint32_t r = 0; - asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); - return r; -} - -# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \ - !(defined(__ILP32__) && defined(__x86_64__)) -__forceinline uint64_t __bsf(const uint64_t v) -{ - uint64_t r = 0; - asm("bsf %1,%0" : "=r"(r) : "r"(v)); - return r; -} -# endif - -__forceinline uint64_t __bsr(const uint64_t v) -{ - uint64_t r = 0; - asm("bsr %1,%0" : "=r"(r) : "r"(v)); - return r; -} - -__forceinline uint64_t __btc(const uint64_t v, const uint64_t i) -{ - uint64_t r = 0; - asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); - return r; -} - -__forceinline uint32_t bitscan(uint32_t v) -{ -# if defined(__KERNEL_AVX2__) - return _tzcnt_u32(v); -# else - return __bsf(v); -# endif -} - -# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \ - !(defined(__ILP32__) && defined(__x86_64__)) -__forceinline uint64_t bitscan(uint64_t v) -{ -# if defined(__KERNEL_AVX2__) -# if defined(__KERNEL_64_BIT__) - return _tzcnt_u64(v); -# else - return _tzcnt_u32(v); -# endif -# else - return __bsf(v); -# endif -} -# endif - -#else -/* Intrinsic functions fallback for arbitrary processor. 
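 * These portable versions scan one bit at a time instead of using hardware
 * bit-scan instructions; for example __bsf(8u) tests bits 0..2 before
 * returning 3.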
*/ -__forceinline uint32_t __bsf(const uint32_t x) -{ - for (int i = 0; i < 32; i++) { - if (x & (1U << i)) - return i; - } - return 32; -} - -__forceinline uint32_t __bsr(const uint32_t x) -{ - for (int i = 0; i < 32; i++) { - if (x & (1U << (31 - i))) - return (31 - i); - } - return 32; -} - -__forceinline uint32_t __btc(const uint32_t x, const uint32_t bit) -{ - uint32_t mask = 1U << bit; - return x & (~mask); -} - -__forceinline uint32_t __bsf(const uint64_t x) -{ - for (int i = 0; i < 64; i++) { - if (x & (1UL << i)) - return i; - } - return 64; -} - -__forceinline uint32_t __bsr(const uint64_t x) -{ - for (int i = 0; i < 64; i++) { - if (x & (1UL << (63 - i))) - return (63 - i); - } - return 64; -} - -__forceinline uint64_t __btc(const uint64_t x, const uint32_t bit) -{ - uint64_t mask = 1UL << bit; - return x & (~mask); -} - -__forceinline uint32_t bitscan(uint32_t value) -{ - assert(value != 0); - uint32_t bit = 0; - while ((value & (1 << bit)) == 0) { - ++bit; - } - return bit; -} - -__forceinline uint64_t bitscan(uint64_t value) -{ - assert(value != 0); - uint64_t bit = 0; - while ((value & (1 << bit)) == 0) { - ++bit; - } - return bit; -} - -#endif /* Intrinsics */ - -/* SSE compatibility. - * - * Various utilities to smooth over differences between SSE versions and - * implementations. */ -#ifdef __KERNEL_SSE2__ - -/* Test __KERNEL_SSE41__ for MSVC which does not define __SSE4_1__, and test - * __SSE4_1__ to avoid OpenImageIO conflicts with our emulation macros on other - * platforms when compiling code outside the kernel. */ -# if !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) - -/* Emulation of SSE4 functions with SSE2 */ - -# define _MM_FROUND_TO_NEAREST_INT 0x00 -# define _MM_FROUND_TO_NEG_INF 0x01 -# define _MM_FROUND_TO_POS_INF 0x02 -# define _MM_FROUND_TO_ZERO 0x03 -# define _MM_FROUND_CUR_DIRECTION 0x04 - -# undef _mm_blendv_ps -# define _mm_blendv_ps _mm_blendv_ps_emu -__forceinline __m128 _mm_blendv_ps_emu(__m128 value, __m128 input, __m128 mask) -{ - __m128i isignmask = _mm_set1_epi32(0x80000000); - __m128 signmask = _mm_castsi128_ps(isignmask); - __m128i iandsign = _mm_castps_si128(_mm_and_ps(mask, signmask)); - __m128i icmpmask = _mm_cmpeq_epi32(iandsign, isignmask); - __m128 cmpmask = _mm_castsi128_ps(icmpmask); - return _mm_or_ps(_mm_and_ps(cmpmask, input), _mm_andnot_ps(cmpmask, value)); -} - -# undef _mm_blend_ps -# define _mm_blend_ps _mm_blend_ps_emu -__forceinline __m128 _mm_blend_ps_emu(__m128 value, __m128 input, const int mask) -{ - assert(mask < 0x10); - return _mm_blendv_ps(value, input, _mm_lookupmask_ps[mask]); -} - -# undef _mm_blendv_epi8 -# define _mm_blendv_epi8 _mm_blendv_epi8_emu -__forceinline __m128i _mm_blendv_epi8_emu(__m128i value, __m128i input, __m128i mask) -{ - return _mm_or_si128(_mm_and_si128(mask, input), _mm_andnot_si128(mask, value)); -} - -# undef _mm_min_epi32 -# define _mm_min_epi32 _mm_min_epi32_emu -__forceinline __m128i _mm_min_epi32_emu(__m128i value, __m128i input) -{ - return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input)); -} - -# undef _mm_max_epi32 -# define _mm_max_epi32 _mm_max_epi32_emu -__forceinline __m128i _mm_max_epi32_emu(__m128i value, __m128i input) -{ - return _mm_blendv_epi8(value, input, _mm_cmplt_epi32(value, input)); -} - -# ifndef __KERNEL_NEON__ -# undef _mm_extract_epi32 -# define _mm_extract_epi32 _mm_extract_epi32_emu -__forceinline int _mm_extract_epi32_emu(__m128i input, const int index) -{ - switch (index) { - case 0: - return _mm_cvtsi128_si32(input); - 
case 1: - return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(1, 1, 1, 1))); - case 2: - return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(2, 2, 2, 2))); - case 3: - return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(3, 3, 3, 3))); - default: - assert(false); - return 0; - } -} -# endif - -# undef _mm_insert_epi32 -# define _mm_insert_epi32 _mm_insert_epi32_emu -__forceinline __m128i _mm_insert_epi32_emu(__m128i value, int input, const int index) -{ - assert(index >= 0 && index < 4); - ((int *)&value)[index] = input; - return value; -} - -# undef _mm_insert_ps -# define _mm_insert_ps _mm_insert_ps_emu -__forceinline __m128 _mm_insert_ps_emu(__m128 value, __m128 input, const int index) -{ - assert(index < 0x100); - ((float *)&value)[(index >> 4) & 0x3] = ((float *)&input)[index >> 6]; - return _mm_andnot_ps(_mm_lookupmask_ps[index & 0xf], value); -} - -# undef _mm_round_ps -# define _mm_round_ps _mm_round_ps_emu -__forceinline __m128 _mm_round_ps_emu(__m128 value, const int flags) -{ - switch (flags) { - case _MM_FROUND_TO_NEAREST_INT: - return _mm_cvtepi32_ps(_mm_cvtps_epi32(value)); - case _MM_FROUND_TO_NEG_INF: - return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(-0.5f)))); - case _MM_FROUND_TO_POS_INF: - return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(0.5f)))); - case _MM_FROUND_TO_ZERO: - return _mm_cvtepi32_ps(_mm_cvttps_epi32(value)); - } - return value; -} - -# endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */ - -/* Older GCC versions do not have _mm256_cvtss_f32 yet, so define it ourselves. - * _mm256_castps256_ps128 generates no instructions so this is just as efficient. */ -# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) -# undef _mm256_cvtss_f32 -# define _mm256_cvtss_f32(a) (_mm_cvtss_f32(_mm256_castps256_ps128(a))) -# endif - -#endif /* __KERNEL_SSE2__ */ - -/* quiet unused define warnings */ -#if defined(__KERNEL_SSE2__) || defined(__KERNEL_SSE3__) || defined(__KERNEL_SSSE3__) || \ - defined(__KERNEL_SSE41__) || defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) -/* do nothing */ -#endif - -CCL_NAMESPACE_END - -#endif /* __UTIL_SIMD_TYPES_H__ */ diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h deleted file mode 100644 index 6afce4f8909..00000000000 --- a/intern/cycles/util/util_sseb.h +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SSEB_H__ -#define __UTIL_SSEB_H__ - -CCL_NAMESPACE_BEGIN - -#ifdef __KERNEL_SSE2__ - -struct ssei; -struct ssef; - -/*! 4-wide SSE bool type. 
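 * Each lane is either all-zero or all-one bits, so the value doubles as a
 * blend/select mask. For example (illustrative), sseb(true, false, true,
 * false) loads _mm_lookupmask_ps[0b0101], and _mm_movemask_ps() on the result
 * returns 0x5.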
*/ -struct sseb { - typedef sseb Mask; // mask type - typedef ssei Int; // int type - typedef ssef Float; // float type - - enum { size = 4 }; // number of SIMD elements - union { - __m128 m128; - int32_t v[4]; - }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline sseb() - { - } - __forceinline sseb(const sseb &other) - { - m128 = other.m128; - } - __forceinline sseb &operator=(const sseb &other) - { - m128 = other.m128; - return *this; - } - - __forceinline sseb(const __m128 input) : m128(input) - { - } - __forceinline operator const __m128 &(void) const - { - return m128; - } - __forceinline operator const __m128i(void) const - { - return _mm_castps_si128(m128); - } - __forceinline operator const __m128d(void) const - { - return _mm_castps_pd(m128); - } - - __forceinline sseb(bool a) - : m128(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) - { - } - __forceinline sseb(bool a, bool b) - : m128(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) - { - } - __forceinline sseb(bool a, bool b, bool c, bool d) - : m128(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) - { - } - __forceinline sseb(int mask) - { - assert(mask >= 0 && mask < 16); - m128 = _mm_lookupmask_ps[mask]; - } - - //////////////////////////////////////////////////////////////////////////////// - /// Constants - //////////////////////////////////////////////////////////////////////////////// - - __forceinline sseb(FalseTy) : m128(_mm_setzero_ps()) - { - } - __forceinline sseb(TrueTy) - : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline bool operator[](const size_t i) const - { - assert(i < 4); - return (_mm_movemask_ps(m128) >> i) & 1; - } - __forceinline int32_t &operator[](const size_t i) - { - assert(i < 4); - return v[i]; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator!(const sseb &a) -{ - return _mm_xor_ps(a, sseb(True)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator&(const sseb &a, const sseb &b) -{ - return _mm_and_ps(a, b); -} -__forceinline const sseb operator|(const sseb &a, const sseb &b) -{ - return _mm_or_ps(a, b); -} -__forceinline const sseb operator^(const sseb &a, const sseb &b) -{ - return _mm_xor_ps(a, b); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Assignment Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator&=(sseb &a, const sseb &b) -{ - return a = a & b; -} -__forceinline const sseb operator|=(sseb &a, const sseb &b) -{ - return a = a | b; -} -__forceinline const sseb operator^=(sseb &a, const sseb &b) -{ - return a = a ^ b; -} - 
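For reference, the sseb operators above and the select()/movemask() helpers further down in this file wrap the usual SSE2 masking idiom: a comparison produces an all-ones/all-zeros mask per lane, which is then used for branchless blending and for packing lane flags into an integer. A minimal standalone sketch of that idiom, using plain SSE2 intrinsics rather than the Cycles wrapper types (the lane-wise max and the variable names are only illustrative, and an x86 compiler with SSE2 enabled is assumed):

#include <emmintrin.h> /* SSE2 */
#include <cstdio>

int main()
{
  /* Two input vectors; compute max(a, b) per lane without branching. */
  const __m128 a = _mm_setr_ps(1.0f, 5.0f, 3.0f, 8.0f);
  const __m128 b = _mm_setr_ps(4.0f, 2.0f, 7.0f, 6.0f);

  /* Comparison yields 0xffffffff or 0x00000000 per lane (what sseb stores). */
  const __m128 mask = _mm_cmpgt_ps(a, b);

  /* Branchless blend: (mask & a) | (~mask & b). */
  const __m128 m = _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b));

  /* movemask packs the four lane sign bits into 4 integer bits,
   * the basis for sseb::operator[], any(), all() and none(). */
  const int bits = _mm_movemask_ps(mask);

  float out[4];
  _mm_storeu_ps(out, m);
  printf("max: %g %g %g %g, mask bits: 0x%x\n", out[0], out[1], out[2], out[3], bits);
  return 0;
}

The _mm_and_ps/_mm_andnot_ps/_mm_or_ps combination is the same fallback that select() below uses when SSE4.1 blendv is not available.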
-//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator!=(const sseb &a, const sseb &b) -{ - return _mm_xor_ps(a, b); -} -__forceinline const sseb operator==(const sseb &a, const sseb &b) -{ - return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b)); -} - -__forceinline const sseb select(const sseb &m, const sseb &t, const sseb &f) -{ -# if defined(__KERNEL_SSE41__) - return _mm_blendv_ps(f, t, m); -# else - return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb unpacklo(const sseb &a, const sseb &b) -{ - return _mm_unpacklo_ps(a, b); -} -__forceinline const sseb unpackhi(const sseb &a, const sseb &b) -{ - return _mm_unpackhi_ps(a, b); -} - -template -__forceinline const sseb shuffle(const sseb &a) -{ -# ifdef __KERNEL_NEON__ - return shuffle_neon(a); -# else - return _mm_castsi128_ps(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0))); -# endif -} - -# ifndef __KERNEL_NEON__ -template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a) -{ - return _mm_movelh_ps(a, a); -} - -template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a) -{ - return _mm_movehl_ps(a, a); -} -# endif - -template -__forceinline const sseb shuffle(const sseb &a, const sseb &b) -{ -# ifdef __KERNEL_NEON__ - return shuffle_neon(a, b); -# else - return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); -# endif -} - -# ifndef __KERNEL_NEON__ -template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a, const sseb &b) -{ - return _mm_movelh_ps(a, b); -} - -template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a, const sseb &b) -{ - return _mm_movehl_ps(b, a); -} -# endif - -# if defined(__KERNEL_SSE3__) && !defined(__KERNEL_NEON__) -template<> __forceinline const sseb shuffle<0, 0, 2, 2>(const sseb &a) -{ - return _mm_moveldup_ps(a); -} -template<> __forceinline const sseb shuffle<1, 1, 3, 3>(const sseb &a) -{ - return _mm_movehdup_ps(a); -} -# endif - -# if defined(__KERNEL_SSE41__) -template -__forceinline const sseb insert(const sseb &a, const sseb &b) -{ -# ifdef __KERNEL_NEON__ - sseb res = a; - if (clr) - res[dst] = 0; - else - res[dst] = b[src]; - return res; -# else - return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); -# endif -} -template __forceinline const sseb insert(const sseb &a, const sseb &b) -{ - return insert(a, b); -} -template __forceinline const sseb insert(const sseb &a, const bool b) -{ - return insert(a, sseb(b)); -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Reduction Operations -//////////////////////////////////////////////////////////////////////////////// - -# if defined(__KERNEL_SSE41__) -__forceinline uint32_t popcnt(const sseb &a) -{ -# if defined(__KERNEL_NEON__) - const int32x4_t mask = {1, 1, 1, 1}; - int32x4_t t = vandq_s32(vreinterpretq_s32_m128(a.m128), mask); - return vaddvq_s32(t); -# else - return _mm_popcnt_u32(_mm_movemask_ps(a)); -# endif -} -# else -__forceinline uint32_t popcnt(const sseb &a) -{ - return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]); -} -# endif - -__forceinline bool reduce_and(const sseb &a) -{ -# if defined(__KERNEL_NEON__) - return 
vaddvq_s32(vreinterpretq_s32_m128(a.m128)) == -4; -# else - return _mm_movemask_ps(a) == 0xf; -# endif -} -__forceinline bool reduce_or(const sseb &a) -{ -# if defined(__KERNEL_NEON__) - return vaddvq_s32(vreinterpretq_s32_m128(a.m128)) != 0x0; -# else - return _mm_movemask_ps(a) != 0x0; -# endif -} -__forceinline bool all(const sseb &b) -{ -# if defined(__KERNEL_NEON__) - return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == -4; -# else - return _mm_movemask_ps(b) == 0xf; -# endif -} -__forceinline bool any(const sseb &b) -{ -# if defined(__KERNEL_NEON__) - return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) != 0x0; -# else - return _mm_movemask_ps(b) != 0x0; -# endif -} -__forceinline bool none(const sseb &b) -{ -# if defined(__KERNEL_NEON__) - return vaddvq_s32(vreinterpretq_s32_m128(b.m128)) == 0x0; -# else - return _mm_movemask_ps(b) == 0x0; -# endif -} - -__forceinline uint32_t movemask(const sseb &a) -{ - return _mm_movemask_ps(a); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Debug Functions -//////////////////////////////////////////////////////////////////////////////// - -ccl_device_inline void print_sseb(const char *label, const sseb &a) -{ - printf("%s: %d %d %d %d\n", label, a[0], a[1], a[2], a[3]); -} - -#endif - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h deleted file mode 100644 index 0c81ed87553..00000000000 --- a/intern/cycles/util/util_ssef.h +++ /dev/null @@ -1,1104 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SSEF_H__ -#define __UTIL_SSEF_H__ - -#include "util_ssei.h" - -CCL_NAMESPACE_BEGIN - -#ifdef __KERNEL_SSE2__ - -struct sseb; -struct ssef; - -/*! 4-wide SSE float type. 
*/ -struct ssef { - typedef sseb Mask; // mask type - typedef ssei Int; // int type - typedef ssef Float; // float type - - enum { size = 4 }; // number of SIMD elements - union { - __m128 m128; - float f[4]; - int i[4]; - }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline ssef() - { - } - __forceinline ssef(const ssef &other) - { - m128 = other.m128; - } - __forceinline ssef &operator=(const ssef &other) - { - m128 = other.m128; - return *this; - } - - __forceinline ssef(const __m128 a) : m128(a) - { - } - __forceinline operator const __m128 &() const - { - return m128; - } - __forceinline operator __m128 &() - { - return m128; - } - - __forceinline ssef(float a) : m128(_mm_set1_ps(a)) - { - } - __forceinline ssef(float a, float b, float c, float d) : m128(_mm_setr_ps(a, b, c, d)) - { - } - - __forceinline explicit ssef(const __m128i a) : m128(_mm_cvtepi32_ps(a)) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Loads and Stores - //////////////////////////////////////////////////////////////////////////////// - -# if defined(__KERNEL_AVX__) - static __forceinline ssef broadcast(const void *const a) - { - return _mm_broadcast_ss((float *)a); - } -# else - static __forceinline ssef broadcast(const void *const a) - { - return _mm_set1_ps(*(float *)a); - } -# endif - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline const float &operator[](const size_t i) const - { - assert(i < 4); - return f[i]; - } - __forceinline float &operator[](const size_t i) - { - assert(i < 4); - return f[i]; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssef cast(const __m128i &a) -{ - return _mm_castsi128_ps(a); -} -__forceinline const ssef operator+(const ssef &a) -{ - return a; -} -__forceinline const ssef operator-(const ssef &a) -{ - return _mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); -} -__forceinline const ssef abs(const ssef &a) -{ - return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))); -} -# if defined(__KERNEL_SSE41__) -__forceinline const ssef sign(const ssef &a) -{ - return _mm_blendv_ps(ssef(1.0f), -ssef(1.0f), _mm_cmplt_ps(a, ssef(0.0f))); -} -# endif -__forceinline const ssef signmsk(const ssef &a) -{ - return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); -} - -__forceinline const ssef rcp(const ssef &a) -{ - const ssef r = _mm_rcp_ps(a.m128); - return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a)); -} -__forceinline const ssef sqr(const ssef &a) -{ - return _mm_mul_ps(a, a); -} -__forceinline const ssef mm_sqrt(const ssef &a) -{ - return _mm_sqrt_ps(a.m128); -} -__forceinline const ssef rsqrt(const ssef &a) -{ - const ssef r = _mm_rsqrt_ps(a.m128); - return _mm_add_ps( - _mm_mul_ps(_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f), r), - _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set_ps(-0.5f, -0.5f, -0.5f, -0.5f)), r), - _mm_mul_ps(r, r))); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators 
-//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssef operator+(const ssef &a, const ssef &b) -{ - return _mm_add_ps(a.m128, b.m128); -} -__forceinline const ssef operator+(const ssef &a, const float &b) -{ - return a + ssef(b); -} -__forceinline const ssef operator+(const float &a, const ssef &b) -{ - return ssef(a) + b; -} - -__forceinline const ssef operator-(const ssef &a, const ssef &b) -{ - return _mm_sub_ps(a.m128, b.m128); -} -__forceinline const ssef operator-(const ssef &a, const float &b) -{ - return a - ssef(b); -} -__forceinline const ssef operator-(const float &a, const ssef &b) -{ - return ssef(a) - b; -} - -__forceinline const ssef operator*(const ssef &a, const ssef &b) -{ - return _mm_mul_ps(a.m128, b.m128); -} -__forceinline const ssef operator*(const ssef &a, const float &b) -{ - return a * ssef(b); -} -__forceinline const ssef operator*(const float &a, const ssef &b) -{ - return ssef(a) * b; -} - -__forceinline const ssef operator/(const ssef &a, const ssef &b) -{ - return _mm_div_ps(a.m128, b.m128); -} -__forceinline const ssef operator/(const ssef &a, const float &b) -{ - return a / ssef(b); -} -__forceinline const ssef operator/(const float &a, const ssef &b) -{ - return ssef(a) / b; -} - -__forceinline const ssef operator^(const ssef &a, const ssef &b) -{ - return _mm_xor_ps(a.m128, b.m128); -} -__forceinline const ssef operator^(const ssef &a, const ssei &b) -{ - return _mm_xor_ps(a.m128, _mm_castsi128_ps(b.m128)); -} - -__forceinline const ssef operator&(const ssef &a, const ssef &b) -{ - return _mm_and_ps(a.m128, b.m128); -} -__forceinline const ssef operator&(const ssef &a, const ssei &b) -{ - return _mm_and_ps(a.m128, _mm_castsi128_ps(b.m128)); -} - -__forceinline const ssef operator|(const ssef &a, const ssef &b) -{ - return _mm_or_ps(a.m128, b.m128); -} -__forceinline const ssef operator|(const ssef &a, const ssei &b) -{ - return _mm_or_ps(a.m128, _mm_castsi128_ps(b.m128)); -} - -__forceinline const ssef andnot(const ssef &a, const ssef &b) -{ - return _mm_andnot_ps(a.m128, b.m128); -} - -__forceinline const ssef min(const ssef &a, const ssef &b) -{ - return _mm_min_ps(a.m128, b.m128); -} -__forceinline const ssef min(const ssef &a, const float &b) -{ - return _mm_min_ps(a.m128, ssef(b)); -} -__forceinline const ssef min(const float &a, const ssef &b) -{ - return _mm_min_ps(ssef(a), b.m128); -} - -__forceinline const ssef max(const ssef &a, const ssef &b) -{ - return _mm_max_ps(a.m128, b.m128); -} -__forceinline const ssef max(const ssef &a, const float &b) -{ - return _mm_max_ps(a.m128, ssef(b)); -} -__forceinline const ssef max(const float &a, const ssef &b) -{ - return _mm_max_ps(ssef(a), b.m128); -} - -# if defined(__KERNEL_SSE41__) -__forceinline ssef mini(const ssef &a, const ssef &b) -{ - const ssei ai = _mm_castps_si128(a); - const ssei bi = _mm_castps_si128(b); - const ssei ci = _mm_min_epi32(ai, bi); - return _mm_castsi128_ps(ci); -} -# endif - -# if defined(__KERNEL_SSE41__) -__forceinline ssef maxi(const ssef &a, const ssef &b) -{ - const ssei ai = _mm_castps_si128(a); - const ssei bi = _mm_castps_si128(b); - const ssei ci = _mm_max_epi32(ai, bi); - return _mm_castsi128_ps(ci); -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Ternary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssef madd(const ssef &a, const ssef &b, const ssef &c) -{ -# if defined(__KERNEL_NEON__) - 
return vfmaq_f32(c, a, b); -# elif defined(__KERNEL_AVX2__) - return _mm_fmadd_ps(a, b, c); -# else - return a * b + c; -# endif -} -__forceinline const ssef msub(const ssef &a, const ssef &b, const ssef &c) -{ -# if defined(__KERNEL_NEON__) - return vfmaq_f32(vnegq_f32(c), a, b); -# elif defined(__KERNEL_AVX2__) - return _mm_fmsub_ps(a, b, c); -# else - return a * b - c; -# endif -} -__forceinline const ssef nmadd(const ssef &a, const ssef &b, const ssef &c) -{ -# if defined(__KERNEL_NEON__) - return vfmsq_f32(c, a, b); -# elif defined(__KERNEL_AVX2__) - return _mm_fnmadd_ps(a, b, c); -# else - return c - a * b; -# endif -} -__forceinline const ssef nmsub(const ssef &a, const ssef &b, const ssef &c) -{ -# if defined(__KERNEL_NEON__) - return vfmsq_f32(vnegq_f32(c), a, b); -# elif defined(__KERNEL_AVX2__) - return _mm_fnmsub_ps(a, b, c); -# else - return -a * b - c; -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Assignment Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssef &operator+=(ssef &a, const ssef &b) -{ - return a = a + b; -} -__forceinline ssef &operator+=(ssef &a, const float &b) -{ - return a = a + b; -} - -__forceinline ssef &operator-=(ssef &a, const ssef &b) -{ - return a = a - b; -} -__forceinline ssef &operator-=(ssef &a, const float &b) -{ - return a = a - b; -} - -__forceinline ssef &operator*=(ssef &a, const ssef &b) -{ - return a = a * b; -} -__forceinline ssef &operator*=(ssef &a, const float &b) -{ - return a = a * b; -} - -__forceinline ssef &operator/=(ssef &a, const ssef &b) -{ - return a = a / b; -} -__forceinline ssef &operator/=(ssef &a, const float &b) -{ - return a = a / b; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator==(const ssef &a, const ssef &b) -{ - return _mm_cmpeq_ps(a.m128, b.m128); -} -__forceinline const sseb operator==(const ssef &a, const float &b) -{ - return a == ssef(b); -} -__forceinline const sseb operator==(const float &a, const ssef &b) -{ - return ssef(a) == b; -} - -__forceinline const sseb operator!=(const ssef &a, const ssef &b) -{ - return _mm_cmpneq_ps(a.m128, b.m128); -} -__forceinline const sseb operator!=(const ssef &a, const float &b) -{ - return a != ssef(b); -} -__forceinline const sseb operator!=(const float &a, const ssef &b) -{ - return ssef(a) != b; -} - -__forceinline const sseb operator<(const ssef &a, const ssef &b) -{ - return _mm_cmplt_ps(a.m128, b.m128); -} -__forceinline const sseb operator<(const ssef &a, const float &b) -{ - return a < ssef(b); -} -__forceinline const sseb operator<(const float &a, const ssef &b) -{ - return ssef(a) < b; -} - -__forceinline const sseb operator>=(const ssef &a, const ssef &b) -{ - return _mm_cmpnlt_ps(a.m128, b.m128); -} -__forceinline const sseb operator>=(const ssef &a, const float &b) -{ - return a >= ssef(b); -} -__forceinline const sseb operator>=(const float &a, const ssef &b) -{ - return ssef(a) >= b; -} - -__forceinline const sseb operator>(const ssef &a, const ssef &b) -{ - return _mm_cmpnle_ps(a.m128, b.m128); -} -__forceinline const sseb operator>(const ssef &a, const float &b) -{ - return a > ssef(b); -} -__forceinline const sseb operator>(const float &a, const ssef &b) -{ - return ssef(a) > b; -} - -__forceinline const sseb operator<=(const ssef &a, const ssef &b) 
-{ - return _mm_cmple_ps(a.m128, b.m128); -} -__forceinline const sseb operator<=(const ssef &a, const float &b) -{ - return a <= ssef(b); -} -__forceinline const sseb operator<=(const float &a, const ssef &b) -{ - return ssef(a) <= b; -} - -__forceinline const ssef select(const sseb &m, const ssef &t, const ssef &f) -{ -# ifdef __KERNEL_SSE41__ - return _mm_blendv_ps(f, t, m); -# else - return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); -# endif -} - -__forceinline const ssef select(const ssef &m, const ssef &t, const ssef &f) -{ -# ifdef __KERNEL_SSE41__ - return _mm_blendv_ps(f, t, m); -# else - return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); -# endif -} - -__forceinline const ssef select(const int mask, const ssef &t, const ssef &f) -{ -# if defined(__KERNEL_SSE41__) && \ - ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) - return _mm_blend_ps(f, t, mask); -# else - return select(sseb(mask), t, f); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Rounding Functions -//////////////////////////////////////////////////////////////////////////////// - -# if defined(__KERNEL_SSE41__) -__forceinline const ssef round_even(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndnq_f32(a); -# else - return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); -# endif -} -__forceinline const ssef round_down(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndmq_f32(a); -# else - return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF); -# endif -} -__forceinline const ssef round_up(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndpq_f32(a); -# else - return _mm_round_ps(a, _MM_FROUND_TO_POS_INF); -# endif -} -__forceinline const ssef round_zero(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndq_f32(a); -# else - return _mm_round_ps(a, _MM_FROUND_TO_ZERO); -# endif -} -__forceinline const ssef floor(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndnq_f32(a); -# else - return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF); -# endif -} -__forceinline const ssef ceil(const ssef &a) -{ -# ifdef __KERNEL_NEON__ - return vrndpq_f32(a); -# else - return _mm_round_ps(a, _MM_FROUND_TO_POS_INF); -# endif -} -# endif - -__forceinline ssei truncatei(const ssef &a) -{ - return _mm_cvttps_epi32(a.m128); -} - -/* This is about 25% faster than straightforward floor to integer conversion - * due to better pipelining. - * - * Unsaturated add 0xffffffff (a < 0) is the same as subtract -1. 
- */ -__forceinline ssei floori(const ssef &a) -{ - return truncatei(a) + cast((a < 0.0f).m128); -} - -__forceinline ssef floorfrac(const ssef &x, ssei *i) -{ - *i = floori(x); - return x - ssef(*i); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Common Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssef mix(const ssef &a, const ssef &b, const ssef &t) -{ - return madd(t, b, (ssef(1.0f) - t) * a); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssef unpacklo(const ssef &a, const ssef &b) -{ - return _mm_unpacklo_ps(a.m128, b.m128); -} -__forceinline ssef unpackhi(const ssef &a, const ssef &b) -{ - return _mm_unpackhi_ps(a.m128, b.m128); -} - -template -__forceinline const ssef shuffle(const ssef &b) -{ -# ifdef __KERNEL_NEON__ - return shuffle_neon(b.m128); -# else - return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0))); -# endif -} - -template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a) -{ - return _mm_movelh_ps(a, a); -} - -template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a) -{ - return _mm_movehl_ps(a, a); -} - -template -__forceinline const ssef shuffle(const ssef &a, const ssef &b) -{ -# ifdef __KERNEL_NEON__ - return shuffle_neon(a, b); -# else - return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); -# endif -} - -template __forceinline const ssef shuffle(const ssef &a, const ssef &b) -{ -# ifdef __KERNEL_NEON__ - return shuffle_neon(a, b); -# else - return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0)); -# endif -} - -# ifndef __KERNEL_NEON__ -template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a, const ssef &b) -{ - return _mm_movelh_ps(a, b); -} - -template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a, const ssef &b) -{ - return _mm_movehl_ps(b, a); -} -# endif - -# if defined(__KERNEL_SSSE3__) -__forceinline const ssef shuffle8(const ssef &a, const ssei &shuf) -{ - return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf)); -} -# endif - -# if defined(__KERNEL_SSE3__) -template<> __forceinline const ssef shuffle<0, 0, 2, 2>(const ssef &b) -{ - return _mm_moveldup_ps(b); -} -template<> __forceinline const ssef shuffle<1, 1, 3, 3>(const ssef &b) -{ - return _mm_movehdup_ps(b); -} -# endif - -template __forceinline const ssef shuffle(const ssef &b) -{ - return shuffle(b); -} - -# if defined(__KERNEL_AVX__) -__forceinline const ssef shuffle(const ssef &a, const ssei &shuf) -{ - return _mm_permutevar_ps(a, shuf); -} -# endif - -template __forceinline float extract(const ssef &a) -{ - return _mm_cvtss_f32(shuffle(a)); -} -template<> __forceinline float extract<0>(const ssef &a) -{ - return _mm_cvtss_f32(a); -} - -# if defined(__KERNEL_SSE41__) -template -__forceinline const ssef insert(const ssef &a, const ssef &b) -{ -# ifdef __KERNEL_NEON__ - ssef res = a; - if (clr) - res[dst] = 0; - else - res[dst] = b[src]; - return res; -# else - return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); -# endif -} -template __forceinline const ssef insert(const ssef &a, const ssef &b) -{ - return insert(a, b); -} -template __forceinline const ssef insert(const ssef &a, const float b) -{ - return insert(a, _mm_set_ss(b)); -} -# else -template __forceinline const ssef insert(const ssef &a, 
const float b) -{ - ssef c = a; - c[dst] = b; - return c; -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Transpose -//////////////////////////////////////////////////////////////////////////////// - -__forceinline void transpose(const ssef &r0, - const ssef &r1, - const ssef &r2, - const ssef &r3, - ssef &c0, - ssef &c1, - ssef &c2, - ssef &c3) -{ - ssef l02 = unpacklo(r0, r2); - ssef h02 = unpackhi(r0, r2); - ssef l13 = unpacklo(r1, r3); - ssef h13 = unpackhi(r1, r3); - c0 = unpacklo(l02, l13); - c1 = unpackhi(l02, l13); - c2 = unpacklo(h02, h13); - c3 = unpackhi(h02, h13); -} - -__forceinline void transpose( - const ssef &r0, const ssef &r1, const ssef &r2, const ssef &r3, ssef &c0, ssef &c1, ssef &c2) -{ - ssef l02 = unpacklo(r0, r2); - ssef h02 = unpackhi(r0, r2); - ssef l13 = unpacklo(r1, r3); - ssef h13 = unpackhi(r1, r3); - c0 = unpacklo(l02, l13); - c1 = unpackhi(l02, l13); - c2 = unpacklo(h02, h13); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Reductions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssef vreduce_min(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vdupq_n_f32(vminvq_f32(v)); -# else - ssef h = min(shuffle<1, 0, 3, 2>(v), v); - return min(shuffle<2, 3, 0, 1>(h), h); -# endif -} -__forceinline const ssef vreduce_max(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vdupq_n_f32(vmaxvq_f32(v)); -# else - ssef h = max(shuffle<1, 0, 3, 2>(v), v); - return max(shuffle<2, 3, 0, 1>(h), h); -# endif -} -__forceinline const ssef vreduce_add(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vdupq_n_f32(vaddvq_f32(v)); -# else - ssef h = shuffle<1, 0, 3, 2>(v) + v; - return shuffle<2, 3, 0, 1>(h) + h; -# endif -} - -__forceinline float reduce_min(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vminvq_f32(v); -# else - return _mm_cvtss_f32(vreduce_min(v)); -# endif -} -__forceinline float reduce_max(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vmaxvq_f32(v); -# else - return _mm_cvtss_f32(vreduce_max(v)); -# endif -} -__forceinline float reduce_add(const ssef &v) -{ -# ifdef __KERNEL_NEON__ - return vaddvq_f32(v); -# else - return _mm_cvtss_f32(vreduce_add(v)); -# endif -} - -__forceinline uint32_t select_min(const ssef &v) -{ - return __bsf(movemask(v == vreduce_min(v))); -} -__forceinline uint32_t select_max(const ssef &v) -{ - return __bsf(movemask(v == vreduce_max(v))); -} - -__forceinline uint32_t select_min(const sseb &valid, const ssef &v) -{ - const ssef a = select(valid, v, ssef(pos_inf)); - return __bsf(movemask(valid & (a == vreduce_min(a)))); -} -__forceinline uint32_t select_max(const sseb &valid, const ssef &v) -{ - const ssef a = select(valid, v, ssef(neg_inf)); - return __bsf(movemask(valid & (a == vreduce_max(a)))); -} - -__forceinline uint32_t movemask(const ssef &a) -{ - return _mm_movemask_ps(a); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Memory load and store operations -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssef load4f(const float4 &a) -{ -# ifdef __KERNEL_WITH_SSE_ALIGN__ - return _mm_load_ps(&a.x); -# else - return _mm_loadu_ps(&a.x); -# endif -} - -__forceinline ssef load4f(const float3 &a) -{ -# ifdef __KERNEL_WITH_SSE_ALIGN__ - return _mm_load_ps(&a.x); -# else - return _mm_loadu_ps(&a.x); -# endif -} - -__forceinline ssef load4f(const void *const a) -{ - return 
_mm_load_ps((float *)a); -} - -__forceinline ssef load1f_first(const float a) -{ - return _mm_set_ss(a); -} - -__forceinline void store4f(void *ptr, const ssef &v) -{ - _mm_store_ps((float *)ptr, v); -} - -__forceinline ssef loadu4f(const void *const a) -{ - return _mm_loadu_ps((float *)a); -} - -__forceinline void storeu4f(void *ptr, const ssef &v) -{ - _mm_storeu_ps((float *)ptr, v); -} - -__forceinline void store4f(const sseb &mask, void *ptr, const ssef &f) -{ -# if defined(__KERNEL_AVX__) - _mm_maskstore_ps((float *)ptr, (__m128i)mask, f); -# else - *(ssef *)ptr = select(mask, f, *(ssef *)ptr); -# endif -} - -__forceinline ssef load4f_nt(void *ptr) -{ -# if defined(__KERNEL_SSE41__) - return _mm_castsi128_ps(_mm_stream_load_si128((__m128i *)ptr)); -# else - return _mm_load_ps((float *)ptr); -# endif -} - -__forceinline void store4f_nt(void *ptr, const ssef &v) -{ -# if defined(__KERNEL_SSE41__) - _mm_stream_ps((float *)ptr, v); -# else - _mm_store_ps((float *)ptr, v); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Euclidian Space Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline float dot(const ssef &a, const ssef &b) -{ - return reduce_add(a * b); -} - -/* calculate shuffled cross product, useful when order of components does not matter */ -__forceinline ssef cross_zxy(const ssef &a, const ssef &b) -{ - const ssef a0 = a; - const ssef b0 = shuffle<1, 2, 0, 3>(b); - const ssef a1 = shuffle<1, 2, 0, 3>(a); - const ssef b1 = b; - return msub(a0, b0, a1 * b1); -} - -__forceinline ssef cross(const ssef &a, const ssef &b) -{ - return shuffle<1, 2, 0, 3>(cross_zxy(a, b)); -} - -ccl_device_inline const ssef dot3_splat(const ssef &a, const ssef &b) -{ -# ifdef __KERNEL_SSE41__ - return _mm_dp_ps(a.m128, b.m128, 0x7f); -# else - ssef t = a * b; - return ssef(((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]); -# endif -} - -/* squared length taking only specified axes into account */ -template ccl_device_inline float len_squared(const ssef &a) -{ -# ifndef __KERNEL_SSE41__ - float4 &t = (float4 &)a; - return (X ? t.x * t.x : 0.0f) + (Y ? t.y * t.y : 0.0f) + (Z ? t.z * t.z : 0.0f) + - (W ? 
t.w * t.w : 0.0f); -# else - return extract<0>( - ssef(_mm_dp_ps(a.m128, a.m128, (X << 4) | (Y << 5) | (Z << 6) | (W << 7) | 0xf))); -# endif -} - -ccl_device_inline float dot3(const ssef &a, const ssef &b) -{ -# ifdef __KERNEL_SSE41__ - return extract<0>(ssef(_mm_dp_ps(a.m128, b.m128, 0x7f))); -# else - ssef t = a * b; - return ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]; -# endif -} - -ccl_device_inline const ssef len3_squared_splat(const ssef &a) -{ - return dot3_splat(a, a); -} - -ccl_device_inline float len3_squared(const ssef &a) -{ - return dot3(a, a); -} - -ccl_device_inline float len3(const ssef &a) -{ - return extract<0>(mm_sqrt(dot3_splat(a, a))); -} - -/* SSE shuffle utility functions */ - -# ifdef __KERNEL_SSSE3__ - -/* faster version for SSSE3 */ -typedef ssei shuffle_swap_t; - -ccl_device_inline shuffle_swap_t shuffle_swap_identity() -{ - return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); -} - -ccl_device_inline shuffle_swap_t shuffle_swap_swap() -{ - return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); -} - -ccl_device_inline const ssef shuffle_swap(const ssef &a, const shuffle_swap_t &shuf) -{ - return cast(_mm_shuffle_epi8(cast(a), shuf)); -} - -# else - -/* somewhat slower version for SSE2 */ -typedef int shuffle_swap_t; - -ccl_device_inline shuffle_swap_t shuffle_swap_identity() -{ - return 0; -} - -ccl_device_inline shuffle_swap_t shuffle_swap_swap() -{ - return 1; -} - -ccl_device_inline const ssef shuffle_swap(const ssef &a, shuffle_swap_t shuf) -{ - /* shuffle value must be a constant, so we need to branch */ - if (shuf) - return shuffle<1, 0, 3, 2>(a); - else - return shuffle<3, 2, 1, 0>(a); -} - -# endif - -# if defined(__KERNEL_SSE41__) && !defined(__KERNEL_NEON__) - -ccl_device_inline void gen_idirsplat_swap(const ssef &pn, - const shuffle_swap_t &shuf_identity, - const shuffle_swap_t &shuf_swap, - const float3 &idir, - ssef idirsplat[3], - shuffle_swap_t shufflexyz[3]) -{ - const __m128 idirsplat_raw[] = {_mm_set_ps1(idir.x), _mm_set_ps1(idir.y), _mm_set_ps1(idir.z)}; - idirsplat[0] = _mm_xor_ps(idirsplat_raw[0], pn); - idirsplat[1] = _mm_xor_ps(idirsplat_raw[1], pn); - idirsplat[2] = _mm_xor_ps(idirsplat_raw[2], pn); - - const ssef signmask = cast(ssei(0x80000000)); - const ssef shuf_identity_f = cast(shuf_identity); - const ssef shuf_swap_f = cast(shuf_swap); - - shufflexyz[0] = _mm_castps_si128( - _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[0], signmask))); - shufflexyz[1] = _mm_castps_si128( - _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[1], signmask))); - shufflexyz[2] = _mm_castps_si128( - _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[2], signmask))); -} - -# else - -ccl_device_inline void gen_idirsplat_swap(const ssef &pn, - const shuffle_swap_t &shuf_identity, - const shuffle_swap_t &shuf_swap, - const float3 &idir, - ssef idirsplat[3], - shuffle_swap_t shufflexyz[3]) -{ - idirsplat[0] = ssef(idir.x) ^ pn; - idirsplat[1] = ssef(idir.y) ^ pn; - idirsplat[2] = ssef(idir.z) ^ pn; - - shufflexyz[0] = (idir.x >= 0) ? shuf_identity : shuf_swap; - shufflexyz[1] = (idir.y >= 0) ? shuf_identity : shuf_swap; - shufflexyz[2] = (idir.z >= 0) ? 
shuf_identity : shuf_swap; -} - -# endif - -ccl_device_inline const ssef uint32_to_float(const ssei &in) -{ - ssei a = _mm_srli_epi32(in, 16); - ssei b = _mm_and_si128(in, _mm_set1_epi32(0x0000ffff)); - ssei c = _mm_or_si128(a, _mm_set1_epi32(0x53000000)); - ssef d = _mm_cvtepi32_ps(b); - ssef e = _mm_sub_ps(_mm_castsi128_ps(c), _mm_castsi128_ps(_mm_set1_epi32(0x53000000))); - return _mm_add_ps(e, d); -} - -template -ccl_device_inline const ssef set_sign_bit(const ssef &a) -{ - return cast(cast(a) ^ ssei(S1 << 31, S2 << 31, S3 << 31, S4 << 31)); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Debug Functions -//////////////////////////////////////////////////////////////////////////////// - -ccl_device_inline void print_ssef(const char *label, const ssef &a) -{ - printf( - "%s: %.8f %.8f %.8f %.8f\n", label, (double)a[0], (double)a[1], (double)a[2], (double)a[3]); -} - -#endif - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h deleted file mode 100644 index 94412fb77e7..00000000000 --- a/intern/cycles/util/util_ssei.h +++ /dev/null @@ -1,646 +0,0 @@ -/* - * Copyright 2011-2013 Intel Corporation - * Modifications Copyright 2014, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_SSEI_H__ -#define __UTIL_SSEI_H__ - -CCL_NAMESPACE_BEGIN - -#ifdef __KERNEL_SSE2__ - -struct sseb; -struct ssef; - -/*! 4-wide SSE integer type. 
*/ -struct ssei { - typedef sseb Mask; // mask type - typedef ssei Int; // int type - typedef ssef Float; // float type - - enum { size = 4 }; // number of SIMD elements - union { - __m128i m128; - int32_t i[4]; - }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline ssei() - { - } - __forceinline ssei(const ssei &a) - { - m128 = a.m128; - } - __forceinline ssei &operator=(const ssei &a) - { - m128 = a.m128; - return *this; - } - - __forceinline ssei(const __m128i a) : m128(a) - { - } - __forceinline operator const __m128i &(void) const - { - return m128; - } - __forceinline operator __m128i &(void) - { - return m128; - } - - __forceinline ssei(const int a) : m128(_mm_set1_epi32(a)) - { - } - __forceinline ssei(int a, int b, int c, int d) : m128(_mm_setr_epi32(a, b, c, d)) - { - } - - __forceinline explicit ssei(const __m128 a) : m128(_mm_cvtps_epi32(a)) - { - } - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline const int32_t &operator[](const size_t index) const - { - assert(index < 4); - return i[index]; - } - __forceinline int32_t &operator[](const size_t index) - { - assert(index < 4); - return i[index]; - } -}; - -//////////////////////////////////////////////////////////////////////////////// -/// Unary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssei cast(const __m128 &a) -{ - return _mm_castps_si128(a); -} -__forceinline const ssei operator+(const ssei &a) -{ - return a; -} -__forceinline const ssei operator-(const ssei &a) -{ - return _mm_sub_epi32(_mm_setzero_si128(), a.m128); -} -# if defined(__KERNEL_SSSE3__) -__forceinline const ssei abs(const ssei &a) -{ - return _mm_abs_epi32(a.m128); -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Binary Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline const ssei operator+(const ssei &a, const ssei &b) -{ - return _mm_add_epi32(a.m128, b.m128); -} -__forceinline const ssei operator+(const ssei &a, const int32_t &b) -{ - return a + ssei(b); -} -__forceinline const ssei operator+(const int32_t &a, const ssei &b) -{ - return ssei(a) + b; -} - -__forceinline const ssei operator-(const ssei &a, const ssei &b) -{ - return _mm_sub_epi32(a.m128, b.m128); -} -__forceinline const ssei operator-(const ssei &a, const int32_t &b) -{ - return a - ssei(b); -} -__forceinline const ssei operator-(const int32_t &a, const ssei &b) -{ - return ssei(a) - b; -} - -# if defined(__KERNEL_SSE41__) -__forceinline const ssei operator*(const ssei &a, const ssei &b) -{ - return _mm_mullo_epi32(a.m128, b.m128); -} -__forceinline const ssei operator*(const ssei &a, const int32_t &b) -{ - return a * ssei(b); -} -__forceinline const ssei operator*(const int32_t &a, const ssei &b) -{ - return ssei(a) * b; -} -# endif - -__forceinline const ssei operator&(const ssei &a, const ssei &b) -{ - return _mm_and_si128(a.m128, b.m128); -} -__forceinline const ssei operator&(const ssei &a, const int32_t &b) -{ - return a & ssei(b); -} -__forceinline const ssei operator&(const int32_t &a, const ssei &b) -{ - return ssei(a) & b; -} - -__forceinline const ssei 
operator|(const ssei &a, const ssei &b) -{ - return _mm_or_si128(a.m128, b.m128); -} -__forceinline const ssei operator|(const ssei &a, const int32_t &b) -{ - return a | ssei(b); -} -__forceinline const ssei operator|(const int32_t &a, const ssei &b) -{ - return ssei(a) | b; -} - -__forceinline const ssei operator^(const ssei &a, const ssei &b) -{ - return _mm_xor_si128(a.m128, b.m128); -} -__forceinline const ssei operator^(const ssei &a, const int32_t &b) -{ - return a ^ ssei(b); -} -__forceinline const ssei operator^(const int32_t &a, const ssei &b) -{ - return ssei(a) ^ b; -} - -__forceinline const ssei operator<<(const ssei &a, const int32_t &n) -{ - return _mm_slli_epi32(a.m128, n); -} -__forceinline const ssei operator>>(const ssei &a, const int32_t &n) -{ - return _mm_srai_epi32(a.m128, n); -} - -__forceinline const ssei andnot(const ssei &a, const ssei &b) -{ - return _mm_andnot_si128(a.m128, b.m128); -} -__forceinline const ssei andnot(const sseb &a, const ssei &b) -{ - return _mm_andnot_si128(cast(a.m128), b.m128); -} -__forceinline const ssei andnot(const ssei &a, const sseb &b) -{ - return _mm_andnot_si128(a.m128, cast(b.m128)); -} - -__forceinline const ssei sra(const ssei &a, const int32_t &b) -{ - return _mm_srai_epi32(a.m128, b); -} -__forceinline const ssei srl(const ssei &a, const int32_t &b) -{ - return _mm_srli_epi32(a.m128, b); -} - -# if defined(__KERNEL_SSE41__) -__forceinline const ssei min(const ssei &a, const ssei &b) -{ - return _mm_min_epi32(a.m128, b.m128); -} -__forceinline const ssei min(const ssei &a, const int32_t &b) -{ - return min(a, ssei(b)); -} -__forceinline const ssei min(const int32_t &a, const ssei &b) -{ - return min(ssei(a), b); -} - -__forceinline const ssei max(const ssei &a, const ssei &b) -{ - return _mm_max_epi32(a.m128, b.m128); -} -__forceinline const ssei max(const ssei &a, const int32_t &b) -{ - return max(a, ssei(b)); -} -__forceinline const ssei max(const int32_t &a, const ssei &b) -{ - return max(ssei(a), b); -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Assignment Operators -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssei &operator+=(ssei &a, const ssei &b) -{ - return a = a + b; -} -__forceinline ssei &operator+=(ssei &a, const int32_t &b) -{ - return a = a + b; -} - -__forceinline ssei &operator-=(ssei &a, const ssei &b) -{ - return a = a - b; -} -__forceinline ssei &operator-=(ssei &a, const int32_t &b) -{ - return a = a - b; -} - -# if defined(__KERNEL_SSE41__) -__forceinline ssei &operator*=(ssei &a, const ssei &b) -{ - return a = a * b; -} -__forceinline ssei &operator*=(ssei &a, const int32_t &b) -{ - return a = a * b; -} -# endif - -__forceinline ssei &operator&=(ssei &a, const ssei &b) -{ - return a = a & b; -} -__forceinline ssei &operator&=(ssei &a, const int32_t &b) -{ - return a = a & b; -} - -__forceinline ssei &operator|=(ssei &a, const ssei &b) -{ - return a = a | b; -} -__forceinline ssei &operator|=(ssei &a, const int32_t &b) -{ - return a = a | b; -} - -__forceinline ssei &operator^=(ssei &a, const ssei &b) -{ - return a = a ^ b; -} -__forceinline ssei &operator^=(ssei &a, const int32_t &b) -{ - return a = a ^ b; -} - -__forceinline ssei &operator<<=(ssei &a, const int32_t &b) -{ - return a = a << b; -} -__forceinline ssei &operator>>=(ssei &a, const int32_t &b) -{ - return a = a >> b; -} - -//////////////////////////////////////////////////////////////////////////////// -/// Comparison Operators + Select 
-//////////////////////////////////////////////////////////////////////////////// - -__forceinline const sseb operator==(const ssei &a, const ssei &b) -{ - return _mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128)); -} -__forceinline const sseb operator==(const ssei &a, const int32_t &b) -{ - return a == ssei(b); -} -__forceinline const sseb operator==(const int32_t &a, const ssei &b) -{ - return ssei(a) == b; -} - -__forceinline const sseb operator!=(const ssei &a, const ssei &b) -{ - return !(a == b); -} -__forceinline const sseb operator!=(const ssei &a, const int32_t &b) -{ - return a != ssei(b); -} -__forceinline const sseb operator!=(const int32_t &a, const ssei &b) -{ - return ssei(a) != b; -} - -__forceinline const sseb operator<(const ssei &a, const ssei &b) -{ - return _mm_castsi128_ps(_mm_cmplt_epi32(a.m128, b.m128)); -} -__forceinline const sseb operator<(const ssei &a, const int32_t &b) -{ - return a < ssei(b); -} -__forceinline const sseb operator<(const int32_t &a, const ssei &b) -{ - return ssei(a) < b; -} - -__forceinline const sseb operator>=(const ssei &a, const ssei &b) -{ - return !(a < b); -} -__forceinline const sseb operator>=(const ssei &a, const int32_t &b) -{ - return a >= ssei(b); -} -__forceinline const sseb operator>=(const int32_t &a, const ssei &b) -{ - return ssei(a) >= b; -} - -__forceinline const sseb operator>(const ssei &a, const ssei &b) -{ - return _mm_castsi128_ps(_mm_cmpgt_epi32(a.m128, b.m128)); -} -__forceinline const sseb operator>(const ssei &a, const int32_t &b) -{ - return a > ssei(b); -} -__forceinline const sseb operator>(const int32_t &a, const ssei &b) -{ - return ssei(a) > b; -} - -__forceinline const sseb operator<=(const ssei &a, const ssei &b) -{ - return !(a > b); -} -__forceinline const sseb operator<=(const ssei &a, const int32_t &b) -{ - return a <= ssei(b); -} -__forceinline const sseb operator<=(const int32_t &a, const ssei &b) -{ - return ssei(a) <= b; -} - -__forceinline const ssei select(const sseb &m, const ssei &t, const ssei &f) -{ -# ifdef __KERNEL_SSE41__ - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); -# else - return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f)); -# endif -} - -__forceinline const ssei select(const int mask, const ssei &t, const ssei &f) -{ -# if defined(__KERNEL_SSE41__) && \ - ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) - return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), mask)); -# else - return select(sseb(mask), t, f); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -// Movement/Shifting/Shuffling Functions -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssei unpacklo(const ssei &a, const ssei &b) -{ - return _mm_unpacklo_epi32(a, b); -} -__forceinline ssei unpackhi(const ssei &a, const ssei &b) -{ - return _mm_unpackhi_epi32(a, b); -} - -template -__forceinline const ssei shuffle(const ssei &a) -{ -# ifdef __KERNEL_NEON__ - int32x4_t result = shuffle_neon(vreinterpretq_s32_m128i(a)); - return vreinterpretq_m128i_s32(result); -# else - return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)); -# endif -} - -template -__forceinline const ssei shuffle(const ssei &a, const ssei &b) -{ -# ifdef __KERNEL_NEON__ - int32x4_t result = shuffle_neon(vreinterpretq_s32_m128i(a), - vreinterpretq_s32_m128i(b)); - return vreinterpretq_m128i_s32(result); -# else - return _mm_castps_si128( - 
_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); -# endif -} - -template __forceinline const ssei shuffle(const ssei &b) -{ - return shuffle(b); -} - -# if defined(__KERNEL_SSE41__) -template __forceinline int extract(const ssei &b) -{ - return _mm_extract_epi32(b, src); -} -template __forceinline const ssei insert(const ssei &a, const int32_t b) -{ - return _mm_insert_epi32(a, b, dst); -} -# else -template __forceinline int extract(const ssei &b) -{ - return b[src]; -} -template __forceinline const ssei insert(const ssei &a, const int32_t b) -{ - ssei c = a; - c[dst] = b; - return c; -} -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Reductions -//////////////////////////////////////////////////////////////////////////////// - -# if defined(__KERNEL_SSE41__) -__forceinline const ssei vreduce_min(const ssei &v) -{ - ssei h = min(shuffle<1, 0, 3, 2>(v), v); - return min(shuffle<2, 3, 0, 1>(h), h); -} -__forceinline const ssei vreduce_max(const ssei &v) -{ - ssei h = max(shuffle<1, 0, 3, 2>(v), v); - return max(shuffle<2, 3, 0, 1>(h), h); -} -__forceinline const ssei vreduce_add(const ssei &v) -{ - ssei h = shuffle<1, 0, 3, 2>(v) + v; - return shuffle<2, 3, 0, 1>(h) + h; -} - -__forceinline int reduce_min(const ssei &v) -{ -# ifdef __KERNEL_NEON__ - return vminvq_s32(vreinterpretq_s32_m128i(v)); -# else - return extract<0>(vreduce_min(v)); -# endif -} -__forceinline int reduce_max(const ssei &v) -{ -# ifdef __KERNEL_NEON__ - return vmaxvq_s32(vreinterpretq_s32_m128i(v)); -# else - return extract<0>(vreduce_max(v)); -# endif -} -__forceinline int reduce_add(const ssei &v) -{ -# ifdef __KERNEL_NEON__ - return vaddvq_s32(vreinterpretq_s32_m128i(v)); -# else - return extract<0>(vreduce_add(v)); -# endif -} - -__forceinline uint32_t select_min(const ssei &v) -{ - return __bsf(movemask(v == vreduce_min(v))); -} -__forceinline uint32_t select_max(const ssei &v) -{ - return __bsf(movemask(v == vreduce_max(v))); -} - -__forceinline uint32_t select_min(const sseb &valid, const ssei &v) -{ - const ssei a = select(valid, v, ssei((int)pos_inf)); - return __bsf(movemask(valid & (a == vreduce_min(a)))); -} -__forceinline uint32_t select_max(const sseb &valid, const ssei &v) -{ - const ssei a = select(valid, v, ssei((int)neg_inf)); - return __bsf(movemask(valid & (a == vreduce_max(a)))); -} - -# else - -__forceinline int ssei_min(int a, int b) -{ - return (a < b) ? a : b; -} -__forceinline int ssei_max(int a, int b) -{ - return (a > b) ? 
a : b; -} -__forceinline int reduce_min(const ssei &v) -{ - return ssei_min(ssei_min(v[0], v[1]), ssei_min(v[2], v[3])); -} -__forceinline int reduce_max(const ssei &v) -{ - return ssei_max(ssei_max(v[0], v[1]), ssei_max(v[2], v[3])); -} -__forceinline int reduce_add(const ssei &v) -{ - return v[0] + v[1] + v[2] + v[3]; -} - -# endif - -//////////////////////////////////////////////////////////////////////////////// -/// Memory load and store operations -//////////////////////////////////////////////////////////////////////////////// - -__forceinline ssei load4i(const void *const a) -{ - return _mm_load_si128((__m128i *)a); -} - -__forceinline void store4i(void *ptr, const ssei &v) -{ - _mm_store_si128((__m128i *)ptr, v); -} - -__forceinline void storeu4i(void *ptr, const ssei &v) -{ - _mm_storeu_si128((__m128i *)ptr, v); -} - -__forceinline void store4i(const sseb &mask, void *ptr, const ssei &i) -{ -# if defined(__KERNEL_AVX__) - _mm_maskstore_ps((float *)ptr, (__m128i)mask, _mm_castsi128_ps(i)); -# else - *(ssei *)ptr = select(mask, i, *(ssei *)ptr); -# endif -} - -__forceinline ssei load4i_nt(void *ptr) -{ -# if defined(__KERNEL_SSE41__) - return _mm_stream_load_si128((__m128i *)ptr); -# else - return _mm_load_si128((__m128i *)ptr); -# endif -} - -__forceinline void store4i_nt(void *ptr, const ssei &v) -{ -# if defined(__KERNEL_SSE41__) - _mm_stream_ps((float *)ptr, _mm_castsi128_ps(v)); -# else - _mm_store_si128((__m128i *)ptr, v); -# endif -} - -//////////////////////////////////////////////////////////////////////////////// -/// Debug Functions -//////////////////////////////////////////////////////////////////////////////// - -ccl_device_inline void print_ssei(const char *label, const ssei &a) -{ - printf("%s: %df %df %df %d\n", label, a[0], a[1], a[2], a[3]); -} - -#endif - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_stack_allocator.h b/intern/cycles/util/util_stack_allocator.h deleted file mode 100644 index ef31c0fe5e2..00000000000 --- a/intern/cycles/util/util_stack_allocator.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_STACK_ALLOCATOR_H__ -#define __UTIL_STACK_ALLOCATOR_H__ - -#include -#include - -CCL_NAMESPACE_BEGIN - -/* Stack allocator for the use with STL. */ -template class ccl_try_align(16) StackAllocator -{ - public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T *pointer; - typedef const T *const_pointer; - typedef T &reference; - typedef const T &const_reference; - typedef T value_type; - - /* Allocator construction/destruction. */ - - StackAllocator() : pointer_(0), use_stack_(true) - { - } - - StackAllocator(const StackAllocator &) : pointer_(0), use_stack_(true) - { - } - - template - StackAllocator(const StackAllocator &) : pointer_(0), use_stack_(false) - { - } - - /* Memory allocation/deallocation. 
*/ - - T *allocate(size_t n, const void *hint = 0) - { - (void)hint; - if (n == 0) { - return NULL; - } - if (pointer_ + n >= SIZE || use_stack_ == false) { - size_t size = n * sizeof(T); - util_guarded_mem_alloc(size); - T *mem; -#ifdef WITH_BLENDER_GUARDEDALLOC - mem = (T *)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); -#else - mem = (T *)malloc(size); -#endif - if (mem == NULL) { - throw std::bad_alloc(); - } - return mem; - } - T *mem = &data_[pointer_]; - pointer_ += n; - return mem; - } - - void deallocate(T * p, size_t n) - { - if (p == NULL) { - return; - } - if (p < data_ || p >= data_ + SIZE) { - util_guarded_mem_free(n * sizeof(T)); -#ifdef WITH_BLENDER_GUARDEDALLOC - MEM_freeN(p); -#else - free(p); -#endif - return; - } - /* We don't support memory free for the stack allocator. */ - } - - /* Address of an reference. */ - - T *address(T & x) const - { - return &x; - } - - const T *address(const T &x) const - { - return &x; - } - - /* Object construction/destruction. */ - - void construct(T * p, const T &val) - { - if (p != NULL) { - new ((T *)p) T(val); - } - } - - void destroy(T * p) - { - p->~T(); - } - - /* Maximum allocation size. */ - - size_t max_size() const - { - return size_t(-1); - } - - /* Rebind to other type of allocator. */ - - template struct rebind { - typedef StackAllocator other; - }; - - /* Operators */ - - template inline StackAllocator &operator=(const StackAllocator &) - { - return *this; - } - - StackAllocator &operator=(const StackAllocator &) - { - return *this; - } - - inline bool operator==(StackAllocator const & /*other*/) const - { - return true; - } - - inline bool operator!=(StackAllocator const &other) const - { - return !operator==(other); - } - - private: - int pointer_; - bool use_stack_; - T data_[SIZE]; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_STACK_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h deleted file mode 100644 index 7df52d462b7..00000000000 --- a/intern/cycles/util/util_static_assert.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* clang-format off */ - -/* #define static_assert triggers a bug in some clang-format versions, disable - * format for entire file to keep results consistent. 
*/ - -#ifndef __UTIL_STATIC_ASSERT_H__ -#define __UTIL_STATIC_ASSERT_H__ - -CCL_NAMESPACE_BEGIN - -#if defined(CYCLES_CUBIN_CC) -# define static_assert(statement, message) -#endif - -#define static_assert_align(st, align) \ - static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT - -CCL_NAMESPACE_END - -#endif /* __UTIL_STATIC_ASSERT_H__ */ diff --git a/intern/cycles/util/util_stats.h b/intern/cycles/util/util_stats.h deleted file mode 100644 index 15cf836de3c..00000000000 --- a/intern/cycles/util/util_stats.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_STATS_H__ -#define __UTIL_STATS_H__ - -#include "util/util_atomic.h" -#include "util/util_profiling.h" - -CCL_NAMESPACE_BEGIN - -class Stats { - public: - enum static_init_t { static_init = 0 }; - - Stats() : mem_used(0), mem_peak(0) - { - } - explicit Stats(static_init_t) - { - } - - void mem_alloc(size_t size) - { - atomic_add_and_fetch_z(&mem_used, size); - atomic_fetch_and_update_max_z(&mem_peak, mem_used); - } - - void mem_free(size_t size) - { - assert(mem_used >= size); - atomic_sub_and_fetch_z(&mem_used, size); - } - - size_t mem_used; - size_t mem_peak; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_STATS_H__ */ diff --git a/intern/cycles/util/util_string.cpp b/intern/cycles/util/util_string.cpp deleted file mode 100644 index 0fc9cb4ae77..00000000000 --- a/intern/cycles/util/util_string.cpp +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include -#include - -#include "util/util_foreach.h" -#include "util/util_string.h" -#include "util/util_windows.h" - -#ifdef _WIN32 -# ifndef vsnprintf -# define vsnprintf _vsnprintf -# endif -#endif /* _WIN32 */ - -CCL_NAMESPACE_BEGIN - -string string_printf(const char *format, ...) 
-{ - vector str(128, 0); - - while (1) { - va_list args; - int result; - - va_start(args, format); - result = vsnprintf(&str[0], str.size(), format, args); - va_end(args); - - if (result == -1) { - /* not enough space or formatting error */ - if (str.size() > 65536) { - assert(0); - return string(""); - } - - str.resize(str.size() * 2, 0); - continue; - } - else if (result >= (int)str.size()) { - /* not enough space */ - str.resize(result + 1, 0); - continue; - } - - return string(&str[0]); - } -} - -bool string_iequals(const string &a, const string &b) -{ - if (a.size() == b.size()) { - for (size_t i = 0; i < a.size(); i++) - if (toupper(a[i]) != toupper(b[i])) - return false; - - return true; - } - - return false; -} - -void string_split(vector &tokens, - const string &str, - const string &separators, - bool skip_empty_tokens) -{ - size_t token_start = 0, token_length = 0; - for (size_t i = 0; i < str.size(); ++i) { - const char ch = str[i]; - if (separators.find(ch) == string::npos) { - /* Current character is not a separator, - * append it to token by increasing token length. - */ - ++token_length; - } - else { - /* Current character is a separator, - * append current token to the list. - */ - if (!skip_empty_tokens || token_length > 0) { - string token = str.substr(token_start, token_length); - tokens.push_back(token); - } - token_start = i + 1; - token_length = 0; - } - } - /* Append token from the tail of the string if exists. */ - if (token_length) { - string token = str.substr(token_start, token_length); - tokens.push_back(token); - } -} - -bool string_startswith(const string_view s, const string_view start) -{ - const size_t len = start.size(); - - if (len > s.size()) { - return false; - } - - return strncmp(s.c_str(), start.data(), len) == 0; -} - -bool string_endswith(const string_view s, const string_view end) -{ - const size_t len = end.size(); - - if (len > s.size()) { - return false; - } - - return strncmp(s.c_str() + s.size() - len, end.data(), len) == 0; -} - -string string_strip(const string &s) -{ - string result = s; - result.erase(0, result.find_first_not_of(' ')); - result.erase(result.find_last_not_of(' ') + 1); - return result; -} - -void string_replace(string &haystack, const string &needle, const string &other) -{ - size_t i = 0, index; - while ((index = haystack.find(needle, i)) != string::npos) { - haystack.replace(index, needle.size(), other); - i = index + other.size(); - } -} - -string string_remove_trademark(const string &s) -{ - string result = s; - - /* Special case, so we don't leave sequential spaces behind. */ - /* TODO(sergey): Consider using regex perhaps? */ - string_replace(result, " (TM)", ""); - string_replace(result, " (R)", ""); - - string_replace(result, "(TM)", ""); - string_replace(result, "(R)", ""); - - return string_strip(result); -} - -string string_from_bool(bool var) -{ - if (var) - return "True"; - else - return "False"; -} - -string to_string(const char *str) -{ - return string(str); -} - -string string_to_lower(const string &s) -{ - string r = s; - std::transform(r.begin(), r.end(), r.begin(), [](char c) { return std::tolower(c); }); - return r; -} - -/* Wide char strings helpers for Windows. 
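A short usage sketch for the string helpers defined above (string_split, string_printf, string_iequals, string_strip, string_remove_trademark), assuming the pre-rename util/util_string.h header and that CCL_NAMESPACE_BEGIN opens namespace ccl.

#include <cassert>

#include "util/util_string.h"

void string_demo()
{
  using namespace ccl;

  vector<string> a, b;
  string_split(a, "x,,y,z", ",");        /* {"x", "y", "z"}: empty tokens skipped by default */
  string_split(b, "x,,y,z", ",", false); /* {"x", "", "y", "z"}: note that tokens are appended, not cleared */

  const string msg = string_printf("%d vs %d", (int)a.size(), (int)b.size());
  assert(msg == "3 vs 4");

  assert(string_iequals("Cycles", "CYCLES"));
  assert(string_strip("  spaced  ") == "spaced");

  /* "(TM)" / "(R)" markers are removed and the result is trimmed. */
  assert(string_remove_trademark("Foo(TM) Bar (R)") == "Foo Bar");
}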
*/ - -#ifdef _WIN32 - -wstring string_to_wstring(const string &str) -{ - const int length_wc = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0); - wstring str_wc(length_wc, 0); - MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &str_wc[0], length_wc); - return str_wc; -} - -string string_from_wstring(const wstring &str) -{ - int length_mb = WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.size(), NULL, 0, NULL, NULL); - string str_mb(length_mb, 0); - WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.size(), &str_mb[0], length_mb, NULL, NULL); - return str_mb; -} - -string string_to_ansi(const string &str) -{ - const int length_wc = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0); - wstring str_wc(length_wc, 0); - MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &str_wc[0], length_wc); - - int length_mb = WideCharToMultiByte( - CP_ACP, 0, str_wc.c_str(), str_wc.size(), NULL, 0, NULL, NULL); - - string str_mb(length_mb, 0); - WideCharToMultiByte(CP_ACP, 0, str_wc.c_str(), str_wc.size(), &str_mb[0], length_mb, NULL, NULL); - - return str_mb; -} - -#endif /* _WIN32 */ - -string string_human_readable_size(size_t size) -{ - static const char suffixes[] = "BKMGTPEZY"; - - const char *suffix = suffixes; - size_t r = 0; - - while (size >= 1024) { - r = size % 1024; - size /= 1024; - suffix++; - } - - if (*suffix != 'B') - return string_printf("%.2f%c", double(size * 1024 + r) / 1024.0, *suffix); - else - return string_printf("%zu", size); -} - -string string_human_readable_number(size_t num) -{ - if (num == 0) { - return "0"; - } - - /* Add thousands separators. */ - char buf[32]; - - char *p = buf + 31; - *p = '\0'; - - int i = -1; - while (num) { - if (++i && i % 3 == 0) - *(--p) = ','; - - *(--p) = '0' + (num % 10); - - num /= 10; - } - - return p; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h deleted file mode 100644 index 55462cfd8b8..00000000000 --- a/intern/cycles/util/util_string.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_STRING_H__ -#define __UTIL_STRING_H__ - -#include -#include -#include - -/* Use string view implementation from OIIO. - * Ideally, need to switch to `std::string_view`, but this first requires getting rid of using - * namespace OIIO as it causes symbol collision. */ -#include - -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -using std::istringstream; -using std::ostringstream; -using std::string; -using std::stringstream; -using std::to_string; - -using OIIO::string_view; - -#ifdef __GNUC__ -# define PRINTF_ATTRIBUTE __attribute__((format(printf, 1, 2))) -#else -# define PRINTF_ATTRIBUTE -#endif - -string string_printf(const char *format, ...) 
PRINTF_ATTRIBUTE; - -bool string_iequals(const string &a, const string &b); -void string_split(vector &tokens, - const string &str, - const string &separators = "\t ", - bool skip_empty_tokens = true); -void string_replace(string &haystack, const string &needle, const string &other); -bool string_startswith(string_view s, string_view start); -bool string_endswith(string_view s, string_view end); -string string_strip(const string &s); -string string_remove_trademark(const string &s); -string string_from_bool(const bool var); -string to_string(const char *str); -string string_to_lower(const string &s); - -/* Wide char strings are only used on Windows to deal with non-ASCII - * characters in file names and such. No reason to use such strings - * for something else at this moment. - * - * Please note that strings are expected to be in UTF-8 codepage, and - * if ANSI is needed then explicit conversion required. - */ -#ifdef _WIN32 -using std::wstring; -wstring string_to_wstring(const string &path); -string string_from_wstring(const wstring &path); -string string_to_ansi(const string &str); -#endif - -/* Make a string from a size in bytes in human readable form. */ -string string_human_readable_size(size_t size); -/* Make a string from a unit-less quantity in human readable form. */ -string string_human_readable_number(size_t num); - -CCL_NAMESPACE_END - -#endif /* __UTIL_STRING_H__ */ diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp deleted file mode 100644 index be8c2fb505a..00000000000 --- a/intern/cycles/util/util_system.cpp +++ /dev/null @@ -1,415 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_system.h" - -#include "util/util_logging.h" -#include "util/util_string.h" -#include "util/util_types.h" - -#include - -#include -OIIO_NAMESPACE_USING - -#ifdef _WIN32 -# if (!defined(FREE_WINDOWS)) -# include -# endif -# include "util_windows.h" -#elif defined(__APPLE__) -# include -# include -# include -#else -# include -# include -#endif - -CCL_NAMESPACE_BEGIN - -bool system_cpu_ensure_initialized() -{ - static bool is_initialized = false; - static bool result = false; - if (is_initialized) { - return result; - } - is_initialized = true; - const NUMAAPI_Result numa_result = numaAPI_Initialize(); - result = (numa_result == NUMAAPI_SUCCESS); - return result; -} - -/* Fallback solution, which doesn't use NUMA/CPU groups. 
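A small sketch of the expected output of the human-readable formatting helpers from util_string above, assuming the pre-rename util/util_string.h header and namespace ccl.

#include <cassert>

#include "util/util_string.h"

void format_demo()
{
  using namespace ccl;

  assert(string_human_readable_size(512) == "512");    /* below 1 KiB: plain byte count */
  assert(string_human_readable_size(1536) == "1.50K"); /* 1024-based, two decimals */

  assert(string_human_readable_number(1234567) == "1,234,567"); /* thousands separators */
  assert(string_from_bool(true) == "True");
}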
*/ -static int system_cpu_thread_count_fallback() -{ -#ifdef _WIN32 - SYSTEM_INFO info; - GetSystemInfo(&info); - return info.dwNumberOfProcessors; -#elif defined(__APPLE__) - int count; - size_t len = sizeof(count); - int mib[2] = {CTL_HW, HW_NCPU}; - sysctl(mib, 2, &count, &len, NULL, 0); - return count; -#else - return sysconf(_SC_NPROCESSORS_ONLN); -#endif -} - -int system_cpu_thread_count() -{ - const int num_nodes = system_cpu_num_numa_nodes(); - int num_threads = 0; - for (int node = 0; node < num_nodes; ++node) { - if (!system_cpu_is_numa_node_available(node)) { - continue; - } - num_threads += system_cpu_num_numa_node_processors(node); - } - return num_threads; -} - -int system_cpu_num_numa_nodes() -{ - if (!system_cpu_ensure_initialized()) { - /* Fallback to a single node with all the threads. */ - return 1; - } - return numaAPI_GetNumNodes(); -} - -bool system_cpu_is_numa_node_available(int node) -{ - if (!system_cpu_ensure_initialized()) { - return true; - } - return numaAPI_IsNodeAvailable(node); -} - -int system_cpu_num_numa_node_processors(int node) -{ - if (!system_cpu_ensure_initialized()) { - return system_cpu_thread_count_fallback(); - } - return numaAPI_GetNumNodeProcessors(node); -} - -bool system_cpu_run_thread_on_node(int node) -{ - if (!system_cpu_ensure_initialized()) { - return true; - } - return numaAPI_RunThreadOnNode(node); -} - -int system_console_width() -{ - int columns = 0; - -#ifdef _WIN32 - CONSOLE_SCREEN_BUFFER_INFO csbi; - if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) { - columns = csbi.dwSize.X; - } -#else - struct winsize w; - if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { - columns = w.ws_col; - } -#endif - - return (columns > 0) ? columns : 80; -} - -int system_cpu_num_active_group_processors() -{ - if (!system_cpu_ensure_initialized()) { - return system_cpu_thread_count_fallback(); - } - return numaAPI_GetNumCurrentNodesProcessors(); -} - -/* Equivalent of Windows __cpuid for x86 processors on other platforms. */ -#if (!defined(_WIN32) || defined(FREE_WINDOWS)) && (defined(__x86_64__) || defined(__i386__)) -static void __cpuid(int data[4], int selector) -{ -# if defined(__x86_64__) - asm("cpuid" : "=a"(data[0]), "=b"(data[1]), "=c"(data[2]), "=d"(data[3]) : "a"(selector)); -# elif defined(__i386__) - asm("pushl %%ebx \n\t" - "cpuid \n\t" - "movl %%ebx, %1 \n\t" - "popl %%ebx \n\t" - : "=a"(data[0]), "=r"(data[1]), "=c"(data[2]), "=d"(data[3]) - : "a"(selector) - : "ebx"); -# else - data[0] = data[1] = data[2] = data[3] = 0; -# endif -} -#endif - -string system_cpu_brand_string() -{ -#if defined(__APPLE__) - /* Get from system on macOS. */ - char modelname[512] = ""; - size_t bufferlen = 512; - if (sysctlbyname("machdep.cpu.brand_string", &modelname, &bufferlen, NULL, 0) == 0) { - return modelname; - } -#elif defined(WIN32) || defined(__x86_64__) || defined(__i386__) - /* Get from intrinsics on Windows and x86. */ - char buf[49] = {0}; - int result[4] = {0}; - - __cpuid(result, 0x80000000); - - if (result[0] != 0 && result[0] >= (int)0x80000004) { - __cpuid((int *)(buf + 0), 0x80000002); - __cpuid((int *)(buf + 16), 0x80000003); - __cpuid((int *)(buf + 32), 0x80000004); - - string brand = buf; - - /* Make it a bit more presentable. */ - brand = string_remove_trademark(brand); - - return brand; - } -#else - /* Get from /proc/cpuinfo on Unix systems. 
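An illustrative walk over the NUMA topology using the queries shown above; system_cpu_thread_count() is exactly this sum over available nodes. Assumes the pre-rename util/util_system.h header and namespace ccl.

#include <cstdio>

#include "util/util_system.h"

void print_cpu_topology()
{
  using namespace ccl;

  const int num_nodes = system_cpu_num_numa_nodes();
  for (int node = 0; node < num_nodes; ++node) {
    if (!system_cpu_is_numa_node_available(node)) {
      continue; /* The node index space may contain gaps; skip unavailable nodes. */
    }
    printf("node %d: %d processors\n", node, system_cpu_num_numa_node_processors(node));
  }

  printf("total threads: %d\n", system_cpu_thread_count());
}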
*/ - FILE *cpuinfo = fopen("/proc/cpuinfo", "r"); - if (cpuinfo != nullptr) { - char cpuinfo_buf[513] = ""; - fread(cpuinfo_buf, sizeof(cpuinfo_buf) - 1, 1, cpuinfo); - fclose(cpuinfo); - - char *modelname = strstr(cpuinfo_buf, "model name"); - if (modelname != nullptr) { - modelname = strchr(modelname, ':'); - if (modelname != nullptr) { - modelname += 2; - char *modelname_end = strchr(modelname, '\n'); - if (modelname_end != nullptr) { - *modelname_end = '\0'; - return modelname; - } - } - } - } -#endif - return "Unknown CPU"; -} - -int system_cpu_bits() -{ - return (sizeof(void *) * 8); -} - -#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) - -struct CPUCapabilities { - bool x64; - bool mmx; - bool sse; - bool sse2; - bool sse3; - bool ssse3; - bool sse41; - bool sse42; - bool sse4a; - bool avx; - bool f16c; - bool avx2; - bool xop; - bool fma3; - bool fma4; - bool bmi1; - bool bmi2; -}; - -static CPUCapabilities &system_cpu_capabilities() -{ - static CPUCapabilities caps; - static bool caps_init = false; - - if (!caps_init) { - int result[4], num; - - memset(&caps, 0, sizeof(caps)); - - __cpuid(result, 0); - num = result[0]; - - if (num >= 1) { - __cpuid(result, 0x00000001); - caps.mmx = (result[3] & ((int)1 << 23)) != 0; - caps.sse = (result[3] & ((int)1 << 25)) != 0; - caps.sse2 = (result[3] & ((int)1 << 26)) != 0; - caps.sse3 = (result[2] & ((int)1 << 0)) != 0; - - caps.ssse3 = (result[2] & ((int)1 << 9)) != 0; - caps.sse41 = (result[2] & ((int)1 << 19)) != 0; - caps.sse42 = (result[2] & ((int)1 << 20)) != 0; - - caps.fma3 = (result[2] & ((int)1 << 12)) != 0; - caps.avx = false; - bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0; - bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0; - - if (os_uses_xsave_xrestore && cpu_avx_support) { - // Check if the OS will save the YMM registers - uint32_t xcr_feature_mask; -# if defined(__GNUC__) - int edx; /* not used */ - /* actual opcode for xgetbv */ - __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr_feature_mask), "=d"(edx) : "c"(0)); -# elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) - /* Minimum VS2010 SP1 compiler is required. 
*/ - xcr_feature_mask = (uint32_t)_xgetbv(_XCR_XFEATURE_ENABLED_MASK); -# else - xcr_feature_mask = 0; -# endif - caps.avx = (xcr_feature_mask & 0x6) == 0x6; - } - - caps.f16c = (result[2] & ((int)1 << 29)) != 0; - - __cpuid(result, 0x00000007); - caps.bmi1 = (result[1] & ((int)1 << 3)) != 0; - caps.bmi2 = (result[1] & ((int)1 << 8)) != 0; - caps.avx2 = (result[1] & ((int)1 << 5)) != 0; - } - - caps_init = true; - } - - return caps; -} - -bool system_cpu_support_sse2() -{ - CPUCapabilities &caps = system_cpu_capabilities(); - return caps.sse && caps.sse2; -} - -bool system_cpu_support_sse3() -{ - CPUCapabilities &caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3; -} - -bool system_cpu_support_sse41() -{ - CPUCapabilities &caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41; -} - -bool system_cpu_support_avx() -{ - CPUCapabilities &caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx; -} - -bool system_cpu_support_avx2() -{ - CPUCapabilities &caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c && - caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2; -} -#else - -bool system_cpu_support_sse2() -{ - return false; -} - -bool system_cpu_support_sse3() -{ - return false; -} - -bool system_cpu_support_sse41() -{ - return false; -} - -bool system_cpu_support_avx() -{ - return false; -} -bool system_cpu_support_avx2() -{ - return false; -} - -#endif - -bool system_call_self(const vector &args) -{ - /* Escape program and arguments in case they contain spaces. */ - string cmd = "\"" + Sysutil::this_program_path() + "\""; - - for (int i = 0; i < args.size(); i++) { - cmd += " \"" + args[i] + "\""; - } - -#ifdef _WIN32 - /* Use cmd /S to avoid issues with spaces in arguments. */ - cmd = "cmd /S /C \"" + cmd + " > nul \""; -#else - /* Quiet output. */ - cmd += " > /dev/null"; -#endif - - return (system(cmd.c_str()) == 0); -} - -size_t system_physical_ram() -{ -#ifdef _WIN32 - MEMORYSTATUSEX ram; - ram.dwLength = sizeof(ram); - GlobalMemoryStatusEx(&ram); - return ram.ullTotalPhys; -#elif defined(__APPLE__) - uint64_t ram = 0; - size_t len = sizeof(ram); - if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) { - return ram; - } - return 0; -#else - size_t ps = sysconf(_SC_PAGESIZE); - size_t pn = sysconf(_SC_PHYS_PAGES); - return ps * pn; -#endif -} - -uint64_t system_self_process_id() -{ -#ifdef _WIN32 - return GetCurrentProcessId(); -#else - return getpid(); -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h deleted file mode 100644 index a1797e6ca44..00000000000 --- a/intern/cycles/util/util_system.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
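A sketch of how the CPU feature queries above are meant to be used: picking the widest supported kernel tier at runtime. The KernelLevel enum is made up for the example; only the system_cpu_* calls come from the header above (pre-rename util/util_system.h, namespace ccl).

#include <cstdio>

#include "util/util_system.h"

enum KernelLevel { KERNEL_GENERIC, KERNEL_SSE2, KERNEL_SSE41, KERNEL_AVX2 };

KernelLevel pick_kernel_level()
{
  using namespace ccl;

  printf("CPU: %s (%d bit)\n", system_cpu_brand_string().c_str(), system_cpu_bits());

  /* Each query implies all lower tiers, so test from widest to narrowest. */
  if (system_cpu_support_avx2())
    return KERNEL_AVX2;
  if (system_cpu_support_sse41())
    return KERNEL_SSE41;
  if (system_cpu_support_sse2())
    return KERNEL_SSE2;
  return KERNEL_GENERIC;
}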
- */ - -#ifndef __UTIL_SYSTEM_H__ -#define __UTIL_SYSTEM_H__ - -#include "util/util_string.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -/* Make sure CPU groups / NUMA API is initialized. */ -bool system_cpu_ensure_initialized(); - -/* Get total number of threads in all NUMA nodes / CPU groups. */ -int system_cpu_thread_count(); - -/* Get width in characters of the current console output. */ -int system_console_width(); - -/* Get number of available nodes. - * - * This is in fact an index of last node plus one and it's not guaranteed - * that all nodes up to this one are available. */ -int system_cpu_num_numa_nodes(); - -/* Returns truth if the given node is available for compute. */ -bool system_cpu_is_numa_node_available(int node); - -/* Get number of available processors on a given node. */ -int system_cpu_num_numa_node_processors(int node); - -/* Runs the current thread and its children on a specific node. - * - * Returns truth if affinity has successfully changed. */ -bool system_cpu_run_thread_on_node(int node); - -/* Number of processors within the current CPU group (or within active thread - * thread affinity). */ -int system_cpu_num_active_group_processors(); - -string system_cpu_brand_string(); -int system_cpu_bits(); -bool system_cpu_support_sse2(); -bool system_cpu_support_sse3(); -bool system_cpu_support_sse41(); -bool system_cpu_support_avx(); -bool system_cpu_support_avx2(); - -size_t system_physical_ram(); - -/* Start a new process of the current application with the given arguments. */ -bool system_call_self(const vector &args); - -/* Get identifier of the currently running process. */ -uint64_t system_self_process_id(); - -CCL_NAMESPACE_END - -#endif /* __UTIL_SYSTEM_H__ */ diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp deleted file mode 100644 index 949ba0a7b4d..00000000000 --- a/intern/cycles/util/util_task.cpp +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "util/util_task.h" -#include "util/util_foreach.h" -#include "util/util_logging.h" -#include "util/util_system.h" -#include "util/util_time.h" - -CCL_NAMESPACE_BEGIN - -/* Task Pool */ - -TaskPool::TaskPool() : start_time(time_dt()), num_tasks_pushed(0) -{ -} - -TaskPool::~TaskPool() -{ - cancel(); -} - -void TaskPool::push(TaskRunFunction &&task) -{ - tbb_group.run(std::move(task)); - num_tasks_pushed++; -} - -void TaskPool::wait_work(Summary *stats) -{ - tbb_group.wait(); - - if (stats != NULL) { - stats->time_total = time_dt() - start_time; - stats->num_tasks_handled = num_tasks_pushed; - } - - num_tasks_pushed = 0; -} - -void TaskPool::cancel() -{ - if (num_tasks_pushed > 0) { - tbb_group.cancel(); - tbb_group.wait(); - num_tasks_pushed = 0; - } -} - -bool TaskPool::canceled() -{ - return tbb::is_current_task_group_canceling(); -} - -/* Task Scheduler */ - -thread_mutex TaskScheduler::mutex; -int TaskScheduler::users = 0; -int TaskScheduler::active_num_threads = 0; -tbb::global_control *TaskScheduler::global_control = nullptr; - -void TaskScheduler::init(int num_threads) -{ - thread_scoped_lock lock(mutex); - /* Multiple cycles instances can use this task scheduler, sharing the same - * threads, so we keep track of the number of users. */ - ++users; - if (users != 1) { - return; - } - if (num_threads > 0) { - /* Automatic number of threads. */ - VLOG(1) << "Overriding number of TBB threads to " << num_threads << "."; - global_control = new tbb::global_control(tbb::global_control::max_allowed_parallelism, - num_threads); - active_num_threads = num_threads; - } - else { - active_num_threads = system_cpu_thread_count(); - } -} - -void TaskScheduler::exit() -{ - thread_scoped_lock lock(mutex); - users--; - if (users == 0) { - delete global_control; - global_control = nullptr; - active_num_threads = 0; - } -} - -void TaskScheduler::free_memory() -{ - assert(users == 0); -} - -int TaskScheduler::num_threads() -{ - return active_num_threads; -} - -/* Dedicated Task Pool */ - -DedicatedTaskPool::DedicatedTaskPool() -{ - do_cancel = false; - do_exit = false; - num = 0; - - worker_thread = new thread(function_bind(&DedicatedTaskPool::thread_run, this)); -} - -DedicatedTaskPool::~DedicatedTaskPool() -{ - wait(); - - do_exit = true; - queue_cond.notify_all(); - - worker_thread->join(); - delete worker_thread; -} - -void DedicatedTaskPool::push(TaskRunFunction &&task, bool front) -{ - num_increase(); - - /* add task to queue */ - queue_mutex.lock(); - if (front) - queue.emplace_front(std::move(task)); - else - queue.emplace_back(std::move(task)); - - queue_cond.notify_one(); - queue_mutex.unlock(); -} - -void DedicatedTaskPool::wait() -{ - thread_scoped_lock num_lock(num_mutex); - - while (num) - num_cond.wait(num_lock); -} - -void DedicatedTaskPool::cancel() -{ - do_cancel = true; - - clear(); - wait(); - - do_cancel = false; -} - -bool DedicatedTaskPool::canceled() -{ - return do_cancel; -} - -void DedicatedTaskPool::num_decrease(int done) -{ - thread_scoped_lock num_lock(num_mutex); - num -= done; - - assert(num >= 0); - if (num == 0) - num_cond.notify_all(); -} - -void DedicatedTaskPool::num_increase() -{ - thread_scoped_lock num_lock(num_mutex); - num++; - num_cond.notify_all(); -} - -bool DedicatedTaskPool::thread_wait_pop(TaskRunFunction &task) -{ - thread_scoped_lock queue_lock(queue_mutex); - - while (queue.empty() && !do_exit) - queue_cond.wait(queue_lock); - - if (queue.empty()) { - assert(do_exit); - return false; - } - - task = queue.front(); - queue.pop_front(); - - 
return true; -} - -void DedicatedTaskPool::thread_run() -{ - TaskRunFunction task; - - /* keep popping off tasks */ - while (thread_wait_pop(task)) { - /* run task */ - task(); - - /* delete task */ - task = nullptr; - - /* notify task was done */ - num_decrease(1); - } -} - -void DedicatedTaskPool::clear() -{ - thread_scoped_lock queue_lock(queue_mutex); - - /* erase all tasks from the queue */ - int done = queue.size(); - queue.clear(); - - queue_lock.unlock(); - - /* notify done */ - num_decrease(done); -} - -string TaskPool::Summary::full_report() const -{ - string report = ""; - report += string_printf("Total time: %f\n", time_total); - report += string_printf("Tasks handled: %d\n", num_tasks_handled); - return report; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h deleted file mode 100644 index ec45dfa8040..00000000000 --- a/intern/cycles/util/util_task.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TASK_H__ -#define __UTIL_TASK_H__ - -#include "util/util_list.h" -#include "util/util_string.h" -#include "util/util_tbb.h" -#include "util/util_thread.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -class TaskPool; -class TaskScheduler; - -typedef function TaskRunFunction; - -/* Task Pool - * - * Pool of tasks that will be executed by the central TaskScheduler. For each - * pool, we can wait for all tasks to be done, or cancel them before they are - * done. - * - * TaskRunFunction may be created with std::bind or lambda expressions. */ - -class TaskPool { - public: - struct Summary { - /* Time spent to handle all tasks. */ - double time_total; - - /* Number of all tasks handled by this pool. */ - int num_tasks_handled; - - /* A full multi-line description of the state of the pool after - * all work is done. - */ - string full_report() const; - }; - - TaskPool(); - ~TaskPool(); - - void push(TaskRunFunction &&task); - - void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */ - void cancel(); /* cancel all tasks and wait until they are no longer executing */ - - static bool canceled(); /* For worker threads, test if current task pool canceled. */ - - protected: - tbb::task_group tbb_group; - - /* ** Statistics ** */ - - /* Time stamp of first task pushed. */ - double start_time; - - /* Number of all tasks pushed to the pool. Cleared after wait_work() and cancel(). */ - int num_tasks_pushed; -}; - -/* Task Scheduler - * - * Central scheduler that holds running threads ready to execute tasks. A single - * queue holds the task from all pools. */ - -class TaskScheduler { - public: - static void init(int num_threads = 0); - static void exit(); - static void free_memory(); - - /* Approximate number of threads that will work on task, which may be lower - * or higher than the actual number of threads. Use as little as possible and - * leave splitting up tasks to the scheduler. 
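A minimal sketch of the TaskPool lifecycle defined above: initialize the shared scheduler, push void() tasks, wait, and read the summary. Assumes the pre-rename util/util_task.h header and namespace ccl.

#include <cstdio>

#include "util/util_task.h"

void run_tasks()
{
  using namespace ccl;

  TaskScheduler::init(); /* reference counted; pair every init() with exit() */
  {
    TaskPool pool;
    for (int i = 0; i < 16; i++) {
      pool.push([i] { printf("task %d\n", i); });
    }

    TaskPool::Summary summary;
    pool.wait_work(&summary); /* blocks until all pushed tasks have finished */
    printf("%s", summary.full_report().c_str());
  }
  TaskScheduler::exit();
}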
*/ - static int num_threads(); - - protected: - static thread_mutex mutex; - static int users; - static int active_num_threads; - -#ifdef WITH_TBB_GLOBAL_CONTROL - static tbb::global_control *global_control; -#endif -}; - -/* Dedicated Task Pool - * - * Like a TaskPool, but will launch one dedicated thread to execute all tasks. - * - * The run callback that actually executes the task may be created like this: - * function_bind(&MyClass::task_execute, this, _1, _2) */ - -class DedicatedTaskPool { - public: - DedicatedTaskPool(); - ~DedicatedTaskPool(); - - void push(TaskRunFunction &&run, bool front = false); - - void wait(); /* wait until all tasks are done */ - void cancel(); /* cancel all tasks, keep worker thread running */ - - bool canceled(); /* for worker thread, test if canceled */ - - protected: - void num_decrease(int done); - void num_increase(); - - void thread_run(); - bool thread_wait_pop(TaskRunFunction &task); - - void clear(); - - thread_mutex num_mutex; - thread_condition_variable num_cond; - - list queue; - thread_mutex queue_mutex; - thread_condition_variable queue_cond; - - int num; - bool do_cancel; - bool do_exit; - - thread *worker_thread; -}; - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_tbb.h b/intern/cycles/util/util_tbb.h deleted file mode 100644 index 8f84377ac8c..00000000000 --- a/intern/cycles/util/util_tbb.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2011-2020 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TBB_H__ -#define __UTIL_TBB_H__ - -/* TBB includes , do it ourselves first so we are sure - * WIN32_LEAN_AND_MEAN and similar are defined beforehand. */ -#include "util_windows.h" - -#include -#include -#include -#include -#include - -#if TBB_INTERFACE_VERSION_MAJOR >= 10 -# define WITH_TBB_GLOBAL_CONTROL -# include -#endif - -CCL_NAMESPACE_BEGIN - -using tbb::blocked_range; -using tbb::enumerable_thread_specific; -using tbb::parallel_for; - -static inline void parallel_for_cancel() -{ -#if TBB_INTERFACE_VERSION_MAJOR >= 12 - tbb::task_group_context *ctx = tbb::task::current_context(); - if (ctx) { - ctx->cancel_group_execution(); - } -#else - tbb::task::self().cancel_group_execution(); -#endif -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_TBB_H__ */ diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h deleted file mode 100644 index 4de66bf5f46..00000000000 --- a/intern/cycles/util/util_texture.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
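A small sketch using the TBB aliases re-exported by util_tbb.h above (blocked_range and parallel_for), assuming the pre-rename util/util_tbb.h header and namespace ccl.

#include <vector>

#include "util/util_tbb.h"

void scale_buffer(std::vector<float> &data, float factor)
{
  using namespace ccl;

  parallel_for(blocked_range<size_t>(0, data.size()), [&](const blocked_range<size_t> &r) {
    for (size_t i = r.begin(); i != r.end(); i++) {
      data[i] *= factor;
    }
  });
}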
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TEXTURE_H__ -#define __UTIL_TEXTURE_H__ - -#include "util_transform.h" - -CCL_NAMESPACE_BEGIN - -/* Color to use when textures are not found. */ -#define TEX_IMAGE_MISSING_R 1 -#define TEX_IMAGE_MISSING_G 0 -#define TEX_IMAGE_MISSING_B 1 -#define TEX_IMAGE_MISSING_A 1 - -/* Interpolation types for textures - * cuda also use texture space to store other objects */ -typedef enum InterpolationType { - INTERPOLATION_NONE = -1, - INTERPOLATION_LINEAR = 0, - INTERPOLATION_CLOSEST = 1, - INTERPOLATION_CUBIC = 2, - INTERPOLATION_SMART = 3, - - INTERPOLATION_NUM_TYPES, -} InterpolationType; - -typedef enum ImageDataType { - IMAGE_DATA_TYPE_FLOAT4 = 0, - IMAGE_DATA_TYPE_BYTE4 = 1, - IMAGE_DATA_TYPE_HALF4 = 2, - IMAGE_DATA_TYPE_FLOAT = 3, - IMAGE_DATA_TYPE_BYTE = 4, - IMAGE_DATA_TYPE_HALF = 5, - IMAGE_DATA_TYPE_USHORT4 = 6, - IMAGE_DATA_TYPE_USHORT = 7, - IMAGE_DATA_TYPE_NANOVDB_FLOAT = 8, - IMAGE_DATA_TYPE_NANOVDB_FLOAT3 = 9, - - IMAGE_DATA_NUM_TYPES -} ImageDataType; - -/* Alpha types - * How to treat alpha in images. */ -typedef enum ImageAlphaType { - IMAGE_ALPHA_UNASSOCIATED = 0, - IMAGE_ALPHA_ASSOCIATED = 1, - IMAGE_ALPHA_CHANNEL_PACKED = 2, - IMAGE_ALPHA_IGNORE = 3, - IMAGE_ALPHA_AUTO = 4, - - IMAGE_ALPHA_NUM_TYPES, -} ImageAlphaType; - -/* Extension types for textures. - * - * Defines how the image is extrapolated past its original bounds. */ -typedef enum ExtensionType { - /* Cause the image to repeat horizontally and vertically. */ - EXTENSION_REPEAT = 0, - /* Extend by repeating edge pixels of the image. */ - EXTENSION_EXTEND = 1, - /* Clip to image size and set exterior pixels as transparent. */ - EXTENSION_CLIP = 2, - - EXTENSION_NUM_TYPES, -} ExtensionType; - -typedef struct TextureInfo { - /* Pointer, offset or texture depending on device. */ - uint64_t data; - /* Data Type */ - uint data_type; - /* Interpolation and extension type. */ - uint interpolation, extension; - /* Dimensions. */ - uint width, height, depth; - /* Transform for 3D textures. */ - uint use_transform_3d; - Transform transform_3d; -} TextureInfo; - -CCL_NAMESPACE_END - -#endif /* __UTIL_TEXTURE_H__ */ diff --git a/intern/cycles/util/util_thread.cpp b/intern/cycles/util/util_thread.cpp deleted file mode 100644 index cccde5ae7d5..00000000000 --- a/intern/cycles/util/util_thread.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_thread.h" - -#include "util/util_system.h" -#include "util/util_windows.h" - -CCL_NAMESPACE_BEGIN - -thread::thread(function run_cb, int node) : run_cb_(run_cb), joined_(false), node_(node) -{ -#ifdef __APPLE__ - /* Set the stack size to 2MB to match Linux. The default 512KB on macOS is - * too small for Embree, and consistent stack size also makes things more - * predictable in general. 
*/ - pthread_attr_t attribute; - pthread_attr_init(&attribute); - pthread_attr_setstacksize(&attribute, 1024 * 1024 * 2); - pthread_create(&pthread_id, &attribute, run, (void *)this); -#else - std_thread = std::thread(&thread::run, this); -#endif -} - -thread::~thread() -{ - if (!joined_) { - join(); - } -} - -void *thread::run(void *arg) -{ - thread *self = (thread *)(arg); - if (self->node_ != -1) { - system_cpu_run_thread_on_node(self->node_); - } - self->run_cb_(); - return NULL; -} - -bool thread::join() -{ - joined_ = true; -#ifdef __APPLE__ - return pthread_join(pthread_id, NULL) == 0; -#else - try { - std_thread.join(); - return true; - } - catch (const std::system_error &) { - return false; - } -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h deleted file mode 100644 index 29f9becbefe..00000000000 --- a/intern/cycles/util/util_thread.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_THREAD_H__ -#define __UTIL_THREAD_H__ - -#include -#include -#include -#include -#include - -#ifdef _WIN32 -# include "util_windows.h" -#else -# include -#endif - -/* NOTE: Use tbb/spin_mutex.h instead of util_tbb.h because some of the TBB - * functionality requires RTTI, which is disabled for OSL kernel. */ -#include - -#include "util/util_function.h" - -CCL_NAMESPACE_BEGIN - -typedef std::mutex thread_mutex; -typedef std::unique_lock thread_scoped_lock; -typedef std::condition_variable thread_condition_variable; - -/* Own thread implementation similar to std::thread, so we can set a - * custom stack size on macOS. */ - -class thread { - public: - /* NOTE: Node index of -1 means that affinity will be inherited from the - * parent thread and no override on top of that will happen. */ - thread(function run_cb, int node = -1); - ~thread(); - - static void *run(void *arg); - bool join(); - - protected: - function run_cb_; -#ifdef __APPLE__ - pthread_t pthread_id; -#else - std::thread std_thread; -#endif - bool joined_; - int node_; -}; - -using thread_spin_lock = tbb::spin_mutex; - -class thread_scoped_spin_lock { - public: - explicit thread_scoped_spin_lock(thread_spin_lock &lock) : lock_(lock) - { - lock_.lock(); - } - - ~thread_scoped_spin_lock() - { - lock_.unlock(); - } - - /* TODO(sergey): Implement manual control over lock/unlock. */ - - protected: - thread_spin_lock &lock_; -}; - -CCL_NAMESPACE_END - -#endif /* __UTIL_THREAD_H__ */ diff --git a/intern/cycles/util/util_time.cpp b/intern/cycles/util/util_time.cpp deleted file mode 100644 index 1641395d07e..00000000000 --- a/intern/cycles/util/util_time.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
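An illustrative use of the thread wrapper and spin lock from util_thread.h above, assuming run_cb is a void() functor, the pre-rename header path, and namespace ccl.

#include <cstdio>

#include "util/util_thread.h"

void thread_demo()
{
  using namespace ccl;

  int counter = 0;
  thread_spin_lock counter_lock;

  /* Node -1 (the default) inherits the parent thread's affinity. */
  thread worker([&]() {
    thread_scoped_spin_lock lock(counter_lock);
    counter++;
  });

  worker.join(); /* the destructor would join too, but be explicit */
  printf("counter = %d\n", counter);
}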
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_time.h" - -#include - -#if !defined(_WIN32) -# include -# include -#endif - -#include "util/util_math.h" -#include "util/util_string.h" -#include "util/util_windows.h" - -CCL_NAMESPACE_BEGIN - -#ifdef _WIN32 -double time_dt() -{ - __int64 frequency, counter; - - QueryPerformanceFrequency((LARGE_INTEGER *)&frequency); - QueryPerformanceCounter((LARGE_INTEGER *)&counter); - - return (double)counter / (double)frequency; -} - -void time_sleep(double t) -{ - Sleep((int)(t * 1000)); -} -#else -double time_dt() -{ - struct timeval now; - gettimeofday(&now, NULL); - - return now.tv_sec + now.tv_usec * 1e-6; -} - -/* sleep t seconds */ -void time_sleep(double t) -{ - /* get whole seconds */ - int s = (int)t; - - if (s >= 1) { - sleep(s); - - /* adjust parameter to remove whole seconds */ - t -= s; - } - - /* get microseconds */ - int us = (int)(t * 1e6); - if (us > 0) - usleep(us); -} -#endif - -/* Time in format "hours:minutes:seconds.hundreds" */ - -string time_human_readable_from_seconds(const double seconds) -{ - const int h = (((int)seconds) / (60 * 60)); - const int m = (((int)seconds) / 60) % 60; - const int s = (((int)seconds) % 60); - const int r = (((int)(seconds * 100)) % 100); - - if (h > 0) { - return string_printf("%.2d:%.2d:%.2d.%.2d", h, m, s, r); - } - else { - return string_printf("%.2d:%.2d.%.2d", m, s, r); - } -} - -double time_human_readable_to_seconds(const string &time_string) -{ - /* Those are multiplies of a corresponding token surrounded by : in the - * time string, which denotes how to convert value to seconds. - * Effectively: seconds, minutes, hours, days in seconds. */ - const int multipliers[] = {1, 60, 60 * 60, 24 * 60 * 60}; - const int num_multiplies = sizeof(multipliers) / sizeof(*multipliers); - if (time_string.empty()) { - return 0.0; - } - double result = 0.0; - /* Split fractions of a second from the encoded time. */ - vector fraction_tokens; - string_split(fraction_tokens, time_string, ".", false); - const int num_fraction_tokens = fraction_tokens.size(); - if (num_fraction_tokens == 0) { - /* Time string is malformed. */ - return 0.0; - } - else if (fraction_tokens.size() == 1) { - /* There is no fraction of a second specified, the rest of the code - * handles this normally. */ - } - else if (fraction_tokens.size() == 2) { - result = atof(fraction_tokens[1].c_str()); - result *= pow(0.1, fraction_tokens[1].length()); - } - else { - /* This is not a valid string, the result can not be reliable. */ - return 0.0; - } - /* Split hours, minutes and seconds. - * Hours part is optional. */ - vector tokens; - string_split(tokens, fraction_tokens[0], ":", false); - const int num_tokens = tokens.size(); - if (num_tokens > num_multiplies) { - /* Can not reliably represent the value. 
*/ - return 0.0; - } - for (int i = 0; i < num_tokens; ++i) { - result += atoi(tokens[num_tokens - i - 1].c_str()) * multipliers[i]; - } - return result; -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_time.h b/intern/cycles/util/util_time.h deleted file mode 100644 index a82d400a0d7..00000000000 --- a/intern/cycles/util/util_time.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TIME_H__ -#define __UTIL_TIME_H__ - -#include "util/util_function.h" -#include "util/util_string.h" - -CCL_NAMESPACE_BEGIN - -/* Give current time in seconds in double precision, with good accuracy. */ - -double time_dt(); - -/* Sleep for the specified number of seconds. */ - -void time_sleep(double t); - -/* Scoped timer. */ - -class scoped_timer { - public: - explicit scoped_timer(double *value = NULL) : value_(value) - { - time_start_ = time_dt(); - } - - ~scoped_timer() - { - if (value_ != NULL) { - *value_ = get_time(); - } - } - - double get_start() const - { - return time_start_; - } - - double get_time() const - { - return time_dt() - time_start_; - } - - protected: - double *value_; - double time_start_; -}; - -class scoped_callback_timer { - public: - using callback_type = function; - - explicit scoped_callback_timer(callback_type cb) : cb(cb) - { - } - - ~scoped_callback_timer() - { - if (cb) { - cb(timer.get_time()); - } - } - - protected: - scoped_timer timer; - callback_type cb; -}; - -/* Make human readable string from time, compatible with Blender metadata. */ - -string time_human_readable_from_seconds(const double seconds); -double time_human_readable_to_seconds(const string &str); - -CCL_NAMESPACE_END - -#endif diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp deleted file mode 100644 index e8233b7fe6d..00000000000 --- a/intern/cycles/util/util_transform.cpp +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Adapted from code with license: - * - * Copyright (c) 2002, Industrial Light & Magic, a division of Lucas - * Digital Ltd. LLC. All rights reserved. 
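A sketch of the timing helpers from util_time above: the "[hh:]mm:ss.cc" round trip and scoped_timer, assuming the pre-rename util/util_time.h header and namespace ccl.

#include <cassert>
#include <cstdio>

#include "util/util_time.h"

void time_demo()
{
  using namespace ccl;

  /* 3725.5 s is 1 h, 2 min, 5 s and 50 hundredths; hours are omitted when zero. */
  assert(time_human_readable_from_seconds(3725.5) == "01:02:05.50");
  assert(time_human_readable_from_seconds(65.25) == "01:05.25");

  const double parsed = time_human_readable_to_seconds("01:02:05.50");
  assert(parsed > 3725.49 && parsed < 3725.51); /* fraction goes through atof/pow, allow rounding */

  /* scoped_timer writes the elapsed time into the pointed-to value on destruction. */
  double elapsed = 0.0;
  {
    scoped_timer timer(&elapsed);
    time_sleep(0.01);
  }
  printf("slept for %s\n", time_human_readable_from_seconds(elapsed).c_str());
}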
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Industrial Light & Magic nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "util/util_transform.h" -#include "util/util_projection.h" - -#include "util/util_boundbox.h" -#include "util/util_math.h" - -CCL_NAMESPACE_BEGIN - -/* Transform Inverse */ - -static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) -{ - /* forward elimination */ - for (int i = 0; i < 4; i++) { - int pivot = i; - float pivotsize = M[i][i]; - - if (pivotsize < 0) - pivotsize = -pivotsize; - - for (int j = i + 1; j < 4; j++) { - float tmp = M[j][i]; - - if (tmp < 0) - tmp = -tmp; - - if (tmp > pivotsize) { - pivot = j; - pivotsize = tmp; - } - } - - if (UNLIKELY(pivotsize == 0.0f)) - return false; - - if (pivot != i) { - for (int j = 0; j < 4; j++) { - float tmp; - - tmp = M[i][j]; - M[i][j] = M[pivot][j]; - M[pivot][j] = tmp; - - tmp = R[i][j]; - R[i][j] = R[pivot][j]; - R[pivot][j] = tmp; - } - } - - for (int j = i + 1; j < 4; j++) { - float f = M[j][i] / M[i][i]; - - for (int k = 0; k < 4; k++) { - M[j][k] -= f * M[i][k]; - R[j][k] -= f * R[i][k]; - } - } - } - - /* backward substitution */ - for (int i = 3; i >= 0; --i) { - float f; - - if (UNLIKELY((f = M[i][i]) == 0.0f)) - return false; - - for (int j = 0; j < 4; j++) { - M[i][j] /= f; - R[i][j] /= f; - } - - for (int j = 0; j < i; j++) { - f = M[j][i]; - - for (int k = 0; k < 4; k++) { - M[j][k] -= f * M[i][k]; - R[j][k] -= f * R[i][k]; - } - } - } - - return true; -} - -ProjectionTransform projection_inverse(const ProjectionTransform &tfm) -{ - ProjectionTransform tfmR = projection_identity(); - float M[4][4], R[4][4]; - - memcpy(R, &tfmR, sizeof(R)); - memcpy(M, &tfm, sizeof(M)); - - if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { - /* matrix is degenerate (e.g. 
0 scale on some axis), ideally we should - * never be in this situation, but try to invert it anyway with tweak */ - M[0][0] += 1e-8f; - M[1][1] += 1e-8f; - M[2][2] += 1e-8f; - - if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { - return projection_identity(); - } - } - - memcpy(&tfmR, R, sizeof(R)); - - return tfmR; -} - -Transform transform_inverse(const Transform &tfm) -{ - ProjectionTransform projection(tfm); - return projection_to_transform(projection_inverse(projection)); -} - -Transform transform_transposed_inverse(const Transform &tfm) -{ - ProjectionTransform projection(tfm); - ProjectionTransform iprojection = projection_inverse(projection); - return projection_to_transform(projection_transpose(iprojection)); -} - -/* Motion Transform */ - -float4 transform_to_quat(const Transform &tfm) -{ - double trace = (double)(tfm[0][0] + tfm[1][1] + tfm[2][2]); - float4 qt; - - if (trace > 0.0) { - double s = sqrt(trace + 1.0); - - qt.w = (float)(s / 2.0); - s = 0.5 / s; - - qt.x = (float)((double)(tfm[2][1] - tfm[1][2]) * s); - qt.y = (float)((double)(tfm[0][2] - tfm[2][0]) * s); - qt.z = (float)((double)(tfm[1][0] - tfm[0][1]) * s); - } - else { - int i = 0; - - if (tfm[1][1] > tfm[i][i]) - i = 1; - if (tfm[2][2] > tfm[i][i]) - i = 2; - - int j = (i + 1) % 3; - int k = (j + 1) % 3; - - double s = sqrt((double)(tfm[i][i] - (tfm[j][j] + tfm[k][k])) + 1.0); - - double q[3]; - q[i] = s * 0.5; - if (s != 0.0) - s = 0.5 / s; - - double w = (double)(tfm[k][j] - tfm[j][k]) * s; - q[j] = (double)(tfm[j][i] + tfm[i][j]) * s; - q[k] = (double)(tfm[k][i] + tfm[i][k]) * s; - - qt.x = (float)q[0]; - qt.y = (float)q[1]; - qt.z = (float)q[2]; - qt.w = (float)w; - } - - return qt; -} - -static void transform_decompose(DecomposedTransform *decomp, const Transform *tfm) -{ - /* extract translation */ - decomp->y = make_float4(tfm->x.w, tfm->y.w, tfm->z.w, 0.0f); - - /* extract rotation */ - Transform M = *tfm; - M.x.w = 0.0f; - M.y.w = 0.0f; - M.z.w = 0.0f; - -#if 0 - Transform R = M; - float norm; - int iteration = 0; - - do { - Transform Rnext; - Transform Rit = transform_transposed_inverse(R); - - for (int i = 0; i < 3; i++) - for (int j = 0; j < 4; j++) - Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]); - - norm = 0.0f; - for (int i = 0; i < 3; i++) { - norm = max(norm, - fabsf(R[i][0] - Rnext[i][0]) + fabsf(R[i][1] - Rnext[i][1]) + - fabsf(R[i][2] - Rnext[i][2])); - } - - R = Rnext; - iteration++; - } while (iteration < 100 && norm > 1e-4f); - - if (transform_negative_scale(R)) - R = R * transform_scale(-1.0f, -1.0f, -1.0f); - - decomp->x = transform_to_quat(R); - - /* extract scale and pack it */ - Transform scale = transform_inverse(R) * M; - decomp->y.w = scale.x.x; - decomp->z = make_float4(scale.x.y, scale.x.z, scale.y.x, scale.y.y); - decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z); -#else - float3 colx = transform_get_column(&M, 0); - float3 coly = transform_get_column(&M, 1); - float3 colz = transform_get_column(&M, 2); - - /* extract scale and shear first */ - float3 scale, shear; - scale.x = len(colx); - colx = safe_divide_float3_float(colx, scale.x); - shear.z = dot(colx, coly); - coly -= shear.z * colx; - scale.y = len(coly); - coly = safe_divide_float3_float(coly, scale.y); - shear.y = dot(colx, colz); - colz -= shear.y * colx; - shear.x = dot(coly, colz); - colz -= shear.x * coly; - scale.z = len(colz); - colz = safe_divide_float3_float(colz, scale.z); - - transform_set_column(&M, 0, colx); - transform_set_column(&M, 1, coly); - transform_set_column(&M, 2, colz); - - if 
(transform_negative_scale(M)) { - scale *= -1.0f; - M = M * transform_scale(-1.0f, -1.0f, -1.0f); - } - - decomp->x = transform_to_quat(M); - - decomp->y.w = scale.x; - decomp->z = make_float4(shear.z, shear.y, 0.0f, scale.y); - decomp->w = make_float4(shear.x, 0.0f, 0.0f, scale.z); -#endif -} - -void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size) -{ - /* Decompose and correct rotation. */ - for (size_t i = 0; i < size; i++) { - transform_decompose(decomp + i, motion + i); - - if (i > 0) { - /* Ensure rotation around shortest angle, negated quaternions are the same - * but this means we don't have to do the check in quat_interpolate */ - if (dot(decomp[i - 1].x, decomp[i].x) < 0.0f) - decomp[i].x = -decomp[i].x; - } - } - - /* Copy rotation to decomposed transform where scale is degenerate. This avoids weird object - * rotation interpolation when the scale goes to 0 for a time step. - * - * Note that this is very simple and naive implementation, which only deals with degenerated - * scale happening only on one frame. It is possible to improve it further by interpolating - * rotation into s degenerated range using rotation from time-steps from adjacent non-degenerated - * time steps. */ - for (size_t i = 0; i < size; i++) { - const float3 scale = make_float3(decomp[i].y.w, decomp[i].z.w, decomp[i].w.w); - if (!is_zero(scale)) { - continue; - } - - if (i > 0) { - decomp[i].x = decomp[i - 1].x; - } - else if (i < size - 1) { - decomp[i].x = decomp[i + 1].x; - } - } -} - -Transform transform_from_viewplane(BoundBox2D &viewplane) -{ - return transform_scale(1.0f / (viewplane.right - viewplane.left), - 1.0f / (viewplane.top - viewplane.bottom), - 1.0f) * - transform_translate(-viewplane.left, -viewplane.bottom, 0.0f); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h deleted file mode 100644 index fc04f9aab46..00000000000 --- a/intern/cycles/util/util_transform.h +++ /dev/null @@ -1,512 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TRANSFORM_H__ -#define __UTIL_TRANSFORM_H__ - -#ifndef __KERNEL_GPU__ -# include -#endif - -#include "util/util_math.h" -#include "util/util_types.h" - -CCL_NAMESPACE_BEGIN - -/* Affine transformation, stored as 4x3 matrix. */ - -typedef struct Transform { - float4 x, y, z; - -#ifndef __KERNEL_GPU__ - float4 operator[](int i) const - { - return *(&x + i); - } - float4 &operator[](int i) - { - return *(&x + i); - } -#endif -} Transform; - -/* Transform decomposed in rotation/translation/scale. we use the same data - * structure as Transform, and tightly pack decomposition into it. first the - * rotation (4), then translation (3), then 3x3 scale matrix (9). 
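A minimal sketch of feeding motion samples to transform_motion_decompose() as implemented above, assuming the pre-rename util/util_transform.h header declares it and that CCL_NAMESPACE_BEGIN opens namespace ccl.

#include "util/util_transform.h"

void decompose_motion_demo()
{
  using namespace ccl;

  /* Two motion steps: identity at t=0, a translated copy at t=1. */
  Transform motion[2] = {transform_identity(), transform_translate(1.0f, 0.0f, 0.0f)};

  DecomposedTransform decomp[2];
  transform_motion_decompose(decomp, motion, 2);

  /* decomp[i].x holds the rotation quaternion, decomp[i].y.xyz the translation,
   * and the remaining components the packed scale/shear (see the layout above). */
}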
*/ - -typedef struct DecomposedTransform { - float4 x, y, z, w; -} DecomposedTransform; - -/* Functions */ - -ccl_device_inline float3 transform_point(ccl_private const Transform *t, const float3 a) -{ - /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) - ssef x, y, z, w, aa; - aa = a.m128; - - x = _mm_loadu_ps(&t->x.x); - y = _mm_loadu_ps(&t->y.x); - z = _mm_loadu_ps(&t->z.x); - w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); - - _MM_TRANSPOSE4_PS(x, y, z, w); - - ssef tmp = shuffle<0>(aa) * x; - tmp = madd(shuffle<1>(aa), y, tmp); - tmp = madd(shuffle<2>(aa), z, tmp); - tmp += w; - - return float3(tmp.m128); -#else - float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w, - a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w, - a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w); - - return c; -#endif -} - -ccl_device_inline float3 transform_direction(ccl_private const Transform *t, const float3 a) -{ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) - ssef x, y, z, w, aa; - aa = a.m128; - x = _mm_loadu_ps(&t->x.x); - y = _mm_loadu_ps(&t->y.x); - z = _mm_loadu_ps(&t->z.x); - w = _mm_setzero_ps(); - - _MM_TRANSPOSE4_PS(x, y, z, w); - - ssef tmp = shuffle<0>(aa) * x; - tmp = madd(shuffle<1>(aa), y, tmp); - tmp = madd(shuffle<2>(aa), z, tmp); - - return float3(tmp.m128); -#else - float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, - a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, - a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); - - return c; -#endif -} - -ccl_device_inline float3 transform_direction_transposed(ccl_private const Transform *t, - const float3 a) -{ - float3 x = make_float3(t->x.x, t->y.x, t->z.x); - float3 y = make_float3(t->x.y, t->y.y, t->z.y); - float3 z = make_float3(t->x.z, t->y.z, t->z.z); - - return make_float3(dot(x, a), dot(y, a), dot(z, a)); -} - -ccl_device_inline Transform make_transform(float a, - float b, - float c, - float d, - float e, - float f, - float g, - float h, - float i, - float j, - float k, - float l) -{ - Transform t; - - t.x.x = a; - t.x.y = b; - t.x.z = c; - t.x.w = d; - t.y.x = e; - t.y.y = f; - t.y.z = g; - t.y.w = h; - t.z.x = i; - t.z.y = j; - t.z.z = k; - t.z.w = l; - - return t; -} - -ccl_device_inline Transform euler_to_transform(const float3 euler) -{ - float cx = cosf(euler.x); - float cy = cosf(euler.y); - float cz = cosf(euler.z); - float sx = sinf(euler.x); - float sy = sinf(euler.y); - float sz = sinf(euler.z); - - Transform t; - t.x.x = cy * cz; - t.y.x = cy * sz; - t.z.x = -sy; - - t.x.y = sy * sx * cz - cx * sz; - t.y.y = sy * sx * sz + cx * cz; - t.z.y = cy * sx; - - t.x.z = sy * cx * cz + sx * sz; - t.y.z = sy * cx * sz - sx * cz; - t.z.z = cy * cx; - - t.x.w = t.y.w = t.z.w = 0.0f; - return t; -} - -/* Constructs a coordinate frame from a normalized normal. */ -ccl_device_inline Transform make_transform_frame(float3 N) -{ - const float3 dx0 = cross(make_float3(1.0f, 0.0f, 0.0f), N); - const float3 dx1 = cross(make_float3(0.0f, 1.0f, 0.0f), N); - const float3 dx = normalize((dot(dx0, dx0) > dot(dx1, dx1)) ? 
dx0 : dx1); - const float3 dy = normalize(cross(N, dx)); - return make_transform(dx.x, dx.y, dx.z, 0.0f, dy.x, dy.y, dy.z, 0.0f, N.x, N.y, N.z, 0.0f); -} - -#ifndef __KERNEL_GPU__ - -ccl_device_inline Transform transform_zero() -{ - Transform zero = {zero_float4(), zero_float4(), zero_float4()}; - return zero; -} - -ccl_device_inline Transform operator*(const Transform a, const Transform b) -{ - float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f); - float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f); - float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f); - float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f); - - Transform t; - t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w)); - t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w)); - t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w)); - - return t; -} - -ccl_device_inline void print_transform(const char *label, const Transform &t) -{ - print_float4(label, t.x); - print_float4(label, t.y); - print_float4(label, t.z); - printf("\n"); -} - -ccl_device_inline Transform transform_translate(float3 t) -{ - return make_transform(1, 0, 0, t.x, 0, 1, 0, t.y, 0, 0, 1, t.z); -} - -ccl_device_inline Transform transform_translate(float x, float y, float z) -{ - return transform_translate(make_float3(x, y, z)); -} - -ccl_device_inline Transform transform_scale(float3 s) -{ - return make_transform(s.x, 0, 0, 0, 0, s.y, 0, 0, 0, 0, s.z, 0); -} - -ccl_device_inline Transform transform_scale(float x, float y, float z) -{ - return transform_scale(make_float3(x, y, z)); -} - -ccl_device_inline Transform transform_rotate(float angle, float3 axis) -{ - float s = sinf(angle); - float c = cosf(angle); - float t = 1.0f - c; - - axis = normalize(axis); - - return make_transform(axis.x * axis.x * t + c, - axis.x * axis.y * t - s * axis.z, - axis.x * axis.z * t + s * axis.y, - 0.0f, - - axis.y * axis.x * t + s * axis.z, - axis.y * axis.y * t + c, - axis.y * axis.z * t - s * axis.x, - 0.0f, - - axis.z * axis.x * t - s * axis.y, - axis.z * axis.y * t + s * axis.x, - axis.z * axis.z * t + c, - 0.0f); -} - -/* Euler is assumed to be in XYZ order. 
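operator* above is an ordinary matrix product with an implicit (0, 0, 0, 1) bottom row, so a product such as transform_translate(t) * transform_rotate(angle, axis) * transform_scale(s), applied through transform_point, scales first, then rotates, then translates. A standalone sketch of the same composition rule (illustrative only):

    #include <cstdio>

    /* Compose two affine 4x3 transforms (rows of 4), matching operator* above:
     * (a * b) applied to a point equals a applied to (b applied to the point). */
    static void transform_mul(const float a[3][4], const float b[3][4], float r[3][4])
    {
      for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 4; j++) {
          /* Treat b as a 4x4 matrix whose last row is (0, 0, 0, 1). */
          r[i][j] = a[i][0] * b[0][j] + a[i][1] * b[1][j] + a[i][2] * b[2][j] +
                    a[i][3] * ((j == 3) ? 1.0f : 0.0f);
        }
      }
    }

    int main()
    {
      /* Translate by (1, 0, 0) after scaling by 2: the translation stays unscaled. */
      const float translate[3][4] = {{1, 0, 0, 1}, {0, 1, 0, 0}, {0, 0, 1, 0}};
      const float scale[3][4] = {{2, 0, 0, 0}, {0, 2, 0, 0}, {0, 0, 2, 0}};
      float r[3][4];
      transform_mul(translate, scale, r);
      printf("x row: %g %g %g %g\n", r[0][0], r[0][1], r[0][2], r[0][3]); /* 2 0 0 1 */
      return 0;
    }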
*/ -ccl_device_inline Transform transform_euler(float3 euler) -{ - return transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) * - transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) * - transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f)); -} - -ccl_device_inline Transform transform_identity() -{ - return transform_scale(1.0f, 1.0f, 1.0f); -} - -ccl_device_inline bool operator==(const Transform &A, const Transform &B) -{ - return memcmp(&A, &B, sizeof(Transform)) == 0; -} - -ccl_device_inline bool operator!=(const Transform &A, const Transform &B) -{ - return !(A == B); -} - -ccl_device_inline float3 transform_get_column(const Transform *t, int column) -{ - return make_float3(t->x[column], t->y[column], t->z[column]); -} - -ccl_device_inline void transform_set_column(Transform *t, int column, float3 value) -{ - t->x[column] = value.x; - t->y[column] = value.y; - t->z[column] = value.z; -} - -Transform transform_inverse(const Transform &a); -Transform transform_transposed_inverse(const Transform &a); - -ccl_device_inline bool transform_uniform_scale(const Transform &tfm, float &scale) -{ - /* the epsilon here is quite arbitrary, but this function is only used for - * surface area and bump, where we expect it to not be so sensitive */ - float eps = 1e-6f; - - float sx = len_squared(float4_to_float3(tfm.x)); - float sy = len_squared(float4_to_float3(tfm.y)); - float sz = len_squared(float4_to_float3(tfm.z)); - float stx = len_squared(transform_get_column(&tfm, 0)); - float sty = len_squared(transform_get_column(&tfm, 1)); - float stz = len_squared(transform_get_column(&tfm, 2)); - - if (fabsf(sx - sy) < eps && fabsf(sx - sz) < eps && fabsf(sx - stx) < eps && - fabsf(sx - sty) < eps && fabsf(sx - stz) < eps) { - scale = sx; - return true; - } - - return false; -} - -ccl_device_inline bool transform_negative_scale(const Transform &tfm) -{ - float3 c0 = transform_get_column(&tfm, 0); - float3 c1 = transform_get_column(&tfm, 1); - float3 c2 = transform_get_column(&tfm, 2); - - return (dot(cross(c0, c1), c2) < 0.0f); -} - -ccl_device_inline Transform transform_clear_scale(const Transform &tfm) -{ - Transform ntfm = tfm; - - transform_set_column(&ntfm, 0, normalize(transform_get_column(&ntfm, 0))); - transform_set_column(&ntfm, 1, normalize(transform_get_column(&ntfm, 1))); - transform_set_column(&ntfm, 2, normalize(transform_get_column(&ntfm, 2))); - - return ntfm; -} - -ccl_device_inline Transform transform_empty() -{ - return make_transform(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); -} - -#endif - -/* Motion Transform */ - -ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) -{ - /* Optix is using lerp to interpolate motion transformations. 
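transform_negative_scale above checks the sign of the determinant of the 3x3 part via the scalar triple product of its columns; any mirroring, for example a scale of (-1, 1, 1), makes it negative. A standalone sketch of the same test (plain structs, not Cycles API):

    #include <cstdio>

    struct Vec3 {
      float x, y, z;
    };

    static Vec3 cross(const Vec3 &a, const Vec3 &b)
    {
      return {a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x};
    }

    static float dot(const Vec3 &a, const Vec3 &b)
    {
      return a.x * b.x + a.y * b.y + a.z * b.z;
    }

    /* A negative determinant of the 3x3 part means the transform mirrors space. */
    static bool negative_scale(const Vec3 &c0, const Vec3 &c1, const Vec3 &c2)
    {
      return dot(cross(c0, c1), c2) < 0.0f;
    }

    int main()
    {
      const Vec3 mirror_x = {-1.0f, 0.0f, 0.0f};
      const Vec3 unit_y = {0.0f, 1.0f, 0.0f};
      const Vec3 unit_z = {0.0f, 0.0f, 1.0f};
      printf("mirrored: %d\n", negative_scale(mirror_x, unit_y, unit_z)); /* 1 */
      return 0;
    }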
*/ -#ifdef __KERNEL_OPTIX__ - return normalize((1.0f - t) * q1 + t * q2); -#else /* __KERNEL_OPTIX__ */ - /* note: this does not ensure rotation around shortest angle, q1 and q2 - * are assumed to be matched already in transform_motion_decompose */ - float costheta = dot(q1, q2); - - /* possible optimization: it might be possible to precompute theta/qperp */ - - if (costheta > 0.9995f) { - /* linear interpolation in degenerate case */ - return normalize((1.0f - t) * q1 + t * q2); - } - else { - /* slerp */ - float theta = acosf(clamp(costheta, -1.0f, 1.0f)); - float4 qperp = normalize(q2 - q1 * costheta); - float thetap = theta * t; - return q1 * cosf(thetap) + qperp * sinf(thetap); - } -#endif /* __KERNEL_OPTIX__ */ -} - -ccl_device_inline Transform transform_quick_inverse(Transform M) -{ - /* possible optimization: can we avoid doing this altogether and construct - * the inverse matrix directly from negated translation, transposed rotation, - * scale can be inverted but what about shearing? */ - Transform R; - float det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) + - M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z); - if (det == 0.0f) { - M.x.x += 1e-8f; - M.y.y += 1e-8f; - M.z.z += 1e-8f; - det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) + - M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z); - } - det = (det != 0.0f) ? 1.0f / det : 0.0f; - - float3 Rx = det * make_float3(M.z.z * M.y.y - M.z.y * M.y.z, - M.z.y * M.x.z - M.z.z * M.x.y, - M.y.z * M.x.y - M.y.y * M.x.z); - float3 Ry = det * make_float3(M.z.x * M.y.z - M.z.z * M.y.x, - M.z.z * M.x.x - M.z.x * M.x.z, - M.y.x * M.x.z - M.y.z * M.x.x); - float3 Rz = det * make_float3(M.z.y * M.y.x - M.z.x * M.y.y, - M.z.x * M.x.y - M.z.y * M.x.x, - M.y.y * M.x.x - M.y.x * M.x.y); - float3 T = -make_float3(M.x.w, M.y.w, M.z.w); - - R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T)); - R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T)); - R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T)); - - return R; -} - -ccl_device_inline void transform_compose(ccl_private Transform *tfm, - ccl_private const DecomposedTransform *decomp) -{ - /* rotation */ - float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc; - - q0 = M_SQRT2_F * decomp->x.w; - q1 = M_SQRT2_F * decomp->x.x; - q2 = M_SQRT2_F * decomp->x.y; - q3 = M_SQRT2_F * decomp->x.z; - - qda = q0 * q1; - qdb = q0 * q2; - qdc = q0 * q3; - qaa = q1 * q1; - qab = q1 * q2; - qac = q1 * q3; - qbb = q2 * q2; - qbc = q2 * q3; - qcc = q3 * q3; - - float3 rotation_x = make_float3(1.0f - qbb - qcc, -qdc + qab, qdb + qac); - float3 rotation_y = make_float3(qdc + qab, 1.0f - qaa - qcc, -qda + qbc); - float3 rotation_z = make_float3(-qdb + qac, qda + qbc, 1.0f - qaa - qbb); - - /* scale */ - float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y); - float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z); - float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w); - - /* compose with translation */ - tfm->x = make_float4( - dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x); - tfm->y = make_float4( - dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y); - tfm->z = make_float4( - dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z); -} - -/* Interpolate from array of decomposed transforms. 
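The non-OptiX branch of quat_interpolate above falls back to a normalized lerp for nearly parallel quaternions and otherwise does a spherical linear interpolation. A standalone sketch of the same structure, checking that halfway between identity and a 90-degree rotation about Z gives a 45-degree rotation (plain structs, illustrative only):

    #include <cmath>
    #include <cstdio>

    struct Quat {
      float x, y, z, w;
    };

    static float quat_dot(const Quat &a, const Quat &b)
    {
      return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
    }

    static Quat quat_normalize(const Quat &q)
    {
      const float len = sqrtf(quat_dot(q, q));
      return {q.x / len, q.y / len, q.z / len, q.w / len};
    }

    /* Same shape as the non-OptiX branch of quat_interpolate above: normalized
     * lerp for nearly parallel quaternions, slerp otherwise. Assumes the signs
     * were already matched as in transform_motion_decompose. */
    static Quat quat_slerp(const Quat &q1, const Quat &q2, float t)
    {
      const float costheta = quat_dot(q1, q2);
      if (costheta > 0.9995f) {
        return quat_normalize({(1.0f - t) * q1.x + t * q2.x,
                               (1.0f - t) * q1.y + t * q2.y,
                               (1.0f - t) * q1.z + t * q2.z,
                               (1.0f - t) * q1.w + t * q2.w});
      }
      const float theta = acosf(fminf(fmaxf(costheta, -1.0f), 1.0f));
      const Quat qperp = quat_normalize({q2.x - q1.x * costheta,
                                         q2.y - q1.y * costheta,
                                         q2.z - q1.z * costheta,
                                         q2.w - q1.w * costheta});
      const float thetap = theta * t;
      const float c = cosf(thetap);
      const float s = sinf(thetap);
      return {q1.x * c + qperp.x * s,
              q1.y * c + qperp.y * s,
              q1.z * c + qperp.z * s,
              q1.w * c + qperp.w * s};
    }

    int main()
    {
      const Quat identity = {0.0f, 0.0f, 0.0f, 1.0f};
      const Quat rot90_z = {0.0f, 0.0f, sqrtf(0.5f), sqrtf(0.5f)};
      const Quat half = quat_slerp(identity, rot90_z, 0.5f);
      /* Expect roughly (0, 0, 0.3827, 0.9239): a 45 degree rotation about Z. */
      printf("%f %f %f %f\n", half.x, half.y, half.z, half.w);
      return 0;
    }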
*/ -ccl_device void transform_motion_array_interpolate(Transform *tfm, - const DecomposedTransform *motion, - uint numsteps, - float time) -{ - /* Figure out which steps we need to interpolate. */ - int maxstep = numsteps - 1; - int step = min((int)(time * maxstep), maxstep - 1); - float t = time * maxstep - step; - - const DecomposedTransform *a = motion + step; - const DecomposedTransform *b = motion + step + 1; - - /* Interpolate rotation, translation and scale. */ - DecomposedTransform decomp; - decomp.x = quat_interpolate(a->x, b->x, t); - decomp.y = (1.0f - t) * a->y + t * b->y; - decomp.z = (1.0f - t) * a->z + t * b->z; - decomp.w = (1.0f - t) * a->w + t * b->w; - - /* Compose rotation, translation, scale into matrix. */ - transform_compose(tfm, &decomp); -} - -ccl_device_inline bool transform_isfinite_safe(ccl_private Transform *tfm) -{ - return isfinite4_safe(tfm->x) && isfinite4_safe(tfm->y) && isfinite4_safe(tfm->z); -} - -ccl_device_inline bool transform_decomposed_isfinite_safe(ccl_private DecomposedTransform *decomp) -{ - return isfinite4_safe(decomp->x) && isfinite4_safe(decomp->y) && isfinite4_safe(decomp->z) && - isfinite4_safe(decomp->w); -} - -#ifndef __KERNEL_GPU__ - -class BoundBox2D; - -ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B) -{ - return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0; -} - -float4 transform_to_quat(const Transform &tfm); -void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size); -Transform transform_from_viewplane(BoundBox2D &viewplane); - -#endif - -/* TODO: This can be removed when we know if no devices will require explicit - * address space qualifiers for this case. */ - -#define transform_point_auto transform_point -#define transform_direction_auto transform_direction -#define transform_direction_transposed_auto transform_direction_transposed - -CCL_NAMESPACE_END - -#endif /* __UTIL_TRANSFORM_H__ */ diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h deleted file mode 100644 index 442c32b3a3d..00000000000 --- a/intern/cycles/util/util_types.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_H__ -#define __UTIL_TYPES_H__ - -#include - -/* Standard Integer Types */ - -#if !defined(__KERNEL_GPU__) -# include -#endif - -#include "util/util_defines.h" - -#ifndef __KERNEL_GPU__ -# include "util/util_optimization.h" -# include "util/util_simd.h" -#endif - -CCL_NAMESPACE_BEGIN - -/* Types - * - * Define simpler unsigned type names, and integer with defined number of bits. - * Also vector types, named to be compatible with OpenCL builtin types, while - * working for CUDA and C++ too. 
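The step selection at the top of transform_motion_array_interpolate maps time in [0, 1] to a pair of adjacent motion steps plus a local interpolation factor; clamping step to maxstep - 1 keeps step + 1 in range at time = 1. A quick standalone check of that arithmetic (illustrative only):

    #include <algorithm>
    #include <cstdio>

    int main()
    {
      /* Same indexing as transform_motion_array_interpolate above. */
      const unsigned numsteps = 3; /* motion steps at times 0.0, 0.5 and 1.0 */
      const float time = 0.75f;

      const int maxstep = (int)numsteps - 1;
      const int step = std::min((int)(time * maxstep), maxstep - 1);
      const float t = time * maxstep - step;

      /* Expect: interpolate between steps 1 and 2 with t = 0.5. */
      printf("step = %d, t = %g\n", step, t);
      return 0;
    }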
*/ - -/* Shorter Unsigned Names */ - -typedef unsigned char uchar; -typedef unsigned int uint; -typedef unsigned short ushort; - -/* Fixed Bits Types */ - -#ifndef __KERNEL_GPU__ -/* Generic Memory Pointer */ - -typedef uint64_t device_ptr; -#endif /* __KERNEL_GPU__ */ - -ccl_device_inline size_t align_up(size_t offset, size_t alignment) -{ - return (offset + alignment - 1) & ~(alignment - 1); -} - -ccl_device_inline size_t divide_up(size_t x, size_t y) -{ - return (x + y - 1) / y; -} - -ccl_device_inline size_t round_up(size_t x, size_t multiple) -{ - return ((x + multiple - 1) / multiple) * multiple; -} - -ccl_device_inline size_t round_down(size_t x, size_t multiple) -{ - return (x / multiple) * multiple; -} - -ccl_device_inline bool is_power_of_two(size_t x) -{ - return (x & (x - 1)) == 0; -} - -CCL_NAMESPACE_END - -/* Vectorized types declaration. */ -#include "util/util_types_uchar2.h" -#include "util/util_types_uchar3.h" -#include "util/util_types_uchar4.h" - -#include "util/util_types_int2.h" -#include "util/util_types_int3.h" -#include "util/util_types_int4.h" - -#include "util/util_types_uint2.h" -#include "util/util_types_uint3.h" -#include "util/util_types_uint4.h" - -#include "util/util_types_ushort4.h" - -#include "util/util_types_float2.h" -#include "util/util_types_float3.h" -#include "util/util_types_float4.h" -#include "util/util_types_float8.h" - -#include "util/util_types_vector3.h" - -/* Vectorized types implementation. */ -#include "util/util_types_uchar2_impl.h" -#include "util/util_types_uchar3_impl.h" -#include "util/util_types_uchar4_impl.h" - -#include "util/util_types_int2_impl.h" -#include "util/util_types_int3_impl.h" -#include "util/util_types_int4_impl.h" - -#include "util/util_types_uint2_impl.h" -#include "util/util_types_uint3_impl.h" -#include "util/util_types_uint4_impl.h" - -#include "util/util_types_float2_impl.h" -#include "util/util_types_float3_impl.h" -#include "util/util_types_float4_impl.h" -#include "util/util_types_float8_impl.h" - -#include "util/util_types_vector3_impl.h" - -/* SSE types. */ -#ifndef __KERNEL_GPU__ -# include "util/util_sseb.h" -# include "util/util_ssef.h" -# include "util/util_ssei.h" -# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) -# include "util/util_avxb.h" -# include "util/util_avxf.h" -# include "util/util_avxi.h" -# endif -#endif - -#endif /* __UTIL_TYPES_H__ */ diff --git a/intern/cycles/util/util_types_float2.h b/intern/cycles/util/util_types_float2.h deleted file mode 100644 index 3760bf579b6..00000000000 --- a/intern/cycles/util/util_types_float2.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_FLOAT2_H__ -#define __UTIL_TYPES_FLOAT2_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
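align_up above relies on the usual power-of-two mask trick (so the alignment must be a power of two), while divide_up and round_up implement ceiling division. A few worked values as a standalone check, with the helpers copied minus the ccl_device_inline qualifier so the snippet compiles on its own:

    #include <cassert>
    #include <cstddef>

    static size_t align_up(size_t offset, size_t alignment)
    {
      return (offset + alignment - 1) & ~(alignment - 1);
    }

    static size_t divide_up(size_t x, size_t y)
    {
      return (x + y - 1) / y;
    }

    static bool is_power_of_two(size_t x)
    {
      return (x & (x - 1)) == 0;
    }

    int main()
    {
      assert(align_up(13, 8) == 16); /* (13 + 7) & ~7 */
      assert(align_up(16, 8) == 16); /* already aligned values are unchanged */
      assert(divide_up(10, 4) == 3); /* ceiling division */
      assert(is_power_of_two(64));
      assert(!is_power_of_two(48));
      assert(is_power_of_two(0)); /* note: the bit trick also reports 0 as a power of two */
      return 0;
    }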
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct float2 { - float x, y; - - __forceinline float operator[](int i) const; - __forceinline float &operator[](int i); -}; - -ccl_device_inline float2 make_float2(float x, float y); -ccl_device_inline void print_float2(const char *label, const float2 &a); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT2_H__ */ diff --git a/intern/cycles/util/util_types_float2_impl.h b/intern/cycles/util/util_types_float2_impl.h deleted file mode 100644 index 7810d2a8781..00000000000 --- a/intern/cycles/util/util_types_float2_impl.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_FLOAT2_IMPL_H__ -#define __UTIL_TYPES_FLOAT2_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -#ifndef __KERNEL_GPU__ -# include -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -__forceinline float float2::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -__forceinline float &float2::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -ccl_device_inline float2 make_float2(float x, float y) -{ - float2 a = {x, y}; - return a; -} - -ccl_device_inline void print_float2(const char *label, const float2 &a) -{ - printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT2_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_float3.h b/intern/cycles/util/util_types_float3.h deleted file mode 100644 index 694a600bf5c..00000000000 --- a/intern/cycles/util/util_types_float3.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_FLOAT3_H__ -#define __UTIL_TYPES_FLOAT3_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
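The operator[] accessors in these vector headers all use the same idiom: index the components through pointer arithmetic from &x, which assumes x, y, ... are laid out contiguously with no padding in between. A standalone miniature of the pattern, with plain assert standing in for util_assert (illustrative only):

    #include <cassert>

    struct Pair {
      float x, y;

      float operator[](int i) const
      {
        assert(i >= 0 && i < 2);
        return *(&x + i); /* same &x + i indexing as the headers above */
      }
      float &operator[](int i)
      {
        assert(i >= 0 && i < 2);
        return *(&x + i);
      }
    };

    int main()
    {
      Pair p = {1.0f, 2.0f};
      p[1] = 5.0f;
      assert(p[0] == 1.0f && p[1] == 5.0f);
      return 0;
    }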
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct ccl_try_align(16) float3 -{ -# ifdef __KERNEL_SSE__ - union { - __m128 m128; - struct { - float x, y, z, w; - }; - }; - - __forceinline float3(); - __forceinline float3(const float3 &a); - __forceinline explicit float3(const __m128 &a); - - __forceinline operator const __m128 &() const; - __forceinline operator __m128 &(); - - __forceinline float3 &operator=(const float3 &a); -# else /* __KERNEL_SSE__ */ - float x, y, z, w; -# endif /* __KERNEL_SSE__ */ - - __forceinline float operator[](int i) const; - __forceinline float &operator[](int i); -}; - -ccl_device_inline float3 make_float3(float f); -ccl_device_inline float3 make_float3(float x, float y, float z); -ccl_device_inline void print_float3(const char *label, const float3 &a); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT3_H__ */ diff --git a/intern/cycles/util/util_types_float3_impl.h b/intern/cycles/util/util_types_float3_impl.h deleted file mode 100644 index ab25fb4c975..00000000000 --- a/intern/cycles/util/util_types_float3_impl.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_FLOAT3_IMPL_H__ -#define __UTIL_TYPES_FLOAT3_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -#ifndef __KERNEL_GPU__ -# include -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -# ifdef __KERNEL_SSE__ -__forceinline float3::float3() -{ -} - -__forceinline float3::float3(const float3 &a) : m128(a.m128) -{ -} - -__forceinline float3::float3(const __m128 &a) : m128(a) -{ -} - -__forceinline float3::operator const __m128 &() const -{ - return m128; -} - -__forceinline float3::operator __m128 &() -{ - return m128; -} - -__forceinline float3 &float3::operator=(const float3 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -__forceinline float float3::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -__forceinline float &float3::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -ccl_device_inline float3 make_float3(float f) -{ -# ifdef __KERNEL_SSE__ - float3 a(_mm_set1_ps(f)); -# else - float3 a = {f, f, f, f}; -# endif - return a; -} - -ccl_device_inline float3 make_float3(float x, float y, float z) -{ -# ifdef __KERNEL_SSE__ - float3 a(_mm_set_ps(0.0f, z, y, x)); -# else - float3 a = {x, y, z, 0.0f}; -# endif - return a; -} - -ccl_device_inline void print_float3(const char *label, const float3 &a) -{ - printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT3_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_float4.h b/intern/cycles/util/util_types_float4.h deleted file mode 100644 index c29e6e15bc3..00000000000 --- a/intern/cycles/util/util_types_float4.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_FLOAT4_H__ -#define __UTIL_TYPES_FLOAT4_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
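In the SSE path of make_float3 above, _mm_set_ps takes its arguments from the highest lane down, so _mm_set_ps(0.0f, z, y, x) puts x in lane 0 and leaves the padding component w at zero, matching the scalar {x, y, z, 0.0f} initializer (float3 is padded to four floats in both paths). A minimal check of that lane order (requires SSE; illustrative only):

    #include <cstdio>
    #include <xmmintrin.h>

    int main()
    {
      /* _mm_set_ps(e3, e2, e1, e0): the last argument lands in the lowest lane. */
      const __m128 v = _mm_set_ps(0.0f, 3.0f, 2.0f, 1.0f);
      float lanes[4];
      _mm_storeu_ps(lanes, v);
      printf("%g %g %g %g\n", lanes[0], lanes[1], lanes[2], lanes[3]); /* 1 2 3 0 */
      return 0;
    }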
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct int4; - -struct ccl_try_align(16) float4 -{ -# ifdef __KERNEL_SSE__ - union { - __m128 m128; - struct { - float x, y, z, w; - }; - }; - - __forceinline float4(); - __forceinline explicit float4(const __m128 &a); - - __forceinline operator const __m128 &() const; - __forceinline operator __m128 &(); - - __forceinline float4 &operator=(const float4 &a); - -# else /* __KERNEL_SSE__ */ - float x, y, z, w; -# endif /* __KERNEL_SSE__ */ - - __forceinline float operator[](int i) const; - __forceinline float &operator[](int i); -}; - -ccl_device_inline float4 make_float4(float f); -ccl_device_inline float4 make_float4(float x, float y, float z, float w); -ccl_device_inline float4 make_float4(const int4 &i); -ccl_device_inline void print_float4(const char *label, const float4 &a); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT4_H__ */ diff --git a/intern/cycles/util/util_types_float4_impl.h b/intern/cycles/util/util_types_float4_impl.h deleted file mode 100644 index 05a1feee5b2..00000000000 --- a/intern/cycles/util/util_types_float4_impl.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_FLOAT4_IMPL_H__ -#define __UTIL_TYPES_FLOAT4_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -#ifndef __KERNEL_GPU__ -# include -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -# ifdef __KERNEL_SSE__ -__forceinline float4::float4() -{ -} - -__forceinline float4::float4(const __m128 &a) : m128(a) -{ -} - -__forceinline float4::operator const __m128 &() const -{ - return m128; -} - -__forceinline float4::operator __m128 &() -{ - return m128; -} - -__forceinline float4 &float4::operator=(const float4 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -__forceinline float float4::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -__forceinline float &float4::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -ccl_device_inline float4 make_float4(float f) -{ -# ifdef __KERNEL_SSE__ - float4 a(_mm_set1_ps(f)); -# else - float4 a = {f, f, f, f}; -# endif - return a; -} - -ccl_device_inline float4 make_float4(float x, float y, float z, float w) -{ -# ifdef __KERNEL_SSE__ - float4 a(_mm_set_ps(w, z, y, x)); -# else - float4 a = {x, y, z, w}; -# endif - return a; -} - -ccl_device_inline float4 make_float4(const int4 &i) -{ -# ifdef __KERNEL_SSE__ - float4 a(_mm_cvtepi32_ps(i.m128)); -# else - float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; -# endif - return a; -} - -ccl_device_inline void print_float4(const char *label, const float4 &a) -{ - printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w); -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT4_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_float8.h b/intern/cycles/util/util_types_float8.h deleted file mode 100644 index 27da120a4ba..00000000000 --- a/intern/cycles/util/util_types_float8.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Original code Copyright 2017, Intel Corporation - * Modifications Copyright 2018, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __UTIL_TYPES_FLOAT8_H__ -#define __UTIL_TYPES_FLOAT8_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ - -struct ccl_try_align(32) float8 -{ -# ifdef __KERNEL_AVX2__ - union { - __m256 m256; - struct { - float a, b, c, d, e, f, g, h; - }; - }; - - __forceinline float8(); - __forceinline float8(const float8 &a); - __forceinline explicit float8(const __m256 &a); - - __forceinline operator const __m256 &() const; - __forceinline operator __m256 &(); - - __forceinline float8 &operator=(const float8 &a); - -# else /* __KERNEL_AVX2__ */ - float a, b, c, d, e, f, g, h; -# endif /* __KERNEL_AVX2__ */ - - __forceinline float operator[](int i) const; - __forceinline float &operator[](int i); -}; - -ccl_device_inline float8 make_float8(float f); -ccl_device_inline float8 -make_float8(float a, float b, float c, float d, float e, float f, float g, float h); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT8_H__ */ diff --git a/intern/cycles/util/util_types_float8_impl.h b/intern/cycles/util/util_types_float8_impl.h deleted file mode 100644 index 4e4ea28c6a4..00000000000 --- a/intern/cycles/util/util_types_float8_impl.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Original code Copyright 2017, Intel Corporation - * Modifications Copyright 2018, Blender Foundation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __UTIL_TYPES_FLOAT8_IMPL_H__ -#define __UTIL_TYPES_FLOAT8_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -#ifndef __KERNEL_GPU__ -# include -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -# ifdef __KERNEL_AVX2__ -__forceinline float8::float8() -{ -} - -__forceinline float8::float8(const float8 &f) : m256(f.m256) -{ -} - -__forceinline float8::float8(const __m256 &f) : m256(f) -{ -} - -__forceinline float8::operator const __m256 &() const -{ - return m256; -} - -__forceinline float8::operator __m256 &() -{ - return m256; -} - -__forceinline float8 &float8::operator=(const float8 &f) -{ - m256 = f.m256; - return *this; -} -# endif /* __KERNEL_AVX2__ */ - -__forceinline float float8::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 8); - return *(&a + i); -} - -__forceinline float &float8::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 8); - return *(&a + i); -} - -ccl_device_inline float8 make_float8(float f) -{ -# ifdef __KERNEL_AVX2__ - float8 r(_mm256_set1_ps(f)); -# else - float8 r = {f, f, f, f, f, f, f, f}; -# endif - return r; -} - -ccl_device_inline float8 -make_float8(float a, float b, float c, float d, float e, float f, float g, float h) -{ -# ifdef __KERNEL_AVX2__ - float8 r(_mm256_set_ps(a, b, c, d, e, f, g, h)); -# else - float8 r = {a, b, c, d, e, f, g, h}; -# endif - return r; -} - -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_FLOAT8_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_int2.h b/intern/cycles/util/util_types_int2.h deleted file mode 100644 index 8811e5ec7c2..00000000000 --- a/intern/cycles/util/util_types_int2.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_INT2_H__ -#define __UTIL_TYPES_INT2_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct int2 { - int x, y; - - __forceinline int operator[](int i) const; - __forceinline int &operator[](int i); -}; - -ccl_device_inline int2 make_int2(int x, int y); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT2_H__ */ diff --git a/intern/cycles/util/util_types_int2_impl.h b/intern/cycles/util/util_types_int2_impl.h deleted file mode 100644 index ce95d4f14e5..00000000000 --- a/intern/cycles/util/util_types_int2_impl.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __UTIL_TYPES_INT2_IMPL_H__ -#define __UTIL_TYPES_INT2_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -int int2::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -int &int2::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -ccl_device_inline int2 make_int2(int x, int y) -{ - int2 a = {x, y}; - return a; -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT2_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_int3.h b/intern/cycles/util/util_types_int3.h deleted file mode 100644 index 09edc09dff3..00000000000 --- a/intern/cycles/util/util_types_int3.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_INT3_H__ -#define __UTIL_TYPES_INT3_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct ccl_try_align(16) int3 -{ -# ifdef __KERNEL_SSE__ - union { - __m128i m128; - struct { - int x, y, z, w; - }; - }; - - __forceinline int3(); - __forceinline int3(const int3 &a); - __forceinline explicit int3(const __m128i &a); - - __forceinline operator const __m128i &() const; - __forceinline operator __m128i &(); - - __forceinline int3 &operator=(const int3 &a); -# else /* __KERNEL_SSE__ */ - int x, y, z, w; -# endif /* __KERNEL_SSE__ */ - - __forceinline int operator[](int i) const; - __forceinline int &operator[](int i); -}; - -ccl_device_inline int3 make_int3(int i); -ccl_device_inline int3 make_int3(int x, int y, int z); -ccl_device_inline void print_int3(const char *label, const int3 &a); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT3_H__ */ diff --git a/intern/cycles/util/util_types_int3_impl.h b/intern/cycles/util/util_types_int3_impl.h deleted file mode 100644 index 080c892640b..00000000000 --- a/intern/cycles/util/util_types_int3_impl.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_INT3_IMPL_H__ -#define __UTIL_TYPES_INT3_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -#ifndef __KERNEL_GPU__ -# include -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -# ifdef __KERNEL_SSE__ -__forceinline int3::int3() -{ -} - -__forceinline int3::int3(const __m128i &a) : m128(a) -{ -} - -__forceinline int3::int3(const int3 &a) : m128(a.m128) -{ -} - -__forceinline int3::operator const __m128i &() const -{ - return m128; -} - -__forceinline int3::operator __m128i &() -{ - return m128; -} - -__forceinline int3 &int3::operator=(const int3 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -__forceinline int int3::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -__forceinline int &int3::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -ccl_device_inline int3 make_int3(int i) -{ -# ifdef __KERNEL_SSE__ - int3 a(_mm_set1_epi32(i)); -# else - int3 a = {i, i, i, i}; -# endif - return a; -} - -ccl_device_inline int3 make_int3(int x, int y, int z) -{ -# ifdef __KERNEL_SSE__ - int3 a(_mm_set_epi32(0, z, y, x)); -# else - int3 a = {x, y, z, 0}; -# endif - - return a; -} - -ccl_device_inline void print_int3(const char *label, const int3 &a) -{ - printf("%s: %d %d %d\n", label, a.x, a.y, a.z); -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT3_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_int4.h b/intern/cycles/util/util_types_int4.h deleted file mode 100644 index 5c7917cf5d6..00000000000 --- a/intern/cycles/util/util_types_int4.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_INT4_H__ -#define __UTIL_TYPES_INT4_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ - -struct float3; -struct float4; - -struct ccl_try_align(16) int4 -{ -# ifdef __KERNEL_SSE__ - union { - __m128i m128; - struct { - int x, y, z, w; - }; - }; - - __forceinline int4(); - __forceinline int4(const int4 &a); - __forceinline explicit int4(const __m128i &a); - - __forceinline operator const __m128i &() const; - __forceinline operator __m128i &(); - - __forceinline int4 &operator=(const int4 &a); -# else /* __KERNEL_SSE__ */ - int x, y, z, w; -# endif /* __KERNEL_SSE__ */ - - __forceinline int operator[](int i) const; - __forceinline int &operator[](int i); -}; - -ccl_device_inline int4 make_int4(int i); -ccl_device_inline int4 make_int4(int x, int y, int z, int w); -ccl_device_inline int4 make_int4(const float3 &f); -ccl_device_inline int4 make_int4(const float4 &f); -ccl_device_inline void print_int4(const char *label, const int4 &a); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT4_H__ */ diff --git a/intern/cycles/util/util_types_int4_impl.h b/intern/cycles/util/util_types_int4_impl.h deleted file mode 100644 index c6f6ff23a17..00000000000 --- a/intern/cycles/util/util_types_int4_impl.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_INT4_IMPL_H__ -#define __UTIL_TYPES_INT4_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -#ifndef __KERNEL_GPU__ -# include -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -# ifdef __KERNEL_SSE__ -__forceinline int4::int4() -{ -} - -__forceinline int4::int4(const int4 &a) : m128(a.m128) -{ -} - -__forceinline int4::int4(const __m128i &a) : m128(a) -{ -} - -__forceinline int4::operator const __m128i &() const -{ - return m128; -} - -__forceinline int4::operator __m128i &() -{ - return m128; -} - -__forceinline int4 &int4::operator=(const int4 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -__forceinline int int4::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -__forceinline int &int4::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -ccl_device_inline int4 make_int4(int i) -{ -# ifdef __KERNEL_SSE__ - int4 a(_mm_set1_epi32(i)); -# else - int4 a = {i, i, i, i}; -# endif - return a; -} - -ccl_device_inline int4 make_int4(int x, int y, int z, int w) -{ -# ifdef __KERNEL_SSE__ - int4 a(_mm_set_epi32(w, z, y, x)); -# else - int4 a = {x, y, z, w}; -# endif - return a; -} - -ccl_device_inline int4 make_int4(const float3 &f) -{ -# ifdef __KERNEL_SSE__ - int4 a(_mm_cvtps_epi32(f.m128)); -# else - int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; -# endif - return a; -} - -ccl_device_inline int4 make_int4(const float4 &f) -{ -# ifdef __KERNEL_SSE__ - int4 a(_mm_cvtps_epi32(f.m128)); -# else - int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; -# endif - return a; -} - -ccl_device_inline void print_int4(const char *label, const int4 &a) -{ - printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_INT4_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uchar2.h b/intern/cycles/util/util_types_uchar2.h deleted file mode 100644 index 8cc486e3e48..00000000000 --- a/intern/cycles/util/util_types_uchar2.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR2_H__ -#define __UTIL_TYPES_UCHAR2_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct uchar2 { - uchar x, y; - - __forceinline uchar operator[](int i) const; - __forceinline uchar &operator[](int i); -}; - -ccl_device_inline uchar2 make_uchar2(uchar x, uchar y); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UCHAR2_H__ */ diff --git a/intern/cycles/util/util_types_uchar2_impl.h b/intern/cycles/util/util_types_uchar2_impl.h deleted file mode 100644 index 16968c32dd9..00000000000 --- a/intern/cycles/util/util_types_uchar2_impl.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR2_IMPL_H__ -#define __UTIL_TYPES_UCHAR2_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -uchar uchar2::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -uchar &uchar2::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -ccl_device_inline uchar2 make_uchar2(uchar x, uchar y) -{ - uchar2 a = {x, y}; - return a; -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UCHAR2_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uchar3.h b/intern/cycles/util/util_types_uchar3.h deleted file mode 100644 index 5838c437c70..00000000000 --- a/intern/cycles/util/util_types_uchar3.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR3_H__ -#define __UTIL_TYPES_UCHAR3_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct uchar3 { - uchar x, y, z; - - __forceinline uchar operator[](int i) const; - __forceinline uchar &operator[](int i); -}; - -ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UCHAR3_H__ */ diff --git a/intern/cycles/util/util_types_uchar3_impl.h b/intern/cycles/util/util_types_uchar3_impl.h deleted file mode 100644 index aa31b725731..00000000000 --- a/intern/cycles/util/util_types_uchar3_impl.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR3_IMPL_H__ -#define __UTIL_TYPES_UCHAR3_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -uchar uchar3::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -uchar &uchar3::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z) -{ - uchar3 a = {x, y, z}; - return a; -} -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UCHAR3_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uchar4.h b/intern/cycles/util/util_types_uchar4.h deleted file mode 100644 index 22b6a1ac705..00000000000 --- a/intern/cycles/util/util_types_uchar4.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR4_H__ -#define __UTIL_TYPES_UCHAR4_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_GPU__ -struct uchar4 { - uchar x, y, z, w; - - __forceinline uchar operator[](int i) const; - __forceinline uchar &operator[](int i); -}; - -ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w); -#endif /* __KERNEL_GPU__ */ - -CCL_NAMESPACE_END - -#endif /* __UTIL_TYPES_UCHAR4_H__ */ diff --git a/intern/cycles/util/util_types_uchar4_impl.h b/intern/cycles/util/util_types_uchar4_impl.h deleted file mode 100644 index 79879f176a6..00000000000 --- a/intern/cycles/util/util_types_uchar4_impl.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2011-2017 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __UTIL_TYPES_UCHAR4_IMPL_H__ -#define __UTIL_TYPES_UCHAR4_IMPL_H__ - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util_types.h instead." 
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __KERNEL_GPU__
-uchar uchar4::operator[](int i) const
-{
-  util_assert(i >= 0);
-  util_assert(i < 4);
-  return *(&x + i);
-}
-
-uchar &uchar4::operator[](int i)
-{
-  util_assert(i >= 0);
-  util_assert(i < 4);
-  return *(&x + i);
-}
-
-ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w)
-{
-  uchar4 a = {x, y, z, w};
-  return a;
-}
-#endif /* __KERNEL_GPU__ */
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_UCHAR4_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_uint2.h b/intern/cycles/util/util_types_uint2.h
deleted file mode 100644
index abcb8ee5346..00000000000
--- a/intern/cycles/util/util_types_uint2.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_TYPES_UINT2_H__
-#define __UTIL_TYPES_UINT2_H__
-
-#ifndef __UTIL_TYPES_H__
-#  error "Do not include this file directly, include util_types.h instead."
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __KERNEL_GPU__
-struct uint2 {
-  uint x, y;
-
-  __forceinline uint operator[](uint i) const;
-  __forceinline uint &operator[](uint i);
-};
-
-ccl_device_inline uint2 make_uint2(uint x, uint y);
-#endif /* __KERNEL_GPU__ */
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_UINT2_H__ */
diff --git a/intern/cycles/util/util_types_uint2_impl.h b/intern/cycles/util/util_types_uint2_impl.h
deleted file mode 100644
index db62bd99b89..00000000000
--- a/intern/cycles/util/util_types_uint2_impl.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_TYPES_UINT2_IMPL_H__
-#define __UTIL_TYPES_UINT2_IMPL_H__
-
-#ifndef __UTIL_TYPES_H__
-#  error "Do not include this file directly, include util_types.h instead."
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __KERNEL_GPU__
-__forceinline uint uint2::operator[](uint i) const
-{
-  util_assert(i < 2);
-  return *(&x + i);
-}
-
-__forceinline uint &uint2::operator[](uint i)
-{
-  util_assert(i < 2);
-  return *(&x + i);
-}
-
-ccl_device_inline uint2 make_uint2(uint x, uint y)
-{
-  uint2 a = {x, y};
-  return a;
-}
-#endif /* __KERNEL_GPU__ */
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_UINT2_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_uint3.h b/intern/cycles/util/util_types_uint3.h
deleted file mode 100644
index 436d870b621..00000000000
--- a/intern/cycles/util/util_types_uint3.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_TYPES_UINT3_H__
-#define __UTIL_TYPES_UINT3_H__
-
-#ifndef __UTIL_TYPES_H__
-#  error "Do not include this file directly, include util_types.h instead."
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __KERNEL_GPU__
-struct uint3 {
-  uint x, y, z;
-
-  __forceinline uint operator[](uint i) const;
-  __forceinline uint &operator[](uint i);
-};
-
-ccl_device_inline uint3 make_uint3(uint x, uint y, uint z);
-#endif /* __KERNEL_GPU__ */
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_UINT3_H__ */
diff --git a/intern/cycles/util/util_types_uint3_impl.h b/intern/cycles/util/util_types_uint3_impl.h
deleted file mode 100644
index d188fa06e2a..00000000000
--- a/intern/cycles/util/util_types_uint3_impl.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_TYPES_UINT3_IMPL_H__
-#define __UTIL_TYPES_UINT3_IMPL_H__
-
-#ifndef __UTIL_TYPES_H__
-#  error "Do not include this file directly, include util_types.h instead."
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __KERNEL_GPU__
-__forceinline uint uint3::operator[](uint i) const
-{
-  util_assert(i < 3);
-  return *(&x + i);
-}
-
-__forceinline uint &uint3::operator[](uint i)
-{
-  util_assert(i < 3);
-  return *(&x + i);
-}
-
-ccl_device_inline uint3 make_uint3(uint x, uint y, uint z)
-{
-  uint3 a = {x, y, z};
-  return a;
-}
-#endif /* __KERNEL_GPU__ */
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_UINT3_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_uint4.h b/intern/cycles/util/util_types_uint4.h
deleted file mode 100644
index 57f2859fedf..00000000000
--- a/intern/cycles/util/util_types_uint4.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_TYPES_UINT4_H__
-#define __UTIL_TYPES_UINT4_H__
-
-#ifndef __UTIL_TYPES_H__
-#  error "Do not include this file directly, include util_types.h instead."
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __KERNEL_GPU__
-struct uint4 {
-  uint x, y, z, w;
-
-  __forceinline uint operator[](uint i) const;
-  __forceinline uint &operator[](uint i);
-};
-
-ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w);
-#endif /* __KERNEL_GPU__ */
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_UINT4_H__ */
diff --git a/intern/cycles/util/util_types_uint4_impl.h b/intern/cycles/util/util_types_uint4_impl.h
deleted file mode 100644
index bac8d23030d..00000000000
--- a/intern/cycles/util/util_types_uint4_impl.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_TYPES_UINT4_IMPL_H__
-#define __UTIL_TYPES_UINT4_IMPL_H__
-
-#ifndef __UTIL_TYPES_H__
-#  error "Do not include this file directly, include util_types.h instead."
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __KERNEL_GPU__
-__forceinline uint uint4::operator[](uint i) const
-{
-  util_assert(i < 4);
-  return *(&x + i);
-}
-
-__forceinline uint &uint4::operator[](uint i)
-{
-  util_assert(i < 4);
-  return *(&x + i);
-}
-
-ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w)
-{
-  uint4 a = {x, y, z, w};
-  return a;
-}
-#endif /* __KERNEL_GPU__ */
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_UINT4_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_ushort4.h b/intern/cycles/util/util_types_ushort4.h
deleted file mode 100644
index 476ceec622c..00000000000
--- a/intern/cycles/util/util_types_ushort4.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_TYPES_USHORT4_H__
-#define __UTIL_TYPES_USHORT4_H__
-
-#ifndef __UTIL_TYPES_H__
-#  error "Do not include this file directly, include util_types.h instead."
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __KERNEL_GPU__
-
-struct ushort4 {
-  uint16_t x, y, z, w;
-};
-
-#endif
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_USHORT4_H__ */
diff --git a/intern/cycles/util/util_types_vector3.h b/intern/cycles/util/util_types_vector3.h
deleted file mode 100644
index 728c7ca62a1..00000000000
--- a/intern/cycles/util/util_types_vector3.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_TYPES_VECTOR3_H__
-#define __UTIL_TYPES_VECTOR3_H__
-
-#ifndef __UTIL_TYPES_H__
-#  error "Do not include this file directly, include util_types.h instead."
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __KERNEL_GPU__
-template<typename T> class vector3 {
- public:
-  T x, y, z;
-
-  __forceinline vector3();
-  __forceinline vector3(const T &a);
-  __forceinline vector3(const T &x, const T &y, const T &z);
-};
-#endif /* __KERNEL_GPU__ */
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_VECTOR3_H__ */
diff --git a/intern/cycles/util/util_types_vector3_impl.h b/intern/cycles/util/util_types_vector3_impl.h
deleted file mode 100644
index 33ba53e20b2..00000000000
--- a/intern/cycles/util/util_types_vector3_impl.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright 2011-2017 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_TYPES_VECTOR3_IMPL_H__
-#define __UTIL_TYPES_VECTOR3_IMPL_H__
-
-#ifndef __UTIL_TYPES_H__
-#  error "Do not include this file directly, include util_types.h instead."
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-#ifndef __KERNEL_GPU__
-template<typename T> ccl_always_inline vector3<T>::vector3()
-{
-}
-
-template<typename T> ccl_always_inline vector3<T>::vector3(const T &a) : x(a), y(a), z(a)
-{
-}
-
-template<typename T>
-ccl_always_inline vector3<T>::vector3(const T &x, const T &y, const T &z) : x(x), y(y), z(z)
-{
-}
-#endif /* __KERNEL_GPU__ */
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_VECTOR3_IMPL_H__ */
diff --git a/intern/cycles/util/util_unique_ptr.h b/intern/cycles/util/util_unique_ptr.h
deleted file mode 100644
index 3181eafd43d..00000000000
--- a/intern/cycles/util/util_unique_ptr.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_UNIQUE_PTR_H__
-#define __UTIL_UNIQUE_PTR_H__
-
-#include <memory>
-
-CCL_NAMESPACE_BEGIN
-
-using std::make_unique;
-using std::unique_ptr;
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_UNIQUE_PTR_H__ */
diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
deleted file mode 100644
index 87cd4de8438..00000000000
--- a/intern/cycles/util/util_vector.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_VECTOR_H__
-#define __UTIL_VECTOR_H__
-
-#include <cassert>
-#include <cstring>
-#include <vector>
-
-#include "util/util_aligned_malloc.h"
-#include "util/util_guarded_allocator.h"
-#include "util/util_types.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Own subclass-ed version of std::vector. Subclass is needed because:
- *
- * - Use own allocator which keeps track of used/peak memory.
- * - Have method to ensure capacity is re-set to 0.
- */
-template<typename value_type, typename allocator_type = GuardedAllocator<value_type>>
-class vector : public std::vector<value_type, allocator_type> {
- public:
-  typedef std::vector<value_type, allocator_type> BaseClass;
-
-  /* Inherit all constructors from base class. */
-  using BaseClass::vector;
-
-  /* Try as hard as possible to use zero memory. */
-  void free_memory()
-  {
-    vector<value_type, allocator_type> empty;
-    BaseClass::swap(empty);
-  }
-
-  /* Some external API might demand working with std::vector. */
-  operator std::vector<value_type>()
-  {
-    return std::vector<value_type>(this->begin(), this->end());
-  }
-};
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_VECTOR_H__ */
diff --git a/intern/cycles/util/util_version.h b/intern/cycles/util/util_version.h
deleted file mode 100644
index 8bce5ff85aa..00000000000
--- a/intern/cycles/util/util_version.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2011-2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_VERSION_H__
-#define __UTIL_VERSION_H__
-
-/* Cycles version number */
-
-CCL_NAMESPACE_BEGIN
-
-#define CYCLES_VERSION_MAJOR 1
-#define CYCLES_VERSION_MINOR 13
-#define CYCLES_VERSION_PATCH 0
-
-#define CYCLES_MAKE_VERSION_STRING2(a, b, c) #a "." #b "." #c
-#define CYCLES_MAKE_VERSION_STRING(a, b, c) CYCLES_MAKE_VERSION_STRING2(a, b, c)
-#define CYCLES_VERSION_STRING \
-  CYCLES_MAKE_VERSION_STRING(CYCLES_VERSION_MAJOR, CYCLES_VERSION_MINOR, CYCLES_VERSION_PATCH)
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_VERSION_H__ */
diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp
deleted file mode 100644
index 9d9ff451b3b..00000000000
--- a/intern/cycles/util/util_view.cpp
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "util/util_opengl.h"
-#include "util/util_string.h"
-#include "util/util_time.h"
-#include "util/util_version.h"
-#include "util/util_view.h"
-
-#ifdef __APPLE__
-#  include <GLUT/glut.h>
-#else
-#  include <GL/glut.h>
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-/* structs */
-
-struct View {
-  ViewInitFunc initf;
-  ViewExitFunc exitf;
-  ViewResizeFunc resize;
-  ViewDisplayFunc display;
-  ViewKeyboardFunc keyboard;
-  ViewMotionFunc motion;
-
-  bool first_display;
-  bool redraw;
-
-  int mouseX, mouseY;
-  int mouseBut0, mouseBut2;
-
-  int width, height;
-} V;
-
-/* public */
-
-static void view_display_text(int x, int y, const char *text)
-{
-  const char *c;
-
-  glRasterPos3f(x, y, 0);
-
-  for (c = text; *c != '\0'; c++)
-    glutBitmapCharacter(GLUT_BITMAP_HELVETICA_10, *c);
-}
-
-void view_display_info(const char *info)
-{
-  const int height = 20;
-
-  glEnable(GL_BLEND);
-  glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
-  glColor4f(0.1f, 0.1f, 0.1f, 0.8f);
-  glRectf(0.0f, V.height - height, V.width, V.height);
-  glDisable(GL_BLEND);
-
-  glColor3f(0.5f, 0.5f, 0.5f);
-
-  view_display_text(10, 7 + V.height - height, info);
-
-  glColor3f(1.0f, 1.0f, 1.0f);
-}
-
-void view_display_help()
-{
-  const int w = (int)((float)V.width / 1.15f);
-  const int h = (int)((float)V.height / 1.15f);
-
-  const int x1 = (V.width - w) / 2;
-  const int x2 = x1 + w;
-
-  const int y1 = (V.height - h) / 2;
-  const int y2 = y1 + h;
-
-  glEnable(GL_BLEND);
-  glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
-  glColor4f(0.5f, 0.5f, 0.5f, 0.8f);
-  glRectf(x1, y1, x2, y2);
-  glDisable(GL_BLEND);
-
-  glColor3f(0.8f, 0.8f, 0.8f);
-
-  string info = string("Cycles Renderer ") + CYCLES_VERSION_STRING;
-
-  view_display_text(x1 + 20, y2 - 20, info.c_str());
-  view_display_text(x1 + 20, y2 - 40, "(C) 2011-2016 Blender Foundation");
-  view_display_text(x1 + 20, y2 - 80, "Controls:");
-  view_display_text(x1 + 20, y2 - 100, "h: Info/Help");
-  view_display_text(x1 + 20, y2 - 120, "r: Reset");
-  view_display_text(x1 + 20, y2 - 140, "p: Pause");
-  view_display_text(x1 + 20, y2 - 160, "esc: Cancel");
-  view_display_text(x1 + 20, y2 - 180, "q: Quit program");
-
-  view_display_text(x1 + 20, y2 - 210, "i: Interactive mode");
-  view_display_text(x1 + 20, y2 - 230, "Left mouse: Move camera");
-  view_display_text(x1 + 20, y2 - 250, "Right mouse: Rotate camera");
-  view_display_text(x1 + 20, y2 - 270, "W/A/S/D: Move camera");
-  view_display_text(x1 + 20, y2 - 290, "0/1/2/3: Set max bounces");
-
-  glColor3f(1.0f, 1.0f, 1.0f);
-}
-
-static void view_display()
-{
-  if (V.first_display) {
-    if (V.initf)
-      V.initf();
-    if (V.exitf)
-      atexit(V.exitf);
-
-    V.first_display = false;
-  }
-
-  glClearColor(0.05f, 0.05f, 0.05f, 0.0f);
-  glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
-
-  glMatrixMode(GL_PROJECTION);
-  glLoadIdentity();
-  glOrtho(0, V.width, 0, V.height, -1, 1);
-
-  glMatrixMode(GL_MODELVIEW);
-  glLoadIdentity();
-
-  glRasterPos3f(0, 0, 0);
-
-  if (V.display)
-    V.display();
-
-  glutSwapBuffers();
-}
-
-static void view_reshape(int width, int height)
-{
-  if (width <= 0 || height <= 0)
-    return;
-
-  V.width = width;
-  V.height = height;
-
-  glViewport(0, 0, width, height);
-
-  glMatrixMode(GL_PROJECTION);
-  glLoadIdentity();
-
-  glMatrixMode(GL_MODELVIEW);
-  glLoadIdentity();
-
-  if (V.resize)
-    V.resize(width, height);
-}
-
-static void view_keyboard(unsigned char key, int x, int y)
-{
-  if (V.keyboard)
-    V.keyboard(key);
-
-  if (key == 'm')
-    printf("mouse %d %d\n", x, y);
-  if (key == 'q') {
-    if (V.exitf)
-      V.exitf();
-    exit(0);
-  }
-}
-
-static void view_mouse(int button, int state, int x, int y)
-{
-  if (button == 0) {
-    if (state == GLUT_DOWN) {
-      V.mouseX = x;
-      V.mouseY = y;
-      V.mouseBut0 = 1;
-    }
-    else if (state == GLUT_UP) {
-      V.mouseBut0 = 0;
-    }
-  }
-  else if (button == 2) {
-    if (state == GLUT_DOWN) {
-      V.mouseX = x;
-      V.mouseY = y;
-      V.mouseBut2 = 1;
-    }
-    else if (state == GLUT_UP) {
-      V.mouseBut2 = 0;
-    }
-  }
-}
-
-static void view_motion(int x, int y)
-{
-  const int but = V.mouseBut0 ? 0 : 2;
-  const int distX = x - V.mouseX;
-  const int distY = y - V.mouseY;
-
-  if (V.motion)
-    V.motion(distX, distY, but);
-
-  V.mouseX = x;
-  V.mouseY = y;
-}
-
-static void view_idle()
-{
-  if (V.redraw) {
-    V.redraw = false;
-    glutPostRedisplay();
-  }
-
-  time_sleep(0.1);
-}
-
-void view_main_loop(const char *title,
-                    int width,
-                    int height,
-                    ViewInitFunc initf,
-                    ViewExitFunc exitf,
-                    ViewResizeFunc resize,
-                    ViewDisplayFunc display,
-                    ViewKeyboardFunc keyboard,
-                    ViewMotionFunc motion)
-{
-  const char *name = "app";
-  char *argv = (char *)name;
-  int argc = 1;
-
-  memset(&V, 0, sizeof(V));
-  V.width = width;
-  V.height = height;
-  V.first_display = true;
-  V.redraw = false;
-  V.initf = initf;
-  V.exitf = exitf;
-  V.resize = resize;
-  V.display = display;
-  V.keyboard = keyboard;
-  V.motion = motion;
-
-  glutInit(&argc, &argv);
-  glutInitWindowSize(width, height);
-  glutInitWindowPosition(0, 0);
-  glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
-  glutCreateWindow(title);
-
-  glewInit();
-
-  view_reshape(width, height);
-
-  glutDisplayFunc(view_display);
-  glutIdleFunc(view_idle);
-  glutReshapeFunc(view_reshape);
-  glutKeyboardFunc(view_keyboard);
-  glutMouseFunc(view_mouse);
-  glutMotionFunc(view_motion);
-
-  glutMainLoop();
-}
-
-void view_redraw()
-{
-  V.redraw = true;
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_view.h b/intern/cycles/util/util_view.h
deleted file mode 100644
index ad5c53ee5d5..00000000000
--- a/intern/cycles/util/util_view.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_VIEW_H__
-#define __UTIL_VIEW_H__
-
-/* Functions to display a simple OpenGL window using GLUT, simplified to the
- * bare minimum we need to reduce boilerplate code in tests apps. */
-
-CCL_NAMESPACE_BEGIN
-
-typedef void (*ViewInitFunc)();
-typedef void (*ViewExitFunc)();
-typedef void (*ViewResizeFunc)(int width, int height);
-typedef void (*ViewDisplayFunc)();
-typedef void (*ViewKeyboardFunc)(unsigned char key);
-typedef void (*ViewMotionFunc)(int x, int y, int button);
-
-void view_main_loop(const char *title,
-                    int width,
-                    int height,
-                    ViewInitFunc initf,
-                    ViewExitFunc exitf,
-                    ViewResizeFunc resize,
-                    ViewDisplayFunc display,
-                    ViewKeyboardFunc keyboard,
-                    ViewMotionFunc motion);
-
-void view_display_info(const char *info);
-void view_display_help();
-void view_redraw();
-
-CCL_NAMESPACE_END
-
-#endif /*__UTIL_VIEW_H__*/
diff --git a/intern/cycles/util/util_windows.cpp b/intern/cycles/util/util_windows.cpp
deleted file mode 100644
index 807a5adc84a..00000000000
--- a/intern/cycles/util/util_windows.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2019-2019 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifdef _WIN32
-#  include <windows.h>
-#endif
-
-#include "util_windows.h"
-
-CCL_NAMESPACE_BEGIN
-
-bool system_windows_version_at_least(int major, int build)
-{
-#ifdef _WIN32
-  HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
-  if (hMod == 0) {
-    return false;
-  }
-
-  typedef NTSTATUS(WINAPI * RtlGetVersionPtr)(PRTL_OSVERSIONINFOW);
-  RtlGetVersionPtr rtl_get_version = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion");
-  if (rtl_get_version == NULL) {
-    return false;
-  }
-
-  RTL_OSVERSIONINFOW rovi = {0};
-  rovi.dwOSVersionInfoSize = sizeof(rovi);
-  if (rtl_get_version(&rovi) != 0) {
-    return false;
-  }
-
-  return (rovi.dwMajorVersion > major ||
-          (rovi.dwMajorVersion == major && rovi.dwBuildNumber >= build));
-#else
-  (void)major;
-  (void)build;
-  return false;
-#endif
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_windows.h b/intern/cycles/util/util_windows.h
deleted file mode 100644
index 9cbf91a23a7..00000000000
--- a/intern/cycles/util/util_windows.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright 2011-2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_WINDOWS_H__
-#define __UTIL_WINDOWS_H__
-
-#ifdef _WIN32
-
-#  ifndef NOGDI
-#    define NOGDI
-#  endif
-#  ifndef NOMINMAX
-#    define NOMINMAX
-#  endif
-#  ifndef WIN32_LEAN_AND_MEAN
-#    define WIN32_LEAN_AND_MEAN
-#  endif
-
-#  include <windows.h>
-
-#endif /* _WIN32 */
-
-CCL_NAMESPACE_BEGIN
-
-bool system_windows_version_at_least(int major, int build);
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_WINDOWS_H__ */
diff --git a/intern/cycles/util/util_xml.h b/intern/cycles/util/util_xml.h
deleted file mode 100644
index 6f06f17937b..00000000000
--- a/intern/cycles/util/util_xml.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __UTIL_XML_H__
-#define __UTIL_XML_H__
-
-/* PugiXML is used for XML parsing. */
-
-#include <pugixml.hpp>
-
-CCL_NAMESPACE_BEGIN
-
-OIIO_NAMESPACE_USING
-
-#ifdef WITH_SYSTEM_PUGIXML
-#  define PUGIXML_NAMESPACE pugi
-#else
-#  define PUGIXML_NAMESPACE OIIO_NAMESPACE::pugi
-#endif
-
-using PUGIXML_NAMESPACE::xml_attribute;
-using PUGIXML_NAMESPACE::xml_document;
-using PUGIXML_NAMESPACE::xml_node;
-using PUGIXML_NAMESPACE::xml_parse_result;
-
-CCL_NAMESPACE_END
-
-#endif /* __UTIL_XML_H__ */
diff --git a/intern/cycles/util/vector.h b/intern/cycles/util/vector.h
new file mode 100644
index 00000000000..db35f198dc1
--- /dev/null
+++ b/intern/cycles/util/vector.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_VECTOR_H__
+#define __UTIL_VECTOR_H__
+
+#include <cassert>
+#include <cstring>
+#include <vector>
+
+#include "util/aligned_malloc.h"
+#include "util/guarded_allocator.h"
+#include "util/types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Own subclass-ed version of std::vector. Subclass is needed because:
+ *
+ * - Use own allocator which keeps track of used/peak memory.
+ * - Have method to ensure capacity is re-set to 0.
+ */
+template<typename value_type, typename allocator_type = GuardedAllocator<value_type>>
+class vector : public std::vector<value_type, allocator_type> {
+ public:
+  typedef std::vector<value_type, allocator_type> BaseClass;
+
+  /* Inherit all constructors from base class. */
+  using BaseClass::vector;
+
+  /* Try as hard as possible to use zero memory. */
+  void free_memory()
+  {
+    vector<value_type, allocator_type> empty;
+    BaseClass::swap(empty);
+  }
+
+  /* Some external API might demand working with std::vector. */
+  operator std::vector<value_type>()
+  {
+    return std::vector<value_type>(this->begin(), this->end());
+  }
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_VECTOR_H__ */
diff --git a/intern/cycles/util/version.h b/intern/cycles/util/version.h
new file mode 100644
index 00000000000..8bce5ff85aa
--- /dev/null
+++ b/intern/cycles/util/version.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_VERSION_H__
+#define __UTIL_VERSION_H__
+
+/* Cycles version number */
+
+CCL_NAMESPACE_BEGIN
+
+#define CYCLES_VERSION_MAJOR 1
+#define CYCLES_VERSION_MINOR 13
+#define CYCLES_VERSION_PATCH 0
+
+#define CYCLES_MAKE_VERSION_STRING2(a, b, c) #a "." #b "." #c
+#define CYCLES_MAKE_VERSION_STRING(a, b, c) CYCLES_MAKE_VERSION_STRING2(a, b, c)
+#define CYCLES_VERSION_STRING \
+  CYCLES_MAKE_VERSION_STRING(CYCLES_VERSION_MAJOR, CYCLES_VERSION_MINOR, CYCLES_VERSION_PATCH)
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_VERSION_H__ */
diff --git a/intern/cycles/util/view.cpp b/intern/cycles/util/view.cpp
new file mode 100644
index 00000000000..1c70cea1a8b
--- /dev/null
+++ b/intern/cycles/util/view.cpp
@@ -0,0 +1,282 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "util/opengl.h"
+#include "util/string.h"
+#include "util/time.h"
+#include "util/version.h"
+#include "util/view.h"
+
+#ifdef __APPLE__
+#  include <GLUT/glut.h>
+#else
+#  include <GL/glut.h>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* structs */
+
+struct View {
+  ViewInitFunc initf;
+  ViewExitFunc exitf;
+  ViewResizeFunc resize;
+  ViewDisplayFunc display;
+  ViewKeyboardFunc keyboard;
+  ViewMotionFunc motion;
+
+  bool first_display;
+  bool redraw;
+
+  int mouseX, mouseY;
+  int mouseBut0, mouseBut2;
+
+  int width, height;
+} V;
+
+/* public */
+
+static void view_display_text(int x, int y, const char *text)
+{
+  const char *c;
+
+  glRasterPos3f(x, y, 0);
+
+  for (c = text; *c != '\0'; c++)
+    glutBitmapCharacter(GLUT_BITMAP_HELVETICA_10, *c);
+}
+
+void view_display_info(const char *info)
+{
+  const int height = 20;
+
+  glEnable(GL_BLEND);
+  glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+  glColor4f(0.1f, 0.1f, 0.1f, 0.8f);
+  glRectf(0.0f, V.height - height, V.width, V.height);
+  glDisable(GL_BLEND);
+
+  glColor3f(0.5f, 0.5f, 0.5f);
+
+  view_display_text(10, 7 + V.height - height, info);
+
+  glColor3f(1.0f, 1.0f, 1.0f);
+}
+
+void view_display_help()
+{
+  const int w = (int)((float)V.width / 1.15f);
+  const int h = (int)((float)V.height / 1.15f);
+
+  const int x1 = (V.width - w) / 2;
+  const int x2 = x1 + w;
+
+  const int y1 = (V.height - h) / 2;
+  const int y2 = y1 + h;
+
+  glEnable(GL_BLEND);
+  glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+  glColor4f(0.5f, 0.5f, 0.5f, 0.8f);
+  glRectf(x1, y1, x2, y2);
+  glDisable(GL_BLEND);
+
+  glColor3f(0.8f, 0.8f, 0.8f);
+
+  string info = string("Cycles Renderer ") + CYCLES_VERSION_STRING;
+
+  view_display_text(x1 + 20, y2 - 20, info.c_str());
+  view_display_text(x1 + 20, y2 - 40, "(C) 2011-2016 Blender Foundation");
+  view_display_text(x1 + 20, y2 - 80, "Controls:");
+  view_display_text(x1 + 20, y2 - 100, "h: Info/Help");
+  view_display_text(x1 + 20, y2 - 120, "r: Reset");
+  view_display_text(x1 + 20, y2 - 140, "p: Pause");
+  view_display_text(x1 + 20, y2 - 160, "esc: Cancel");
+  view_display_text(x1 + 20, y2 - 180, "q: Quit program");
+
+  view_display_text(x1 + 20, y2 - 210, "i: Interactive mode");
+  view_display_text(x1 + 20, y2 - 230, "Left mouse: Move camera");
+  view_display_text(x1 + 20, y2 - 250, "Right mouse: Rotate camera");
+  view_display_text(x1 + 20, y2 - 270, "W/A/S/D: Move camera");
+  view_display_text(x1 + 20, y2 - 290, "0/1/2/3: Set max bounces");
+
+  glColor3f(1.0f, 1.0f, 1.0f);
+}
+
+static void view_display()
+{
+  if (V.first_display) {
+    if (V.initf)
+      V.initf();
+    if (V.exitf)
+      atexit(V.exitf);
+
+    V.first_display = false;
+  }
+
+  glClearColor(0.05f, 0.05f, 0.05f, 0.0f);
+  glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+  glMatrixMode(GL_PROJECTION);
+  glLoadIdentity();
+  glOrtho(0, V.width, 0, V.height, -1, 1);
+
+  glMatrixMode(GL_MODELVIEW);
+  glLoadIdentity();
+
+  glRasterPos3f(0, 0, 0);
+
+  if (V.display)
+    V.display();
+
+  glutSwapBuffers();
+}
+
+static void view_reshape(int width, int height)
+{
+  if (width <= 0 || height <= 0)
+    return;
+
+  V.width = width;
+  V.height = height;
+
+  glViewport(0, 0, width, height);
+
+  glMatrixMode(GL_PROJECTION);
+  glLoadIdentity();
+
+  glMatrixMode(GL_MODELVIEW);
+  glLoadIdentity();
+
+  if (V.resize)
+    V.resize(width, height);
+}
+
+static void view_keyboard(unsigned char key, int x, int y)
+{
+  if (V.keyboard)
+    V.keyboard(key);
+
+  if (key == 'm')
+    printf("mouse %d %d\n", x, y);
+  if (key == 'q') {
+    if (V.exitf)
+      V.exitf();
+    exit(0);
+  }
+}
+
+static void view_mouse(int button, int state, int x, int y)
+{
+  if (button == 0) {
+    if (state == GLUT_DOWN) {
+      V.mouseX = x;
+      V.mouseY = y;
+      V.mouseBut0 = 1;
+    }
+    else if (state == GLUT_UP) {
+      V.mouseBut0 = 0;
+    }
+  }
+  else if (button == 2) {
+    if (state == GLUT_DOWN) {
+      V.mouseX = x;
+      V.mouseY = y;
+      V.mouseBut2 = 1;
+    }
+    else if (state == GLUT_UP) {
+      V.mouseBut2 = 0;
+    }
+  }
+}
+
+static void view_motion(int x, int y)
+{
+  const int but = V.mouseBut0 ? 0 : 2;
+  const int distX = x - V.mouseX;
+  const int distY = y - V.mouseY;
+
+  if (V.motion)
+    V.motion(distX, distY, but);
+
+  V.mouseX = x;
+  V.mouseY = y;
+}
+
+static void view_idle()
+{
+  if (V.redraw) {
+    V.redraw = false;
+    glutPostRedisplay();
+  }
+
+  time_sleep(0.1);
+}
+
+void view_main_loop(const char *title,
+                    int width,
+                    int height,
+                    ViewInitFunc initf,
+                    ViewExitFunc exitf,
+                    ViewResizeFunc resize,
+                    ViewDisplayFunc display,
+                    ViewKeyboardFunc keyboard,
+                    ViewMotionFunc motion)
+{
+  const char *name = "app";
+  char *argv = (char *)name;
+  int argc = 1;
+
+  memset(&V, 0, sizeof(V));
+  V.width = width;
+  V.height = height;
+  V.first_display = true;
+  V.redraw = false;
+  V.initf = initf;
+  V.exitf = exitf;
+  V.resize = resize;
+  V.display = display;
+  V.keyboard = keyboard;
+  V.motion = motion;
+
+  glutInit(&argc, &argv);
+  glutInitWindowSize(width, height);
+  glutInitWindowPosition(0, 0);
+  glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
+  glutCreateWindow(title);
+
+  glewInit();
+
+  view_reshape(width, height);
+
+  glutDisplayFunc(view_display);
+  glutIdleFunc(view_idle);
+  glutReshapeFunc(view_reshape);
+  glutKeyboardFunc(view_keyboard);
+  glutMouseFunc(view_mouse);
+  glutMotionFunc(view_motion);
+
+  glutMainLoop();
+}
+
+void view_redraw()
+{
+  V.redraw = true;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/view.h b/intern/cycles/util/view.h
new file mode 100644
index 00000000000..ad5c53ee5d5
--- /dev/null
+++ b/intern/cycles/util/view.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_VIEW_H__
+#define __UTIL_VIEW_H__
+
+/* Functions to display a simple OpenGL window using GLUT, simplified to the
+ * bare minimum we need to reduce boilerplate code in tests apps. */
+
+CCL_NAMESPACE_BEGIN
+
+typedef void (*ViewInitFunc)();
+typedef void (*ViewExitFunc)();
+typedef void (*ViewResizeFunc)(int width, int height);
+typedef void (*ViewDisplayFunc)();
+typedef void (*ViewKeyboardFunc)(unsigned char key);
+typedef void (*ViewMotionFunc)(int x, int y, int button);
+
+void view_main_loop(const char *title,
+                    int width,
+                    int height,
+                    ViewInitFunc initf,
+                    ViewExitFunc exitf,
+                    ViewResizeFunc resize,
+                    ViewDisplayFunc display,
+                    ViewKeyboardFunc keyboard,
+                    ViewMotionFunc motion);
+
+void view_display_info(const char *info);
+void view_display_help();
+void view_redraw();
+
+CCL_NAMESPACE_END
+
+#endif /*__UTIL_VIEW_H__*/
diff --git a/intern/cycles/util/windows.cpp b/intern/cycles/util/windows.cpp
new file mode 100644
index 00000000000..96944d07390
--- /dev/null
+++ b/intern/cycles/util/windows.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2019-2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef _WIN32
+#  include <windows.h>
+#endif
+
+#include "util/windows.h"
+
+CCL_NAMESPACE_BEGIN
+
+bool system_windows_version_at_least(int major, int build)
+{
+#ifdef _WIN32
+  HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll");
+  if (hMod == 0) {
+    return false;
+  }
+
+  typedef NTSTATUS(WINAPI * RtlGetVersionPtr)(PRTL_OSVERSIONINFOW);
+  RtlGetVersionPtr rtl_get_version = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion");
+  if (rtl_get_version == NULL) {
+    return false;
+  }
+
+  RTL_OSVERSIONINFOW rovi = {0};
+  rovi.dwOSVersionInfoSize = sizeof(rovi);
+  if (rtl_get_version(&rovi) != 0) {
+    return false;
+  }
+
+  return (rovi.dwMajorVersion > major ||
+          (rovi.dwMajorVersion == major && rovi.dwBuildNumber >= build));
+#else
+  (void)major;
+  (void)build;
+  return false;
+#endif
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/windows.h b/intern/cycles/util/windows.h
new file mode 100644
index 00000000000..9cbf91a23a7
--- /dev/null
+++ b/intern/cycles/util/windows.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_WINDOWS_H__
+#define __UTIL_WINDOWS_H__
+
+#ifdef _WIN32
+
+#  ifndef NOGDI
+#    define NOGDI
+#  endif
+#  ifndef NOMINMAX
+#    define NOMINMAX
+#  endif
+#  ifndef WIN32_LEAN_AND_MEAN
+#    define WIN32_LEAN_AND_MEAN
+#  endif
+
+#  include <windows.h>
+
+#endif /* _WIN32 */
+
+CCL_NAMESPACE_BEGIN
+
+bool system_windows_version_at_least(int major, int build);
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_WINDOWS_H__ */
diff --git a/intern/cycles/util/xml.h b/intern/cycles/util/xml.h
new file mode 100644
index 00000000000..6f06f17937b
--- /dev/null
+++ b/intern/cycles/util/xml.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_XML_H__
+#define __UTIL_XML_H__
+
+/* PugiXML is used for XML parsing. */
+
+#include <pugixml.hpp>
+
+CCL_NAMESPACE_BEGIN
+
+OIIO_NAMESPACE_USING
+
+#ifdef WITH_SYSTEM_PUGIXML
+#  define PUGIXML_NAMESPACE pugi
+#else
+#  define PUGIXML_NAMESPACE OIIO_NAMESPACE::pugi
+#endif
+
+using PUGIXML_NAMESPACE::xml_attribute;
+using PUGIXML_NAMESPACE::xml_document;
+using PUGIXML_NAMESPACE::xml_node;
+using PUGIXML_NAMESPACE::xml_parse_result;
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_XML_H__ */
--
cgit v1.2.3