Cycles: remove SIMD BVH optimizations, to be replaced by Embree

Ref T73778
Depends on D8011
Maniphest Tasks: T73778
Differential Revision: https://developer.blender.org/D8012
author: Brecht Van Lommel <brecht@blender.org> 2020-06-10 19:55:33 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2020-06-22 14:28:01 +0300
commit: d1ef5146d72d40f97fdcbf28e96da49193c21dea (patch)
tree: 7a19a24bd6b809c7de72b4e2499d62b8740e639a /intern
parent: 1de0e13af619e405f351bf42924f819dc3a9bc44 (diff)
39 files changed, 71 insertions, 7913 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 89ed059af21..f0f7d24002f 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -53,12 +53,6 @@ enum_displacement_methods = (
     ('BOTH', "Displacement and Bump", "Combination of true displacement and bump mapping for finer detail"),
 )
 
-enum_bvh_layouts = (
-    ('BVH2', "BVH2", "", 1),
-    ('BVH4', "BVH4", "", 2),
-    ('BVH8', "BVH8", "", 4),
-)
-
 enum_bvh_types = (
     ('DYNAMIC_BVH', "Dynamic BVH", "Objects can be individually updated, at the cost of slower render time"),
     ('STATIC_BVH', "Static BVH", "Any object modification requires a complete BVH rebuild, but renders faster"),
@@ -772,11 +766,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
     debug_use_cpu_sse41: BoolProperty(name="SSE41", default=True)
     debug_use_cpu_sse3: BoolProperty(name="SSE3", default=True)
     debug_use_cpu_sse2: BoolProperty(name="SSE2", default=True)
-    debug_bvh_layout: EnumProperty(
-        name="BVH Layout",
-        items=enum_bvh_layouts,
-        default='BVH8',
-    )
     debug_use_cpu_split_kernel: BoolProperty(name="Split Kernel", default=False)
 
     debug_use_cuda_adaptive_compile: BoolProperty(name="Adaptive Compile", default=False)
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 9680bd04751..0859a8a82b0 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -688,16 +688,20 @@ class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Pa
 
         col = layout.column()
 
-        if _cycles.with_embree:
-            row = col.row()
-            row.active = use_cpu(context)
-            row.prop(cscene, "use_bvh_embree")
+        use_embree = False
+        if use_cpu(context):
+            use_embree = _cycles.with_embree
+            if not use_embree:
+              sub = col.column(align=True)
+              sub.label(text="Cycles built without Embree support")
+              sub.label(text="CPU raytracing performance will be poor")
+
         col.prop(cscene, "debug_use_spatial_splits")
         sub = col.column()
-        sub.active = not cscene.use_bvh_embree or not _cycles.with_embree
+        sub.active = not use_embree
         sub.prop(cscene, "debug_use_hair_bvh")
         sub = col.column()
-        sub.active = not cscene.debug_use_spatial_splits and not cscene.use_bvh_embree
+        sub.active = not cscene.debug_use_spatial_splits and not use_embree
         sub.prop(cscene, "debug_bvh_time_steps")
 
 
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 09813dc8c05..f5fd6f31c75 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -751,15 +751,7 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background)
     params.texture_limit = 0;
   }
 
-  /* TODO(sergey): Once OSL supports per-microarchitecture optimization get
-   * rid of this.
-   */
-  if (params.shadingsystem == SHADINGSYSTEM_OSL) {
-    params.bvh_layout = BVH_LAYOUT_BVH4;
-  }
-  else {
-    params.bvh_layout = DebugFlags().cpu.bvh_layout;
-  }
+  params.bvh_layout = DebugFlags().cpu.bvh_layout;
 
 #ifdef WITH_EMBREE
   params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE :
diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt
index fb724704a84..8b8f3ca7265 100644
--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -9,8 +9,6 @@ set(INC_SYS
 set(SRC
   bvh.cpp
   bvh2.cpp
-  bvh4.cpp
-  bvh8.cpp
   bvh_binning.cpp
   bvh_build.cpp
   bvh_embree.cpp
@@ -24,8 +22,6 @@ set(SRC
 set(SRC_HEADERS
   bvh.h
   bvh2.h
-  bvh4.h
-  bvh8.h
   bvh_binning.h
   bvh_build.h
   bvh_embree.h
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 8749bcfc07e..e9e67fd1305 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -22,8 +22,6 @@
 #include "render/object.h"
 
 #include "bvh/bvh2.h"
-#include "bvh/bvh4.h"
-#include "bvh/bvh8.h"
 #include "bvh/bvh_build.h"
 #include "bvh/bvh_embree.h"
 #include "bvh/bvh_node.h"
@@ -42,10 +40,6 @@ const char *bvh_layout_name(BVHLayout layout)
   switch (layout) {
     case BVH_LAYOUT_BVH2:
       return "BVH2";
-    case BVH_LAYOUT_BVH4:
-      return "BVH4";
-    case BVH_LAYOUT_BVH8:
-      return "BVH8";
     case BVH_LAYOUT_NONE:
       return "NONE";
     case BVH_LAYOUT_EMBREE:
@@ -109,10 +103,6 @@ BVH *BVH::create(const BVHParams &params,
   switch (params.bvh_layout) {
     case BVH_LAYOUT_BVH2:
       return new BVH2(params, geometry, objects);
-    case BVH_LAYOUT_BVH4:
-      return new BVH4(params, geometry, objects);
-    case BVH_LAYOUT_BVH8:
-      return new BVH8(params, geometry, objects);
     case BVH_LAYOUT_EMBREE:
 #ifdef WITH_EMBREE
       return new BVHEmbree(params, geometry, objects);
@@ -332,13 +322,6 @@ void BVH::pack_primitives()
 
 void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
 {
-  /* The BVH's for instances are built separately, but for traversal all
-   * BVH's are stored in global arrays. This function merges them into the
-   * top level BVH, adjusting indexes and offsets where appropriate.
-   */
-  const bool use_qbvh = (params.bvh_layout == BVH_LAYOUT_BVH4);
-  const bool use_obvh = (params.bvh_layout == BVH_LAYOUT_BVH8);
-
   /* Adjust primitive index to point to the triangle in the global array, for
    * geometry with transform applied and already in the top level BVH.
    */
@@ -501,53 +484,21 @@ void BVH::pack_instances(size_t nodes_size, size_t leaf_nodes_size)
       for (size_t i = 0, j = 0; i < bvh_nodes_size; j++) {
         size_t nsize, nsize_bbox;
         if (bvh_nodes[i].x & PATH_RAY_NODE_UNALIGNED) {
-          if (use_obvh) {
-            nsize = BVH_UNALIGNED_ONODE_SIZE;
-            nsize_bbox = BVH_UNALIGNED_ONODE_SIZE - 1;
-          }
-          else {
-            nsize = use_qbvh ? BVH_UNALIGNED_QNODE_SIZE : BVH_UNALIGNED_NODE_SIZE;
-            nsize_bbox = (use_qbvh) ? BVH_UNALIGNED_QNODE_SIZE - 1 : 0;
-          }
+          nsize = BVH_UNALIGNED_NODE_SIZE;
+          nsize_bbox = 0;
         }
         else {
-          if (use_obvh) {
-            nsize = BVH_ONODE_SIZE;
-            nsize_bbox = BVH_ONODE_SIZE - 1;
-          }
-          else {
-            nsize = (use_qbvh) ? BVH_QNODE_SIZE : BVH_NODE_SIZE;
-            nsize_bbox = (use_qbvh) ? BVH_QNODE_SIZE - 1 : 0;
-          }
+          nsize = BVH_NODE_SIZE;
+          nsize_bbox = 0;
         }
 
         memcpy(pack_nodes + pack_nodes_offset, bvh_nodes + i, nsize_bbox * sizeof(int4));
 
         /* Modify offsets into arrays */
         int4 data = bvh_nodes[i + nsize_bbox];
-
-        if (use_obvh) {
-          int4 data1 = bvh_nodes[i + nsize_bbox - 1];
-          data.z += (data.z < 0) ? -noffset_leaf : noffset;
-          data.w += (data.w < 0) ? -noffset_leaf : noffset;
-          data.x += (data.x < 0) ? -noffset_leaf : noffset;
-          data.y += (data.y < 0) ? -noffset_leaf : noffset;
-          data1.z += (data1.z < 0) ? -noffset_leaf : noffset;
-          data1.w += (data1.w < 0) ? -noffset_leaf : noffset;
-          data1.x += (data1.x < 0) ? -noffset_leaf : noffset;
-          data1.y += (data1.y < 0) ? -noffset_leaf : noffset;
-          pack_nodes[pack_nodes_offset + nsize_bbox] = data;
-          pack_nodes[pack_nodes_offset + nsize_bbox - 1] = data1;
-        }
-        else {
-          data.z += (data.z < 0) ? -noffset_leaf : noffset;
-          data.w += (data.w < 0) ? -noffset_leaf : noffset;
-          if (use_qbvh) {
-            data.x += (data.x < 0) ? -noffset_leaf : noffset;
-            data.y += (data.y < 0) ? -noffset_leaf : noffset;
-          }
-          pack_nodes[pack_nodes_offset + nsize_bbox] = data;
-        }
+        data.z += (data.z < 0) ? -noffset_leaf : noffset;
+        data.w += (data.w < 0) ? -noffset_leaf : noffset;
+        pack_nodes[pack_nodes_offset + nsize_bbox] = data;
 
         /* Usually this copies nothing, but we better
          * be prepared for possible node size extension.
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index bdde38640c9..6639e06b0bc 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -76,7 +76,7 @@ struct PackedBVH {
   }
 };
 
-enum BVH_TYPE { bvh2, bvh4, bvh8 };
+enum BVH_TYPE { bvh2 };
 
 /* BVH */
 
diff --git a/intern/cycles/bvh/bvh4.cpp b/intern/cycles/bvh/bvh4.cpp
deleted file mode 100644
index 143c3e54f94..00000000000
--- a/intern/cycles/bvh/bvh4.cpp
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Adapted from code copyright 2009-2010 NVIDIA Corporation
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "bvh/bvh4.h"
-
-#include "render/mesh.h"
-#include "render/object.h"
-
-#include "bvh/bvh_node.h"
-#include "bvh/bvh_unaligned.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Can we avoid this somehow or make more generic?
- *
- * Perhaps we can merge nodes in actual tree and make our
- * life easier all over the place.
- */
-
-BVH4::BVH4(const BVHParams &params_,
-           const vector<Geometry *> &geometry_,
-           const vector<Object *> &objects_)
-    : BVH(params_, geometry_, objects_)
-{
-  params.bvh_layout = BVH_LAYOUT_BVH4;
-}
-
-namespace {
-
-BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
-{
-  if (node->is_leaf()) {
-    return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
-  }
-  /* Collect nodes of one layer deeper, allowing us to have more children in an inner layer. */
-  assert(node->num_children() <= 2);
-  const BVHNode *children[4];
-  const BVHNode *child0 = node->get_child(0);
-  const BVHNode *child1 = node->get_child(1);
-  int num_children = 0;
-  if (child0->is_leaf()) {
-    children[num_children++] = child0;
-  }
-  else {
-    children[num_children++] = child0->get_child(0);
-    children[num_children++] = child0->get_child(1);
-  }
-  if (child1->is_leaf()) {
-    children[num_children++] = child1;
-  }
-  else {
-    children[num_children++] = child1->get_child(0);
-    children[num_children++] = child1->get_child(1);
-  }
-  /* Merge children in subtrees. */
-  BVHNode *children4[4];
-  for (int i = 0; i < num_children; ++i) {
-    children4[i] = bvh_node_merge_children_recursively(children[i]);
-  }
-  /* Allocate new node. */
-  BVHNode *node4 = new InnerNode(node->bounds, children4, num_children);
-  /* TODO(sergey): Consider doing this from the InnerNode() constructor.
-   * But in order to do this nicely need to think of how to pass all the
-   * parameters there. */
-  if (node->is_unaligned) {
-    node4->is_unaligned = true;
-    node4->aligned_space = new Transform();
-    *node4->aligned_space = *node->aligned_space;
-  }
-  return node4;
-}
-
-}  // namespace
-
-BVHNode *BVH4::widen_children_nodes(const BVHNode *root)
-{
-  if (root == NULL) {
-    return NULL;
-  }
-  if (root->is_leaf()) {
-    return const_cast<BVHNode *>(root);
-  }
-  BVHNode *root4 = bvh_node_merge_children_recursively(root);
-  /* TODO(sergey): Pack children nodes to parents which has less that 4
-   * children. */
-  return root4;
-}
-
-void BVH4::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
-{
-  float4 data[BVH_QNODE_LEAF_SIZE];
-  memset(data, 0, sizeof(data));
-  if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
-    /* object */
-    data[0].x = __int_as_float(~(leaf->lo));
-    data[0].y = __int_as_float(0);
-  }
-  else {
-    /* triangle */
-    data[0].x = __int_as_float(leaf->lo);
-    data[0].y = __int_as_float(leaf->hi);
-  }
-  data[0].z = __uint_as_float(leaf->visibility);
-  if (leaf->num_triangles() != 0) {
-    data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
-  }
-
-  memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
-}
-
-void BVH4::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
-  bool has_unaligned = false;
-  /* Check whether we have to create unaligned node or all nodes are aligned
-   * and we can cut some corner here.
-   */
-  if (params.use_unaligned_nodes) {
-    for (int i = 0; i < num; i++) {
-      if (en[i].node->is_unaligned) {
-        has_unaligned = true;
-        break;
-      }
-    }
-  }
-  if (has_unaligned) {
-    /* There's no unaligned children, pack into AABB node. */
-    pack_unaligned_inner(e, en, num);
-  }
-  else {
-    /* Create unaligned node with orientation transform for each of the
-     * children.
-     */
-    pack_aligned_inner(e, en, num);
-  }
-}
-
-void BVH4::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
-  BoundBox bounds[4];
-  int child[4];
-  for (int i = 0; i < num; ++i) {
-    bounds[i] = en[i].node->bounds;
-    child[i] = en[i].encodeIdx();
-  }
-  pack_aligned_node(
-      e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
-}
-
-void BVH4::pack_aligned_node(int idx,
-                             const BoundBox *bounds,
-                             const int *child,
-                             const uint visibility,
-                             const float time_from,
-                             const float time_to,
-                             const int num)
-{
-  float4 data[BVH_QNODE_SIZE];
-  memset(data, 0, sizeof(data));
-
-  data[0].x = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
-  data[0].y = time_from;
-  data[0].z = time_to;
-
-  for (int i = 0; i < num; i++) {
-    float3 bb_min = bounds[i].min;
-    float3 bb_max = bounds[i].max;
-
-    data[1][i] = bb_min.x;
-    data[2][i] = bb_max.x;
-    data[3][i] = bb_min.y;
-    data[4][i] = bb_max.y;
-    data[5][i] = bb_min.z;
-    data[6][i] = bb_max.z;
-
-    data[7][i] = __int_as_float(child[i]);
-  }
-
-  for (int i = num; i < 4; i++) {
-    /* We store BB which would never be recorded as intersection
-     * so kernel might safely assume there are always 4 child nodes.
-     */
-    data[1][i] = FLT_MAX;
-    data[2][i] = -FLT_MAX;
-
-    data[3][i] = FLT_MAX;
-    data[4][i] = -FLT_MAX;
-
-    data[5][i] = FLT_MAX;
-    data[6][i] = -FLT_MAX;
-
-    data[7][i] = __int_as_float(0);
-  }
-
-  memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_QNODE_SIZE);
-}
-
-void BVH4::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
-  Transform aligned_space[4];
-  BoundBox bounds[4];
-  int child[4];
-  for (int i = 0; i < num; ++i) {
-    aligned_space[i] = en[i].node->get_aligned_space();
-    bounds[i] = en[i].node->bounds;
-    child[i] = en[i].encodeIdx();
-  }
-  pack_unaligned_node(e.idx,
-                      aligned_space,
-                      bounds,
-                      child,
-                      e.node->visibility,
-                      e.node->time_from,
-                      e.node->time_to,
-                      num);
-}
-
-void BVH4::pack_unaligned_node(int idx,
-                               const Transform *aligned_space,
-                               const BoundBox *bounds,
-                               const int *child,
-                               const uint visibility,
-                               const float time_from,
-                               const float time_to,
-                               const int num)
-{
-  float4 data[BVH_UNALIGNED_QNODE_SIZE];
-  memset(data, 0, sizeof(data));
-
-  data[0].x = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
-  data[0].y = time_from;
-  data[0].z = time_to;
-
-  for (int i = 0; i < num; i++) {
-    Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
-
-    data[1][i] = space.x.x;
-    data[2][i] = space.x.y;
-    data[3][i] = space.x.z;
-
-    data[4][i] = space.y.x;
-    data[5][i] = space.y.y;
-    data[6][i] = space.y.z;
-
-    data[7][i] = space.z.x;
-    data[8][i] = space.z.y;
-    data[9][i] = space.z.z;
-
-    data[10][i] = space.x.w;
-    data[11][i] = space.y.w;
-    data[12][i] = space.z.w;
-
-    data[13][i] = __int_as_float(child[i]);
-  }
-
-  for (int i = num; i < 4; i++) {
-    /* We store BB which would never be recorded as intersection
-     * so kernel might safely assume there are always 4 child nodes.
-     */
-
-    data[1][i] = NAN;
-    data[2][i] = NAN;
-    data[3][i] = NAN;
-
-    data[4][i] = NAN;
-    data[5][i] = NAN;
-    data[6][i] = NAN;
-
-    data[7][i] = NAN;
-    data[8][i] = NAN;
-    data[9][i] = NAN;
-
-    data[10][i] = NAN;
-    data[11][i] = NAN;
-    data[12][i] = NAN;
-
-    data[13][i] = __int_as_float(0);
-  }
-
-  memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_QNODE_SIZE);
-}
-
-/* Quad SIMD Nodes */
-
-void BVH4::pack_nodes(const BVHNode *root)
-{
-  /* Calculate size of the arrays required. */
-  const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
-  const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
-  assert(num_leaf_nodes <= num_nodes);
-  const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
-  size_t node_size;
-  if (params.use_unaligned_nodes) {
-    const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
-    node_size = (num_unaligned_nodes * BVH_UNALIGNED_QNODE_SIZE) +
-                (num_inner_nodes - num_unaligned_nodes) * BVH_QNODE_SIZE;
-  }
-  else {
-    node_size = num_inner_nodes * BVH_QNODE_SIZE;
-  }
-  /* Resize arrays. */
-  pack.nodes.clear();
-  pack.leaf_nodes.clear();
-  /* For top level BVH, first merge existing BVH's so we know the offsets. */
-  if (params.top_level) {
-    pack_instances(node_size, num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
-  }
-  else {
-    pack.nodes.resize(node_size);
-    pack.leaf_nodes.resize(num_leaf_nodes * BVH_QNODE_LEAF_SIZE);
-  }
-
-  int nextNodeIdx = 0, nextLeafNodeIdx = 0;
-
-  vector<BVHStackEntry> stack;
-  stack.reserve(BVHParams::MAX_DEPTH * 2);
-  if (root->is_leaf()) {
-    stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
-  }
-  else {
-    stack.push_back(BVHStackEntry(root, nextNodeIdx));
-    nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
-  }
-
-  while (stack.size()) {
-    BVHStackEntry e = stack.back();
-    stack.pop_back();
-
-    if (e.node->is_leaf()) {
-      /* leaf node */
-      const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
-      pack_leaf(e, leaf);
-    }
-    else {
-      /* Inner node. */
-      /* Collect nodes. */
-      const BVHNode *children[4];
-      const int num_children = e.node->num_children();
-      /* Push entries on the stack. */
-      for (int i = 0; i < num_children; ++i) {
-        int idx;
-        children[i] = e.node->get_child(i);
-        assert(children[i] != NULL);
-        if (children[i]->is_leaf()) {
-          idx = nextLeafNodeIdx++;
-        }
-        else {
-          idx = nextNodeIdx;
-          nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_QNODE_SIZE : BVH_QNODE_SIZE;
-        }
-        stack.push_back(BVHStackEntry(children[i], idx));
-      }
-      /* Set node. */
-      pack_inner(e, &stack[stack.size() - num_children], num_children);
-    }
-  }
-
-  assert(node_size == nextNodeIdx);
-  /* Root index to start traversal at, to handle case of single leaf node. */
-  pack.root_index = (root->is_leaf()) ? -1 : 0;
-}
-
-void BVH4::refit_nodes()
-{
-  assert(!params.top_level);
-
-  BoundBox bbox = BoundBox::empty;
-  uint visibility = 0;
-  refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
-}
-
-void BVH4::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
-{
-  if (leaf) {
-    /* Refit leaf node. */
-    int4 *data = &pack.leaf_nodes[idx];
-    int4 c = data[0];
-
-    BVH::refit_primitives(c.x, c.y, bbox, visibility);
-
-    /* TODO(sergey): This is actually a copy of pack_leaf(),
-     * but this chunk of code only knows actual data and has
-     * no idea about BVHNode.
-     *
-     * Would be nice to de-duplicate code, but trying to make
-     * making code more general ends up in much nastier code
-     * in my opinion so far.
-     *
-     * Same applies to the inner nodes case below.
-     */
-    float4 leaf_data[BVH_QNODE_LEAF_SIZE];
-    leaf_data[0].x = __int_as_float(c.x);
-    leaf_data[0].y = __int_as_float(c.y);
-    leaf_data[0].z = __uint_as_float(visibility);
-    leaf_data[0].w = __uint_as_float(c.w);
-    memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_QNODE_LEAF_SIZE);
-  }
-  else {
-    int4 *data = &pack.nodes[idx];
-    bool is_unaligned = (data[0].x & PATH_RAY_NODE_UNALIGNED) != 0;
-    int4 c;
-    if (is_unaligned) {
-      c = data[13];
-    }
-    else {
-      c = data[7];
-    }
-    /* Refit inner node, set bbox from children. */
-    BoundBox child_bbox[4] = {BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};
-    uint child_visibility[4] = {0};
-    int num_nodes = 0;
-
-    for (int i = 0; i < 4; ++i) {
-      if (c[i] != 0) {
-        refit_node((c[i] < 0) ? -c[i] - 1 : c[i], (c[i] < 0), child_bbox[i], child_visibility[i]);
-        ++num_nodes;
-        bbox.grow(child_bbox[i]);
-        visibility |= child_visibility[i];
-      }
-    }
-
-    if (is_unaligned) {
-      Transform aligned_space[4] = {
-          transform_identity(), transform_identity(), transform_identity(), transform_identity()};
-      pack_unaligned_node(
-          idx, aligned_space, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
-    }
-    else {
-      pack_aligned_node(idx, child_bbox, &c[0], visibility, 0.0f, 1.0f, num_nodes);
-    }
-  }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh4.h b/intern/cycles/bvh/bvh4.h
deleted file mode 100644
index afbb9007afb..00000000000
--- a/intern/cycles/bvh/bvh4.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Adapted from code copyright 2009-2010 NVIDIA Corporation
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __BVH4_H__
-#define __BVH4_H__
-
-#include "bvh/bvh.h"
-#include "bvh/bvh_params.h"
-
-#include "util/util_types.h"
-#include "util/util_vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-class BVHNode;
-struct BVHStackEntry;
-class BVHParams;
-class BoundBox;
-class LeafNode;
-class Object;
-class Progress;
-
-#define BVH_QNODE_SIZE 8
-#define BVH_QNODE_LEAF_SIZE 1
-#define BVH_UNALIGNED_QNODE_SIZE 14
-
-/* BVH4
- *
- * Quad BVH, with each node having four children, to use with SIMD instructions.
- */
-class BVH4 : public BVH {
- protected:
-  /* constructor */
-  friend class BVH;
-  BVH4(const BVHParams &params,
-       const vector<Geometry *> &geometry,
-       const vector<Object *> &objects);
-
-  /* Building process. */
-  virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
-
-  /* pack */
-  void pack_nodes(const BVHNode *root) override;
-
-  void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
-  void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
-
-  void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
-  void pack_aligned_node(int idx,
-                         const BoundBox *bounds,
-                         const int *child,
-                         const uint visibility,
-                         const float time_from,
-                         const float time_to,
-                         const int num);
-
-  void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
-  void pack_unaligned_node(int idx,
-                           const Transform *aligned_space,
-                           const BoundBox *bounds,
-                           const int *child,
-                           const uint visibility,
-                           const float time_from,
-                           const float time_to,
-                           const int num);
-
-  /* refit */
-  void refit_nodes() override;
-  void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
-};
-
-CCL_NAMESPACE_END
-
-#endif /* __BVH4_H__ */
diff --git a/intern/cycles/bvh/bvh8.cpp b/intern/cycles/bvh/bvh8.cpp
deleted file mode 100644
index b805865b2c8..00000000000
--- a/intern/cycles/bvh/bvh8.cpp
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- * Original code Copyright 2017, Intel Corporation
- * Modifications Copyright 2018, Blender Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "bvh/bvh8.h"
-
-#include "render/hair.h"
-#include "render/mesh.h"
-#include "render/object.h"
-
-#include "bvh/bvh_node.h"
-#include "bvh/bvh_unaligned.h"
-
-CCL_NAMESPACE_BEGIN
-
-BVH8::BVH8(const BVHParams &params_,
-           const vector<Geometry *> &geometry_,
-           const vector<Object *> &objects_)
-    : BVH(params_, geometry_, objects_)
-{
-}
-
-namespace {
-
-BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
-{
-  if (node->is_leaf()) {
-    return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
-  }
-  /* Collect nodes of two layer deeper, allowing us to have more childrem in
-   * an inner layer. */
-  assert(node->num_children() <= 2);
-  const BVHNode *children[8];
-  const BVHNode *child0 = node->get_child(0);
-  const BVHNode *child1 = node->get_child(1);
-  int num_children = 0;
-  if (child0->is_leaf()) {
-    children[num_children++] = child0;
-  }
-  else {
-    const BVHNode *child00 = child0->get_child(0), *child01 = child0->get_child(1);
-    if (child00->is_leaf()) {
-      children[num_children++] = child00;
-    }
-    else {
-      children[num_children++] = child00->get_child(0);
-      children[num_children++] = child00->get_child(1);
-    }
-    if (child01->is_leaf()) {
-      children[num_children++] = child01;
-    }
-    else {
-      children[num_children++] = child01->get_child(0);
-      children[num_children++] = child01->get_child(1);
-    }
-  }
-  if (child1->is_leaf()) {
-    children[num_children++] = child1;
-  }
-  else {
-    const BVHNode *child10 = child1->get_child(0), *child11 = child1->get_child(1);
-    if (child10->is_leaf()) {
-      children[num_children++] = child10;
-    }
-    else {
-      children[num_children++] = child10->get_child(0);
-      children[num_children++] = child10->get_child(1);
-    }
-    if (child11->is_leaf()) {
-      children[num_children++] = child11;
-    }
-    else {
-      children[num_children++] = child11->get_child(0);
-      children[num_children++] = child11->get_child(1);
-    }
-  }
-  /* Merge children in subtrees. */
-  BVHNode *children4[8];
-  for (int i = 0; i < num_children; ++i) {
-    children4[i] = bvh_node_merge_children_recursively(children[i]);
-  }
-  /* Allocate new node. */
-  BVHNode *node8 = new InnerNode(node->bounds, children4, num_children);
-  /* TODO(sergey): Consider doing this from the InnerNode() constructor.
-   * But in order to do this nicely need to think of how to pass all the
-   * parameters there. */
-  if (node->is_unaligned) {
-    node8->is_unaligned = true;
-    node8->aligned_space = new Transform();
-    *node8->aligned_space = *node->aligned_space;
-  }
-  return node8;
-}
-
-}  // namespace
-
-BVHNode *BVH8::widen_children_nodes(const BVHNode *root)
-{
-  if (root == NULL) {
-    return NULL;
-  }
-  if (root->is_leaf()) {
-    return const_cast<BVHNode *>(root);
-  }
-  BVHNode *root8 = bvh_node_merge_children_recursively(root);
-  /* TODO(sergey): Pack children nodes to parents which has less that 4
-   * children. */
-  return root8;
-}
-
-void BVH8::pack_leaf(const BVHStackEntry &e, const LeafNode *leaf)
-{
-  float4 data[BVH_ONODE_LEAF_SIZE];
-  memset(data, 0, sizeof(data));
-  if (leaf->num_triangles() == 1 && pack.prim_index[leaf->lo] == -1) {
-    /* object */
-    data[0].x = __int_as_float(~(leaf->lo));
-    data[0].y = __int_as_float(0);
-  }
-  else {
-    /* triangle */
-    data[0].x = __int_as_float(leaf->lo);
-    data[0].y = __int_as_float(leaf->hi);
-  }
-  data[0].z = __uint_as_float(leaf->visibility);
-  if (leaf->num_triangles() != 0) {
-    data[0].w = __uint_as_float(pack.prim_type[leaf->lo]);
-  }
-
-  memcpy(&pack.leaf_nodes[e.idx], data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
-}
-
-void BVH8::pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
-  bool has_unaligned = false;
-  /* Check whether we have to create unaligned node or all nodes are aligned
-   * and we can cut some corner here.
-   */
-  if (params.use_unaligned_nodes) {
-    for (int i = 0; i < num; i++) {
-      if (en[i].node->is_unaligned) {
-        has_unaligned = true;
-        break;
-      }
-    }
-  }
-  if (has_unaligned) {
-    /* There's no unaligned children, pack into AABB node. */
-    pack_unaligned_inner(e, en, num);
-  }
-  else {
-    /* Create unaligned node with orientation transform for each of the
-     * children.
-     */
-    pack_aligned_inner(e, en, num);
-  }
-}
-
-void BVH8::pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
-  BoundBox bounds[8];
-  int child[8];
-  for (int i = 0; i < num; ++i) {
-    bounds[i] = en[i].node->bounds;
-    child[i] = en[i].encodeIdx();
-  }
-  pack_aligned_node(
-      e.idx, bounds, child, e.node->visibility, e.node->time_from, e.node->time_to, num);
-}
-
-void BVH8::pack_aligned_node(int idx,
-                             const BoundBox *bounds,
-                             const int *child,
-                             const uint visibility,
-                             const float time_from,
-                             const float time_to,
-                             const int num)
-{
-  float8 data[8];
-  memset(data, 0, sizeof(data));
-
-  data[0].a = __uint_as_float(visibility & ~PATH_RAY_NODE_UNALIGNED);
-  data[0].b = time_from;
-  data[0].c = time_to;
-
-  for (int i = 0; i < num; i++) {
-    float3 bb_min = bounds[i].min;
-    float3 bb_max = bounds[i].max;
-
-    data[1][i] = bb_min.x;
-    data[2][i] = bb_max.x;
-    data[3][i] = bb_min.y;
-    data[4][i] = bb_max.y;
-    data[5][i] = bb_min.z;
-    data[6][i] = bb_max.z;
-
-    data[7][i] = __int_as_float(child[i]);
-  }
-
-  for (int i = num; i < 8; i++) {
-    /* We store BB which would never be recorded as intersection
-     * so kernel might safely assume there are always 4 child nodes.
-     */
-    data[1][i] = FLT_MAX;
-    data[2][i] = -FLT_MAX;
-
-    data[3][i] = FLT_MAX;
-    data[4][i] = -FLT_MAX;
-
-    data[5][i] = FLT_MAX;
-    data[6][i] = -FLT_MAX;
-
-    data[7][i] = __int_as_float(0);
-  }
-
-  memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_ONODE_SIZE);
-}
-
-void BVH8::pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num)
-{
-  Transform aligned_space[8];
-  BoundBox bounds[8];
-  int child[8];
-  for (int i = 0; i < num; ++i) {
-    aligned_space[i] = en[i].node->get_aligned_space();
-    bounds[i] = en[i].node->bounds;
-    child[i] = en[i].encodeIdx();
-  }
-  pack_unaligned_node(e.idx,
-                      aligned_space,
-                      bounds,
-                      child,
-                      e.node->visibility,
-                      e.node->time_from,
-                      e.node->time_to,
-                      num);
-}
-
-void BVH8::pack_unaligned_node(int idx,
-                               const Transform *aligned_space,
-                               const BoundBox *bounds,
-                               const int *child,
-                               const uint visibility,
-                               const float time_from,
-                               const float time_to,
-                               const int num)
-{
-  float8 data[BVH_UNALIGNED_ONODE_SIZE];
-  memset(data, 0, sizeof(data));
-
-  data[0].a = __uint_as_float(visibility | PATH_RAY_NODE_UNALIGNED);
-  data[0].b = time_from;
-  data[0].c = time_to;
-
-  for (int i = 0; i < num; i++) {
-    Transform space = BVHUnaligned::compute_node_transform(bounds[i], aligned_space[i]);
-
-    data[1][i] = space.x.x;
-    data[2][i] = space.x.y;
-    data[3][i] = space.x.z;
-
-    data[4][i] = space.y.x;
-    data[5][i] = space.y.y;
-    data[6][i] = space.y.z;
-
-    data[7][i] = space.z.x;
-    data[8][i] = space.z.y;
-    data[9][i] = space.z.z;
-
-    data[10][i] = space.x.w;
-    data[11][i] = space.y.w;
-    data[12][i] = space.z.w;
-
-    data[13][i] = __int_as_float(child[i]);
-  }
-
-  for (int i = num; i < 8; i++) {
-    /* We store BB which would never be recorded as intersection
-     * so kernel might safely assume there are always 4 child nodes.
-     */
-
-    data[1][i] = NAN;
-    data[2][i] = NAN;
-    data[3][i] = NAN;
-
-    data[4][i] = NAN;
-    data[5][i] = NAN;
-    data[6][i] = NAN;
-
-    data[7][i] = NAN;
-    data[8][i] = NAN;
-    data[9][i] = NAN;
-
-    data[10][i] = NAN;
-    data[11][i] = NAN;
-    data[12][i] = NAN;
-
-    data[13][i] = __int_as_float(0);
-  }
-
-  memcpy(&pack.nodes[idx], data, sizeof(float4) * BVH_UNALIGNED_ONODE_SIZE);
-}
-
-/* Quad SIMD Nodes */
-
-void BVH8::pack_nodes(const BVHNode *root)
-{
-  /* Calculate size of the arrays required. */
-  const size_t num_nodes = root->getSubtreeSize(BVH_STAT_NODE_COUNT);
-  const size_t num_leaf_nodes = root->getSubtreeSize(BVH_STAT_LEAF_COUNT);
-  assert(num_leaf_nodes <= num_nodes);
-  const size_t num_inner_nodes = num_nodes - num_leaf_nodes;
-  size_t node_size;
-  if (params.use_unaligned_nodes) {
-    const size_t num_unaligned_nodes = root->getSubtreeSize(BVH_STAT_UNALIGNED_INNER_COUNT);
-    node_size = (num_unaligned_nodes * BVH_UNALIGNED_ONODE_SIZE) +
-                (num_inner_nodes - num_unaligned_nodes) * BVH_ONODE_SIZE;
-  }
-  else {
-    node_size = num_inner_nodes * BVH_ONODE_SIZE;
-  }
-  /* Resize arrays. */
-  pack.nodes.clear();
-  pack.leaf_nodes.clear();
-  /* For top level BVH, first merge existing BVH's so we know the offsets. */
-  if (params.top_level) {
-    pack_instances(node_size, num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
-  }
-  else {
-    pack.nodes.resize(node_size);
-    pack.leaf_nodes.resize(num_leaf_nodes * BVH_ONODE_LEAF_SIZE);
-  }
-
-  int nextNodeIdx = 0, nextLeafNodeIdx = 0;
-
-  vector<BVHStackEntry> stack;
-  stack.reserve(BVHParams::MAX_DEPTH * 2);
-  if (root->is_leaf()) {
-    stack.push_back(BVHStackEntry(root, nextLeafNodeIdx++));
-  }
-  else {
-    stack.push_back(BVHStackEntry(root, nextNodeIdx));
-    nextNodeIdx += root->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
-  }
-
-  while (stack.size()) {
-    BVHStackEntry e = stack.back();
-    stack.pop_back();
-
-    if (e.node->is_leaf()) {
-      /* leaf node */
-      const LeafNode *leaf = reinterpret_cast<const LeafNode *>(e.node);
-      pack_leaf(e, leaf);
-    }
-    else {
-      /* Inner node. */
-      /* Collect nodes. */
-      const BVHNode *children[8];
-      int num_children = e.node->num_children();
-      /* Push entries on the stack. */
-      for (int i = 0; i < num_children; ++i) {
-        int idx;
-        children[i] = e.node->get_child(i);
-        if (children[i]->is_leaf()) {
-          idx = nextLeafNodeIdx++;
-        }
-        else {
-          idx = nextNodeIdx;
-          nextNodeIdx += children[i]->has_unaligned() ? BVH_UNALIGNED_ONODE_SIZE : BVH_ONODE_SIZE;
-        }
-        stack.push_back(BVHStackEntry(children[i], idx));
-      }
-      /* Set node. */
-      pack_inner(e, &stack[stack.size() - num_children], num_children);
-    }
-  }
-
-  assert(node_size == nextNodeIdx);
-  /* Root index to start traversal at, to handle case of single leaf node. */
-  pack.root_index = (root->is_leaf()) ? -1 : 0;
-}
-
-void BVH8::refit_nodes()
-{
-  assert(!params.top_level);
-
-  BoundBox bbox = BoundBox::empty;
-  uint visibility = 0;
-  refit_node(0, (pack.root_index == -1) ? true : false, bbox, visibility);
-}
-
-void BVH8::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility)
-{
-  if (leaf) {
-    int4 *data = &pack.leaf_nodes[idx];
-    int4 c = data[0];
-    /* Refit leaf node. */
-    for (int prim = c.x; prim < c.y; prim++) {
-      int pidx = pack.prim_index[prim];
-      int tob = pack.prim_object[prim];
-      Object *ob = objects[tob];
-
-      if (pidx == -1) {
-        /* Object instance. */
-        bbox.grow(ob->bounds);
-      }
-      else {
-        /* Primitives. */
-        if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
-          /* Curves. */
-          const Hair *hair = static_cast<const Hair *>(ob->geometry);
-          int prim_offset = (params.top_level) ? hair->prim_offset : 0;
-          Hair::Curve curve = hair->get_curve(pidx - prim_offset);
-          int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
-
-          curve.bounds_grow(k, &hair->curve_keys[0], &hair->curve_radius[0], bbox);
-
-          /* Motion curves. */
-          if (hair->use_motion_blur) {
-            Attribute *attr = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
-            if (attr) {
-              size_t hair_size = hair->curve_keys.size();
-              size_t steps = hair->motion_steps - 1;
-              float3 *key_steps = attr->data_float3();
-
-              for (size_t i = 0; i < steps; i++) {
-                curve.bounds_grow(k, key_steps + i * hair_size, &hair->curve_radius[0], bbox);
-              }
-            }
-          }
-        }
-        else {
-          /* Triangles. */
-          const Mesh *mesh = static_cast<const Mesh *>(ob->geometry);
-          int prim_offset = (params.top_level) ? mesh->prim_offset : 0;
-          Mesh::Triangle triangle = mesh->get_triangle(pidx - prim_offset);
-          const float3 *vpos = &mesh->verts[0];
-
-          triangle.bounds_grow(vpos, bbox);
-
-          /* Motion triangles. */
-          if (mesh->use_motion_blur) {
-            Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-
-            if (attr) {
-              size_t mesh_size = mesh->verts.size();
-              size_t steps = mesh->motion_steps - 1;
-              float3 *vert_steps = attr->data_float3();
-
-              for (size_t i = 0; i < steps; i++) {
-                triangle.bounds_grow(vert_steps + i * mesh_size, bbox);
-              }
-            }
-          }
-        }
-      }
-
-      visibility |= ob->visibility;
-    }
-
-    float4 leaf_data[BVH_ONODE_LEAF_SIZE];
-    leaf_data[0].x = __int_as_float(c.x);
-    leaf_data[0].y = __int_as_float(c.y);
-    leaf_data[0].z = __uint_as_float(visibility);
-    leaf_data[0].w = __uint_as_float(c.w);
-    memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_ONODE_LEAF_SIZE);
-  }
-  else {
-    float8 *data = (float8 *)&pack.nodes[idx];
-    bool is_unaligned = (__float_as_uint(data[0].a) & PATH_RAY_NODE_UNALIGNED) != 0;
-    /* Refit inner node, set bbox from children. */
-    BoundBox child_bbox[8] = {BoundBox::empty,
-                              BoundBox::empty,
-                              BoundBox::empty,
-                              BoundBox::empty,
-                              BoundBox::empty,
-                              BoundBox::empty,
-                              BoundBox::empty,
-                              BoundBox::empty};
-    int child[8];
-    uint child_visibility[8] = {0};
-    int num_nodes = 0;
-
-    for (int i = 0; i < 8; ++i) {
-      child[i] = __float_as_int(data[(is_unaligned) ? 13 : 7][i]);
-
-      if (child[i] != 0) {
-        refit_node((child[i] < 0) ? -child[i] - 1 : child[i],
-                   (child[i] < 0),
-                   child_bbox[i],
-                   child_visibility[i]);
-        ++num_nodes;
-        bbox.grow(child_bbox[i]);
-        visibility |= child_visibility[i];
-      }
-    }
-
-    if (is_unaligned) {
-      Transform aligned_space[8] = {transform_identity(),
-                                    transform_identity(),
-                                    transform_identity(),
-                                    transform_identity(),
-                                    transform_identity(),
-                                    transform_identity(),
-                                    transform_identity(),
-                                    transform_identity()};
-      pack_unaligned_node(
-          idx, aligned_space, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
-    }
-    else {
-      pack_aligned_node(idx, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes);
-    }
-  }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh8.h b/intern/cycles/bvh/bvh8.h
deleted file mode 100644
index d23fa528e3e..00000000000
--- a/intern/cycles/bvh/bvh8.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Original code Copyright 2017, Intel Corporation
- * Modifications Copyright 2018, Blender Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __BVH8_H__
-#define __BVH8_H__
-
-#include "bvh/bvh.h"
-#include "bvh/bvh_params.h"
-
-#include "util/util_types.h"
-#include "util/util_vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-class BVHNode;
-struct BVHStackEntry;
-class BVHParams;
-class BoundBox;
-class LeafNode;
-class Object;
-class Progress;
-
-#define BVH_ONODE_SIZE 16
-#define BVH_ONODE_LEAF_SIZE 1
-#define BVH_UNALIGNED_ONODE_SIZE 28
-
-/* BVH8
- *
- * Octo BVH, with each node having eight children, to use with SIMD instructions.
- */
-class BVH8 : public BVH {
- protected:
-  /* constructor */
-  friend class BVH;
-  BVH8(const BVHParams &params,
-       const vector<Geometry *> &geometry,
-       const vector<Object *> &objects);
-
-  /* Building process. */
-  virtual BVHNode *widen_children_nodes(const BVHNode *root) override;
-
-  /* pack */
-  void pack_nodes(const BVHNode *root) override;
-
-  void pack_leaf(const BVHStackEntry &e, const LeafNode *leaf);
-  void pack_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
-
-  void pack_aligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
-  void pack_aligned_node(int idx,
-                         const BoundBox *bounds,
-                         const int *child,
-                         const uint visibility,
-                         const float time_from,
-                         const float time_to,
-                         const int num);
-
-  void pack_unaligned_inner(const BVHStackEntry &e, const BVHStackEntry *en, int num);
-  void pack_unaligned_node(int idx,
-                           const Transform *aligned_space,
-                           const BoundBox *bounds,
-                           const int *child,
-                           const uint visibility,
-                           const float time_from,
-                           const float time_to,
-                           const int num);
-
-  /* refit */
-  void refit_nodes() override;
-  void refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility);
-};
-
-CCL_NAMESPACE_END
-
-#endif /* __BVH8_H__ */
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index bc85d9386ad..a36c76c852a 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -345,17 +345,6 @@ class CPUDevice : public Device {
   virtual BVHLayoutMask get_bvh_layout_mask() const
   {
     BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
-    if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
-      bvh_layout_mask |= BVH_LAYOUT_BVH4;
-    }
-    /* MSVC does not support the -march=native switch and you always end up  */
-    /* with an sse2 kernel when you use WITH_KERNEL_NATIVE. We *cannot* feed */
-    /* that kernel BVH8 even if the CPU flags would allow for it. */
-#if (defined(__x86_64__) || defined(_M_X64)) && !(defined(_MSC_VER) && defined(WITH_KERNEL_NATIVE))
-    if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
-      bvh_layout_mask |= BVH_LAYOUT_BVH8;
-    }
-#endif
 #ifdef WITH_EMBREE
     bvh_layout_mask |= BVH_LAYOUT_EMBREE;
 #endif /* WITH_EMBREE */
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 35339abff45..7cc0d32d521 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -81,18 +81,6 @@ set(SRC_BVH_HEADERS
   bvh/bvh_types.h
   bvh/bvh_volume.h
   bvh/bvh_volume_all.h
-  bvh/qbvh_nodes.h
-  bvh/qbvh_shadow_all.h
-  bvh/qbvh_local.h
-  bvh/qbvh_traversal.h
-  bvh/qbvh_volume.h
-  bvh/qbvh_volume_all.h
-  bvh/obvh_nodes.h
-  bvh/obvh_shadow_all.h
-  bvh/obvh_local.h
-  bvh/obvh_traversal.h
-  bvh/obvh_volume.h
-  bvh/obvh_volume_all.h
   bvh/bvh_embree.h
 )
 
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index bf48d3dd826..80b58f46329 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -35,14 +35,6 @@ CCL_NAMESPACE_BEGIN
 
 #ifndef __KERNEL_OPTIX__
 
-/* Common QBVH functions. */
-#  ifdef __QBVH__
-#    include "kernel/bvh/qbvh_nodes.h"
-#    ifdef __KERNEL_AVX2__
-#      include "kernel/bvh/obvh_nodes.h"
-#    endif
-#  endif
-
 /* Regular BVH traversal */
 
 #  include "kernel/bvh/bvh_nodes.h"
diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h
index 7a069ef1108..4006c9c1632 100644
--- a/intern/cycles/kernel/bvh/bvh_local.h
+++ b/intern/cycles/kernel/bvh/bvh_local.h
@@ -17,13 +17,6 @@
  * limitations under the License.
  */
 
-#ifdef __QBVH__
-#  include "kernel/bvh/qbvh_local.h"
-#  ifdef __KERNEL_AVX2__
-#    include "kernel/bvh/obvh_local.h"
-#  endif
-#endif
-
 #if BVH_FEATURE(BVH_HAIR)
 #  define NODE_INTERSECT bvh_node_intersect
 #else
@@ -88,26 +81,6 @@ ccl_device_inline
     object = local_object;
   }
 
-#if defined(__KERNEL_SSE2__)
-  const shuffle_swap_t shuf_identity = shuffle_swap_identity();
-  const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
-  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-  ssef Psplat[3], idirsplat[3];
-#  if BVH_FEATURE(BVH_HAIR)
-  ssef tnear(0.0f), tfar(isect_t);
-#  endif
-  shuffle_swap_t shufflexyz[3];
-
-  Psplat[0] = ssef(P.x);
-  Psplat[1] = ssef(P.y);
-  Psplat[2] = ssef(P.z);
-
-  ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
-  gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
   /* traversal loop */
   do {
     do {
@@ -117,33 +90,16 @@ ccl_device_inline
         float dist[2];
         float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
-#if !defined(__KERNEL_SSE2__)
         traverse_mask = NODE_INTERSECT(kg,
                                        P,
-#  if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
                                        dir,
-#  endif
+#endif
                                        idir,
                                        isect_t,
                                        node_addr,
                                        PATH_RAY_ALL_VISIBILITY,
                                        dist);
-#else  // __KERNEL_SSE2__
-        traverse_mask = NODE_INTERSECT(kg,
-                                       P,
-                                       dir,
-#  if BVH_FEATURE(BVH_HAIR)
-                                       tnear,
-                                       tfar,
-#  endif
-                                       tsplat,
-                                       Psplat,
-                                       idirsplat,
-                                       shufflexyz,
-                                       node_addr,
-                                       PATH_RAY_ALL_VISIBILITY,
-                                       dist);
-#endif  // __KERNEL_SSE2__
 
         node_addr = __float_as_int(cnodes.z);
         node_addr_child1 = __float_as_int(cnodes.w);
@@ -247,20 +203,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
                                          uint *lcg_state,
                                          int max_hits)
 {
-  switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
-    case BVH_LAYOUT_BVH8:
-      return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
-#endif
-#ifdef __QBVH__
-    case BVH_LAYOUT_BVH4:
-      return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
-#endif
-    case BVH_LAYOUT_BVH2:
-      return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
-  }
-  kernel_assert(!"Should not happen");
-  return false;
+  return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits);
 }
 
 #undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index db598d1c7fa..5367bdb633c 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -28,7 +28,6 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
   return space;
 }
 
-#if !defined(__KERNEL_SSE2__)
 ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
                                                       const float3 P,
                                                       const float3 idir,
@@ -39,9 +38,9 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
 {
 
   /* fetch node data */
-#  ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
   float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#  endif
+#endif
   float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
   float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
   float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
@@ -68,13 +67,13 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
   dist[0] = c0min;
   dist[1] = c1min;
 
-#  ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
   /* this visibility test gives a 5% performance hit, how to solve? */
   return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
          (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
-#  else
+#else
   return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0);
-#  endif
+#endif
 }
 
 ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg,
@@ -113,21 +112,21 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
                                                         float dist[2])
 {
   int mask = 0;
-#  ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
   float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-#  endif
+#endif
   if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
-#  ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
     if ((__float_as_uint(cnodes.x) & visibility))
-#  endif
+#endif
     {
       mask |= 1;
     }
   }
   if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) {
-#  ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
     if ((__float_as_uint(cnodes.y) & visibility))
-#  endif
+#endif
     {
       mask |= 2;
     }
@@ -152,125 +151,3 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
     return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist);
   }
 }
-
-#else /* !defined(__KERNEL_SSE2__) */
-
-int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg,
-                                                      const float3 &P,
-                                                      const float3 &dir,
-                                                      const ssef &tsplat,
-                                                      const ssef Psplat[3],
-                                                      const ssef idirsplat[3],
-                                                      const shuffle_swap_t shufflexyz[3],
-                                                      const int node_addr,
-                                                      const uint visibility,
-                                                      float dist[2])
-{
-  /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
-  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-
-  /* fetch node data */
-  const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr;
-
-  /* intersect ray against child nodes */
-  const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
-  const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
-  const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
-
-  /* calculate { c0min, c1min, -c0max, -c1max} */
-  ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
-  const ssef tminmax = minmax ^ pn;
-  const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
-
-  dist[0] = tminmax[0];
-  dist[1] = tminmax[1];
-
-  int mask = movemask(lrhit);
-
-#  ifdef __VISIBILITY_FLAG__
-  /* this visibility test gives a 5% performance hit, how to solve? */
-  float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-  int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
-              (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
-  return cmask;
-#  else
-  return mask & 3;
-#  endif
-}
-
-ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
-                                                        const float3 P,
-                                                        const float3 dir,
-                                                        const ssef &isect_near,
-                                                        const ssef &isect_far,
-                                                        const int node_addr,
-                                                        const uint visibility,
-                                                        float dist[2])
-{
-  Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0);
-  Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1);
-
-  float3 aligned_dir0 = transform_direction(&space0, dir),
-         aligned_dir1 = transform_direction(&space1, dir);
-  float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P);
-  float3 nrdir0 = -bvh_inverse_direction(aligned_dir0),
-         nrdir1 = -bvh_inverse_direction(aligned_dir1);
-
-  ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f),
-       lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f),
-       lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f);
-
-  ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f),
-       upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f),
-       upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f);
-
-  ssef tnear_x = min(lower_x, upper_x);
-  ssef tnear_y = min(lower_y, upper_y);
-  ssef tnear_z = min(lower_z, upper_z);
-  ssef tfar_x = max(lower_x, upper_x);
-  ssef tfar_y = max(lower_y, upper_y);
-  ssef tfar_z = max(lower_z, upper_z);
-
-  const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-  const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-  sseb vmask = tnear <= tfar;
-  dist[0] = tnear.f[0];
-  dist[1] = tnear.f[1];
-
-  int mask = (int)movemask(vmask);
-
-#  ifdef __VISIBILITY_FLAG__
-  /* this visibility test gives a 5% performance hit, how to solve? */
-  float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-  int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) |
-              (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0);
-  return cmask;
-#  else
-  return mask & 3;
-#  endif
-}
-
-ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg,
-                                              const float3 &P,
-                                              const float3 &dir,
-                                              const ssef &isect_near,
-                                              const ssef &isect_far,
-                                              const ssef &tsplat,
-                                              const ssef Psplat[3],
-                                              const ssef idirsplat[3],
-                                              const shuffle_swap_t shufflexyz[3],
-                                              const int node_addr,
-                                              const uint visibility,
-                                              float dist[2])
-{
-  float4 node = kernel_tex_fetch(__bvh_nodes, node_addr);
-  if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
-    return bvh_unaligned_node_intersect(
-        kg, P, dir, isect_near, isect_far, node_addr, visibility, dist);
-  }
-  else {
-    return bvh_aligned_node_intersect(
-        kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist);
-  }
-}
-#endif /* !defined(__KERNEL_SSE2__) */
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index 03916bfdca9..12b88f159e2 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -17,13 +17,6 @@
  * limitations under the License.
  */
 
-#ifdef __QBVH__
-#  include "kernel/bvh/qbvh_shadow_all.h"
-#  ifdef __KERNEL_AVX2__
-#    include "kernel/bvh/obvh_shadow_all.h"
-#  endif
-#endif
-
 #if BVH_FEATURE(BVH_HAIR)
 #  define NODE_INTERSECT bvh_node_intersect
 #else
@@ -80,26 +73,6 @@ ccl_device_inline
   *num_hits = 0;
   isect_array->t = tmax;
 
-#if defined(__KERNEL_SSE2__)
-  const shuffle_swap_t shuf_identity = shuffle_swap_identity();
-  const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
-  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-  ssef Psplat[3], idirsplat[3];
-#  if BVH_FEATURE(BVH_HAIR)
-  ssef tnear(0.0f), tfar(isect_t);
-#  endif
-  shuffle_swap_t shufflexyz[3];
-
-  Psplat[0] = ssef(P.x);
-  Psplat[1] = ssef(P.y);
-  Psplat[2] = ssef(P.z);
-
-  ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
-  gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif /* __KERNEL_SSE2__ */
-
   /* traversal loop */
   do {
     do {
@@ -109,33 +82,16 @@ ccl_device_inline
         float dist[2];
         float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
-#if !defined(__KERNEL_SSE2__)
         traverse_mask = NODE_INTERSECT(kg,
                                        P,
-#  if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
                                        dir,
-#  endif
+#endif
                                        idir,
                                        isect_t,
                                        node_addr,
                                        visibility,
                                        dist);
-#else  // __KERNEL_SSE2__
-        traverse_mask = NODE_INTERSECT(kg,
-                                       P,
-                                       dir,
-#  if BVH_FEATURE(BVH_HAIR)
-                                       tnear,
-                                       tfar,
-#  endif
-                                       tsplat,
-                                       Psplat,
-                                       idirsplat,
-                                       shufflexyz,
-                                       node_addr,
-                                       visibility,
-                                       dist);
-#endif  // __KERNEL_SSE2__
 
         node_addr = __float_as_int(cnodes.z);
         node_addr_child1 = __float_as_int(cnodes.w);
@@ -272,18 +228,6 @@ ccl_device_inline
           num_hits_in_instance = 0;
           isect_array->t = isect_t;
 
-#  if defined(__KERNEL_SSE2__)
-          Psplat[0] = ssef(P.x);
-          Psplat[1] = ssef(P.y);
-          Psplat[2] = ssef(P.z);
-
-          tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-#    if BVH_FEATURE(BVH_HAIR)
-          tfar = ssef(isect_t);
-#    endif
-          gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#  endif
-
           ++stack_ptr;
           kernel_assert(stack_ptr < BVH_STACK_SIZE);
           traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -322,18 +266,6 @@ ccl_device_inline
       isect_t = tmax;
       isect_array->t = isect_t;
 
-#  if defined(__KERNEL_SSE2__)
-      Psplat[0] = ssef(P.x);
-      Psplat[1] = ssef(P.y);
-      Psplat[2] = ssef(P.z);
-
-      tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-#    if BVH_FEATURE(BVH_HAIR)
-      tfar = ssef(isect_t);
-#    endif
-      gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#  endif
-
       object = OBJECT_NONE;
       node_addr = traversal_stack[stack_ptr];
       --stack_ptr;
@@ -350,20 +282,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
                                          const uint max_hits,
                                          uint *num_hits)
 {
-  switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
-    case BVH_LAYOUT_BVH8:
-      return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
-#endif
-#ifdef __QBVH__
-    case BVH_LAYOUT_BVH4:
-      return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
-#endif
-    case BVH_LAYOUT_BVH2:
-      return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
-  }
-  kernel_assert(!"Should not happen");
-  return false;
+  return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits);
 }
 
 #undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 2a4677928c5..e6236c93caa 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -17,13 +17,6 @@
  * limitations under the License.
  */
 
-#ifdef __QBVH__
-#  include "kernel/bvh/qbvh_traversal.h"
-#endif
-#ifdef __KERNEL_AVX2__
-#  include "kernel/bvh/obvh_traversal.h"
-#endif
-
 #if BVH_FEATURE(BVH_HAIR)
 #  define NODE_INTERSECT bvh_node_intersect
 #else
@@ -76,26 +69,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 
   BVH_DEBUG_INIT();
 
-#if defined(__KERNEL_SSE2__)
-  const shuffle_swap_t shuf_identity = shuffle_swap_identity();
-  const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
-  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-  ssef Psplat[3], idirsplat[3];
-#  if BVH_FEATURE(BVH_HAIR)
-  ssef tnear(0.0f), tfar(isect->t);
-#  endif
-  shuffle_swap_t shufflexyz[3];
-
-  Psplat[0] = ssef(P.x);
-  Psplat[1] = ssef(P.y);
-  Psplat[2] = ssef(P.z);
-
-  ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
-
-  gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
   /* traversal loop */
   do {
     do {
@@ -105,37 +78,18 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
         float dist[2];
         float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
-#if !defined(__KERNEL_SSE2__)
         {
           traverse_mask = NODE_INTERSECT(kg,
                                          P,
-#  if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
                                          dir,
-#  endif
+#endif
                                          idir,
                                          isect->t,
                                          node_addr,
                                          visibility,
                                          dist);
         }
-#else  // __KERNEL_SSE2__
-        {
-          traverse_mask = NODE_INTERSECT(kg,
-                                         P,
-                                         dir,
-#  if BVH_FEATURE(BVH_HAIR)
-                                         tnear,
-                                         tfar,
-#  endif
-                                         tsplat,
-                                         Psplat,
-                                         idirsplat,
-                                         shufflexyz,
-                                         node_addr,
-                                         visibility,
-                                         dist);
-        }
-#endif  // __KERNEL_SSE2__
 
         node_addr = __float_as_int(cnodes.z);
         node_addr_child1 = __float_as_int(cnodes.w);
@@ -188,17 +142,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
                 if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
                   /* shadow ray early termination */
-#if defined(__KERNEL_SSE2__)
                   if (visibility & PATH_RAY_SHADOW_OPAQUE)
                     return true;
-                  tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-#  if BVH_FEATURE(BVH_HAIR)
-                  tfar = ssef(isect->t);
-#  endif
-#else
-                if (visibility & PATH_RAY_SHADOW_OPAQUE)
-                  return true;
-#endif
                 }
               }
               break;
@@ -211,17 +156,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                 if (motion_triangle_intersect(
                         kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
                   /* shadow ray early termination */
-#  if defined(__KERNEL_SSE2__)
-                  if (visibility & PATH_RAY_SHADOW_OPAQUE)
-                    return true;
-                  tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-#    if BVH_FEATURE(BVH_HAIR)
-                  tfar = ssef(isect->t);
-#    endif
-#  else
                   if (visibility & PATH_RAY_SHADOW_OPAQUE)
                     return true;
-#  endif
                 }
               }
               break;
@@ -238,17 +174,8 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                     kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
                 if (hit) {
                   /* shadow ray early termination */
-#  if defined(__KERNEL_SSE2__)
-                  if (visibility & PATH_RAY_SHADOW_OPAQUE)
-                    return true;
-                  tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-#    if BVH_FEATURE(BVH_HAIR)
-                  tfar = ssef(isect->t);
-#    endif
-#  else
                   if (visibility & PATH_RAY_SHADOW_OPAQUE)
                     return true;
-#  endif
                 }
               }
               break;
@@ -267,19 +194,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
           isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
 #endif
 
-#  if defined(__KERNEL_SSE2__)
-          Psplat[0] = ssef(P.x);
-          Psplat[1] = ssef(P.y);
-          Psplat[2] = ssef(P.z);
-
-          tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-#    if BVH_FEATURE(BVH_HAIR)
-          tfar = ssef(isect->t);
-#    endif
-
-          gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#  endif
-
           ++stack_ptr;
           kernel_assert(stack_ptr < BVH_STACK_SIZE);
           traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -301,19 +215,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
       isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #endif
 
-#  if defined(__KERNEL_SSE2__)
-      Psplat[0] = ssef(P.x);
-      Psplat[1] = ssef(P.y);
-      Psplat[2] = ssef(P.z);
-
-      tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-#    if BVH_FEATURE(BVH_HAIR)
-      tfar = ssef(isect->t);
-#    endif
-
-      gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#  endif
-
       object = OBJECT_NONE;
       node_addr = traversal_stack[stack_ptr];
       --stack_ptr;
@@ -328,20 +229,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
                                          Intersection *isect,
                                          const uint visibility)
 {
-  switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
-    case BVH_LAYOUT_BVH8:
-      return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility);
-#endif
-#ifdef __QBVH__
-    case BVH_LAYOUT_BVH4:
-      return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility);
-#endif /* __QBVH__ */
-    case BVH_LAYOUT_BVH2:
-      return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
-  }
-  kernel_assert(!"Should not happen");
-  return false;
+  return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
 }
 
 #undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_types.h b/intern/cycles/kernel/bvh/bvh_types.h
index a7bc52d4435..b173568266b 100644
--- a/intern/cycles/kernel/bvh/bvh_types.h
+++ b/intern/cycles/kernel/bvh/bvh_types.h
@@ -31,8 +31,6 @@ CCL_NAMESPACE_BEGIN
 
 /* 64 object BVH + 64 mesh BVH + 64 object node splitting */
 #define BVH_STACK_SIZE 192
-#define BVH_QSTACK_SIZE 384
-#define BVH_OSTACK_SIZE 768
 /* BVH intersection function variations */
 
 #define BVH_MOTION 1
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index d8b0bbccd22..1f2ea47269b 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -17,13 +17,6 @@
  * limitations under the License.
  */
 
-#ifdef __QBVH__
-#  include "kernel/bvh/qbvh_volume.h"
-#  ifdef __KERNEL_AVX2__
-#    include "kernel/bvh/obvh_volume.h"
-#  endif
-#endif
-
 #if BVH_FEATURE(BVH_HAIR)
 #  define NODE_INTERSECT bvh_node_intersect
 #else
@@ -78,26 +71,6 @@ ccl_device_inline
   isect->prim = PRIM_NONE;
   isect->object = OBJECT_NONE;
 
-#if defined(__KERNEL_SSE2__)
-  const shuffle_swap_t shuf_identity = shuffle_swap_identity();
-  const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
-  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-  ssef Psplat[3], idirsplat[3];
-#  if BVH_FEATURE(BVH_HAIR)
-  ssef tnear(0.0f), tfar(isect->t);
-#  endif
-  shuffle_swap_t shufflexyz[3];
-
-  Psplat[0] = ssef(P.x);
-  Psplat[1] = ssef(P.y);
-  Psplat[2] = ssef(P.z);
-
-  ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
-
-  gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif
-
   /* traversal loop */
   do {
     do {
@@ -107,33 +80,16 @@ ccl_device_inline
         float dist[2];
         float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
-#if !defined(__KERNEL_SSE2__)
         traverse_mask = NODE_INTERSECT(kg,
                                        P,
-#  if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
                                        dir,
-#  endif
+#endif
                                        idir,
                                        isect->t,
                                        node_addr,
                                        visibility,
                                        dist);
-#else  // __KERNEL_SSE2__
-        traverse_mask = NODE_INTERSECT(kg,
-                                       P,
-                                       dir,
-#  if BVH_FEATURE(BVH_HAIR)
-                                       tnear,
-                                       tfar,
-#  endif
-                                       tsplat,
-                                       Psplat,
-                                       idirsplat,
-                                       shufflexyz,
-                                       node_addr,
-                                       visibility,
-                                       dist);
-#endif  // __KERNEL_SSE2__
 
         node_addr = __float_as_int(cnodes.z);
         node_addr_child1 = __float_as_int(cnodes.w);
@@ -231,19 +187,6 @@ ccl_device_inline
             isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
 #endif
 
-#  if defined(__KERNEL_SSE2__)
-            Psplat[0] = ssef(P.x);
-            Psplat[1] = ssef(P.y);
-            Psplat[2] = ssef(P.z);
-
-            tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-#    if BVH_FEATURE(BVH_HAIR)
-            tfar = ssef(isect->t);
-#    endif
-
-            gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#  endif
-
             ++stack_ptr;
             kernel_assert(stack_ptr < BVH_STACK_SIZE);
             traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -270,19 +213,6 @@ ccl_device_inline
       isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #endif
 
-#  if defined(__KERNEL_SSE2__)
-      Psplat[0] = ssef(P.x);
-      Psplat[1] = ssef(P.y);
-      Psplat[2] = ssef(P.z);
-
-      tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
-#    if BVH_FEATURE(BVH_HAIR)
-      tfar = ssef(isect->t);
-#    endif
-
-      gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#  endif
-
       object = OBJECT_NONE;
       node_addr = traversal_stack[stack_ptr];
       --stack_ptr;
@@ -297,20 +227,7 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
                                          Intersection *isect,
                                          const uint visibility)
 {
-  switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
-    case BVH_LAYOUT_BVH8:
-      return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility);
-#endif
-#ifdef __QBVH__
-    case BVH_LAYOUT_BVH4:
-      return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility);
-#endif
-    case BVH_LAYOUT_BVH2:
-      return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
-  }
-  kernel_assert(!"Should not happen");
-  return false;
+  return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility);
 }
 
 #undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index 7b1834c7c6f..a8664cc4331 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -17,13 +17,6 @@
  * limitations under the License.
  */
 
-#ifdef __QBVH__
-#  include "kernel/bvh/qbvh_volume_all.h"
-#  ifdef __KERNEL_AVX2__
-#    include "kernel/bvh/obvh_volume_all.h"
-#  endif
-#endif
-
 #if BVH_FEATURE(BVH_HAIR)
 #  define NODE_INTERSECT bvh_node_intersect
 #else
@@ -80,26 +73,6 @@ ccl_device_inline
   uint num_hits = 0;
   isect_array->t = tmax;
 
-#if defined(__KERNEL_SSE2__)
-  const shuffle_swap_t shuf_identity = shuffle_swap_identity();
-  const shuffle_swap_t shuf_swap = shuffle_swap_swap();
-
-  const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
-  ssef Psplat[3], idirsplat[3];
-#  if BVH_FEATURE(BVH_HAIR)
-  ssef tnear(0.0f), tfar(isect_t);
-#  endif
-  shuffle_swap_t shufflexyz[3];
-
-  Psplat[0] = ssef(P.x);
-  Psplat[1] = ssef(P.y);
-  Psplat[2] = ssef(P.z);
-
-  ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
-
-  gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#endif /* __KERNEL_SSE2__ */
-
   /* traversal loop */
   do {
     do {
@@ -109,33 +82,16 @@ ccl_device_inline
         float dist[2];
         float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
 
-#if !defined(__KERNEL_SSE2__)
         traverse_mask = NODE_INTERSECT(kg,
                                        P,
-#  if BVH_FEATURE(BVH_HAIR)
+#if BVH_FEATURE(BVH_HAIR)
                                        dir,
-#  endif
+#endif
                                        idir,
                                        isect_t,
                                        node_addr,
                                        visibility,
                                        dist);
-#else  // __KERNEL_SSE2__
-        traverse_mask = NODE_INTERSECT(kg,
-                                       P,
-                                       dir,
-#  if BVH_FEATURE(BVH_HAIR)
-                                       tnear,
-                                       tfar,
-#  endif
-                                       tsplat,
-                                       Psplat,
-                                       idirsplat,
-                                       shufflexyz,
-                                       node_addr,
-                                       visibility,
-                                       dist);
-#endif  // __KERNEL_SSE2__
 
         node_addr = __float_as_int(cnodes.z);
         node_addr_child1 = __float_as_int(cnodes.w);
@@ -281,19 +237,6 @@ ccl_device_inline
             num_hits_in_instance = 0;
             isect_array->t = isect_t;
 
-#  if defined(__KERNEL_SSE2__)
-            Psplat[0] = ssef(P.x);
-            Psplat[1] = ssef(P.y);
-            Psplat[2] = ssef(P.z);
-
-            tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-#    if BVH_FEATURE(BVH_HAIR)
-            tfar = ssef(isect_t);
-#    endif
-
-            gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#  endif
-
             ++stack_ptr;
             kernel_assert(stack_ptr < BVH_STACK_SIZE);
             traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -337,19 +280,6 @@ ccl_device_inline
       isect_t = tmax;
       isect_array->t = isect_t;
 
-#  if defined(__KERNEL_SSE2__)
-      Psplat[0] = ssef(P.x);
-      Psplat[1] = ssef(P.y);
-      Psplat[2] = ssef(P.z);
-
-      tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
-#    if BVH_FEATURE(BVH_HAIR)
-      tfar = ssef(isect_t);
-#    endif
-
-      gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
-#  endif
-
       object = OBJECT_NONE;
       node_addr = traversal_stack[stack_ptr];
       --stack_ptr;
@@ -365,20 +295,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
                                          const uint max_hits,
                                          const uint visibility)
 {
-  switch (kernel_data.bvh.bvh_layout) {
-#ifdef __KERNEL_AVX2__
-    case BVH_LAYOUT_BVH8:
-      return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, max_hits, visibility);
-#endif
-#ifdef __QBVH__
-    case BVH_LAYOUT_BVH4:
-      return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, max_hits, visibility);
-#endif
-    case BVH_LAYOUT_BVH2:
-      return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
-  }
-  kernel_assert(!"Should not happen");
-  return 0;
+  return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility);
 }
 
 #undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/obvh_local.h b/intern/cycles/kernel/bvh/obvh_local.h
deleted file mode 100644
index e6bb548bc5b..00000000000
--- a/intern/cycles/kernel/bvh/obvh_local.h
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for subsurface scattering, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-#  define NODE_INTERSECT obvh_node_intersect
-#else
-#  define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
-                                             const Ray *ray,
-                                             LocalIntersection *local_isect,
-                                             int local_object,
-                                             uint *lcg_state,
-                                             int max_hits)
-{
-  /* Traversal stack in CUDA thread-local memory. */
-  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
-  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-  /* Traversal variables in registers. */
-  int stack_ptr = 0;
-  int node_addr = kernel_tex_fetch(__object_node, local_object);
-
-  /* Ray parameters in registers. */
-  float3 P = ray->P;
-  float3 dir = bvh_clamp_direction(ray->D);
-  float3 idir = bvh_inverse_direction(dir);
-  int object = OBJECT_NONE;
-  float isect_t = ray->t;
-
-  if (local_isect != NULL) {
-    local_isect->num_hits = 0;
-  }
-  kernel_assert((local_isect == NULL) == (max_hits == 0));
-
-  const int object_flag = kernel_tex_fetch(__object_flag, local_object);
-  if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-#if BVH_FEATURE(BVH_MOTION)
-    Transform ob_itfm;
-    isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#else
-    isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
-#endif
-    object = local_object;
-  }
-
-  avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
-  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
-  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
-  float3 P_idir = P * idir;
-  avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-  avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
-  /* Offsets to select the side that becomes the lower or upper bound. */
-  int near_x, near_y, near_z;
-  int far_x, far_y, far_z;
-  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
-  /* Traversal loop. */
-  do {
-    do {
-      /* Traverse internal nodes. */
-      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-        avxf dist;
-        int child_mask = NODE_INTERSECT(kg,
-                                        tnear,
-                                        tfar,
-#ifdef __KERNEL_AVX2__
-                                        P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-                                        org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-                                        dir4,
-#endif
-                                        idir4,
-                                        near_x,
-                                        near_y,
-                                        near_z,
-                                        far_x,
-                                        far_y,
-                                        far_z,
-                                        node_addr,
-                                        &dist);
-
-        if (child_mask != 0) {
-          float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-          avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
-          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
-          }
-          else
-#endif
-          {
-            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
-          }
-
-          /* One child is hit, continue with that child. */
-          int r = __bscf(child_mask);
-          if (child_mask == 0) {
-            node_addr = __float_as_int(cnodes[r]);
-            continue;
-          }
-
-          /* Two children are hit, push far child, and continue with
-           * closer child.
-           */
-          int c0 = __float_as_int(cnodes[r]);
-          float d0 = ((float *)&dist)[r];
-          r = __bscf(child_mask);
-          int c1 = __float_as_int(cnodes[r]);
-          float d1 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            if (d1 < d0) {
-              node_addr = c1;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c0;
-              traversal_stack[stack_ptr].dist = d0;
-              continue;
-            }
-            else {
-              node_addr = c0;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c1;
-              traversal_stack[stack_ptr].dist = d1;
-              continue;
-            }
-          }
-
-          /* Here starts the slow path for 3 or 4 hit children. We push
-           * all nodes onto the stack to sort them there.
-           */
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c1;
-          traversal_stack[stack_ptr].dist = d1;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c0;
-          traversal_stack[stack_ptr].dist = d0;
-
-          /* Three children are hit, push all onto stack and sort 3
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c2 = __float_as_int(cnodes[r]);
-          float d2 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Four children are hit, push all onto stack and sort 4
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c3 = __float_as_int(cnodes[r]);
-          float d3 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c3;
-            traversal_stack[stack_ptr].dist = d3;
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c3;
-          traversal_stack[stack_ptr].dist = d3;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c2;
-          traversal_stack[stack_ptr].dist = d2;
-
-          /* Five children are hit, push all onto stack and sort 5
-           * stack items, continue with closest child
-           */
-          r = __bscf(child_mask);
-          int c4 = __float_as_int(cnodes[r]);
-          float d4 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c4;
-            traversal_stack[stack_ptr].dist = d4;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-          /* Six children are hit, push all onto stack and sort 6
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c5 = __float_as_int(cnodes[r]);
-          float d5 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c5;
-            traversal_stack[stack_ptr].dist = d5;
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c4;
-            traversal_stack[stack_ptr].dist = d4;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4],
-                            &traversal_stack[stack_ptr - 5]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c5;
-          traversal_stack[stack_ptr].dist = d5;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c4;
-          traversal_stack[stack_ptr].dist = d4;
-
-          /* Seven children are hit, push all onto stack and sort 7
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c6 = __float_as_int(cnodes[r]);
-          float d6 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c6;
-            traversal_stack[stack_ptr].dist = d6;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4],
-                            &traversal_stack[stack_ptr - 5],
-                            &traversal_stack[stack_ptr - 6]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-          /* Eight children are hit, push all onto stack and sort 8
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c7 = __float_as_int(cnodes[r]);
-          float d7 = ((float *)&dist)[r];
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c7;
-          traversal_stack[stack_ptr].dist = d7;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c6;
-          traversal_stack[stack_ptr].dist = d6;
-          obvh_stack_sort(&traversal_stack[stack_ptr],
-                          &traversal_stack[stack_ptr - 1],
-                          &traversal_stack[stack_ptr - 2],
-                          &traversal_stack[stack_ptr - 3],
-                          &traversal_stack[stack_ptr - 4],
-                          &traversal_stack[stack_ptr - 5],
-                          &traversal_stack[stack_ptr - 6],
-                          &traversal_stack[stack_ptr - 7]);
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-
-        node_addr = traversal_stack[stack_ptr].addr;
-        --stack_ptr;
-      }
-
-      /* If node is leaf, fetch triangle list. */
-      if (node_addr < 0) {
-        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-        int prim_addr = __float_as_int(leaf.x);
-
-        int prim_addr2 = __float_as_int(leaf.y);
-        const uint type = __float_as_int(leaf.w);
-
-        /* Pop. */
-        node_addr = traversal_stack[stack_ptr].addr;
-        --stack_ptr;
-
-        /* Primitive intersection. */
-        switch (type & PRIMITIVE_ALL) {
-          case PRIMITIVE_TRIANGLE: {
-            /* Intersect ray against primitive, */
-            for (; prim_addr < prim_addr2; prim_addr++) {
-              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-              if (triangle_intersect_local(kg,
-                                           local_isect,
-                                           P,
-                                           dir,
-                                           object,
-                                           local_object,
-                                           prim_addr,
-                                           isect_t,
-                                           lcg_state,
-                                           max_hits)) {
-                return true;
-              }
-            }
-            break;
-          }
-#if BVH_FEATURE(BVH_MOTION)
-          case PRIMITIVE_MOTION_TRIANGLE: {
-            /* Intersect ray against primitive. */
-            for (; prim_addr < prim_addr2; prim_addr++) {
-              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-              if (motion_triangle_intersect_local(kg,
-                                                  local_isect,
-                                                  P,
-                                                  dir,
-                                                  ray->time,
-                                                  object,
-                                                  local_object,
-                                                  prim_addr,
-                                                  isect_t,
-                                                  lcg_state,
-                                                  max_hits)) {
-                return true;
-              }
-            }
-            break;
-          }
-#endif
-          default:
-            break;
-        }
-      }
-    } while (node_addr != ENTRYPOINT_SENTINEL);
-  } while (node_addr != ENTRYPOINT_SENTINEL);
-  return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_nodes.h b/intern/cycles/kernel/bvh/obvh_nodes.h
deleted file mode 100644
index e5c935b75ed..00000000000
--- a/intern/cycles/kernel/bvh/obvh_nodes.h
+++ /dev/null
@@ -1,410 +0,0 @@
-/*
- * Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Aligned nodes intersection AVX code is adopted from Embree,
- */
-
-struct OBVHStackItem {
-  int addr;
-  float dist;
-};
-
-ccl_device_inline void obvh_near_far_idx_calc(const float3 &idir,
-                                              int *ccl_restrict near_x,
-                                              int *ccl_restrict near_y,
-                                              int *ccl_restrict near_z,
-                                              int *ccl_restrict far_x,
-                                              int *ccl_restrict far_y,
-                                              int *ccl_restrict far_z)
-
-{
-#ifdef __KERNEL_SSE__
-  *near_x = 0;
-  *far_x = 1;
-  *near_y = 2;
-  *far_y = 3;
-  *near_z = 4;
-  *far_z = 5;
-
-  const size_t mask = movemask(ssef(idir.m128));
-
-  const int mask_x = mask & 1;
-  const int mask_y = (mask & 2) >> 1;
-  const int mask_z = (mask & 4) >> 2;
-
-  *near_x += mask_x;
-  *far_x -= mask_x;
-  *near_y += mask_y;
-  *far_y -= mask_y;
-  *near_z += mask_z;
-  *far_z -= mask_z;
-#else
-  if (idir.x >= 0.0f) {
-    *near_x = 0;
-    *far_x = 1;
-  }
-  else {
-    *near_x = 1;
-    *far_x = 0;
-  }
-  if (idir.y >= 0.0f) {
-    *near_y = 2;
-    *far_y = 3;
-  }
-  else {
-    *near_y = 3;
-    *far_y = 2;
-  }
-  if (idir.z >= 0.0f) {
-    *near_z = 4;
-    *far_z = 5;
-  }
-  else {
-    *near_z = 5;
-    *far_z = 4;
-  }
-#endif
-}
-
-ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, OBVHStackItem *ccl_restrict b)
-{
-  OBVHStackItem tmp = *a;
-  *a = *b;
-  *b = tmp;
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                       OBVHStackItem *ccl_restrict s2,
-                                       OBVHStackItem *ccl_restrict s3)
-{
-  if (s2->dist < s1->dist) {
-    obvh_item_swap(s2, s1);
-  }
-  if (s3->dist < s2->dist) {
-    obvh_item_swap(s3, s2);
-  }
-  if (s2->dist < s1->dist) {
-    obvh_item_swap(s2, s1);
-  }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                       OBVHStackItem *ccl_restrict s2,
-                                       OBVHStackItem *ccl_restrict s3,
-                                       OBVHStackItem *ccl_restrict s4)
-{
-  if (s2->dist < s1->dist) {
-    obvh_item_swap(s2, s1);
-  }
-  if (s4->dist < s3->dist) {
-    obvh_item_swap(s4, s3);
-  }
-  if (s3->dist < s1->dist) {
-    obvh_item_swap(s3, s1);
-  }
-  if (s4->dist < s2->dist) {
-    obvh_item_swap(s4, s2);
-  }
-  if (s3->dist < s2->dist) {
-    obvh_item_swap(s3, s2);
-  }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                       OBVHStackItem *ccl_restrict s2,
-                                       OBVHStackItem *ccl_restrict s3,
-                                       OBVHStackItem *ccl_restrict s4,
-                                       OBVHStackItem *ccl_restrict s5)
-{
-  obvh_stack_sort(s1, s2, s3, s4);
-  if (s5->dist < s4->dist) {
-    obvh_item_swap(s4, s5);
-    if (s4->dist < s3->dist) {
-      obvh_item_swap(s3, s4);
-      if (s3->dist < s2->dist) {
-        obvh_item_swap(s2, s3);
-        if (s2->dist < s1->dist) {
-          obvh_item_swap(s1, s2);
-        }
-      }
-    }
-  }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                       OBVHStackItem *ccl_restrict s2,
-                                       OBVHStackItem *ccl_restrict s3,
-                                       OBVHStackItem *ccl_restrict s4,
-                                       OBVHStackItem *ccl_restrict s5,
-                                       OBVHStackItem *ccl_restrict s6)
-{
-  obvh_stack_sort(s1, s2, s3, s4, s5);
-  if (s6->dist < s5->dist) {
-    obvh_item_swap(s5, s6);
-    if (s5->dist < s4->dist) {
-      obvh_item_swap(s4, s5);
-      if (s4->dist < s3->dist) {
-        obvh_item_swap(s3, s4);
-        if (s3->dist < s2->dist) {
-          obvh_item_swap(s2, s3);
-          if (s2->dist < s1->dist) {
-            obvh_item_swap(s1, s2);
-          }
-        }
-      }
-    }
-  }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                       OBVHStackItem *ccl_restrict s2,
-                                       OBVHStackItem *ccl_restrict s3,
-                                       OBVHStackItem *ccl_restrict s4,
-                                       OBVHStackItem *ccl_restrict s5,
-                                       OBVHStackItem *ccl_restrict s6,
-                                       OBVHStackItem *ccl_restrict s7)
-{
-  obvh_stack_sort(s1, s2, s3, s4, s5, s6);
-  if (s7->dist < s6->dist) {
-    obvh_item_swap(s6, s7);
-    if (s6->dist < s5->dist) {
-      obvh_item_swap(s5, s6);
-      if (s5->dist < s4->dist) {
-        obvh_item_swap(s4, s5);
-        if (s4->dist < s3->dist) {
-          obvh_item_swap(s3, s4);
-          if (s3->dist < s2->dist) {
-            obvh_item_swap(s2, s3);
-            if (s2->dist < s1->dist) {
-              obvh_item_swap(s1, s2);
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1,
-                                       OBVHStackItem *ccl_restrict s2,
-                                       OBVHStackItem *ccl_restrict s3,
-                                       OBVHStackItem *ccl_restrict s4,
-                                       OBVHStackItem *ccl_restrict s5,
-                                       OBVHStackItem *ccl_restrict s6,
-                                       OBVHStackItem *ccl_restrict s7,
-                                       OBVHStackItem *ccl_restrict s8)
-{
-  obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7);
-  if (s8->dist < s7->dist) {
-    obvh_item_swap(s7, s8);
-    if (s7->dist < s6->dist) {
-      obvh_item_swap(s6, s7);
-      if (s6->dist < s5->dist) {
-        obvh_item_swap(s5, s6);
-        if (s5->dist < s4->dist) {
-          obvh_item_swap(s4, s5);
-          if (s4->dist < s3->dist) {
-            obvh_item_swap(s3, s4);
-            if (s3->dist < s2->dist) {
-              obvh_item_swap(s2, s3);
-              if (s2->dist < s1->dist) {
-                obvh_item_swap(s1, s2);
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-/* Axis-aligned nodes intersection */
-
-ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
-                                                  const avxf &isect_near,
-                                                  const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
-                                                  const avx3f &org_idir,
-#else
-                                                  const avx3f &org,
-#endif
-                                                  const avx3f &idir,
-                                                  const int near_x,
-                                                  const int near_y,
-                                                  const int near_z,
-                                                  const int far_x,
-                                                  const int far_y,
-                                                  const int far_z,
-                                                  const int node_addr,
-                                                  avxf *ccl_restrict dist)
-{
-  const int offset = node_addr + 2;
-#ifdef __KERNEL_AVX2__
-  const avxf tnear_x = msub(
-      kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, org_idir.x);
-  const avxf tnear_y = msub(
-      kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, org_idir.y);
-  const avxf tnear_z = msub(
-      kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, org_idir.z);
-  const avxf tfar_x = msub(
-      kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, org_idir.x);
-  const avxf tfar_y = msub(
-      kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, org_idir.y);
-  const avxf tfar_z = msub(
-      kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, org_idir.z);
-
-  const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near);
-  const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far);
-  const avxb vmask = tnear <= tfar;
-  int mask = (int)movemask(vmask);
-  *dist = tnear;
-  return mask;
-#else
-  return 0;
-#endif
-}
-
-/* Unaligned nodes intersection */
-
-ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
-                                                    const avxf &isect_near,
-                                                    const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
-                                                    const avx3f &org_idir,
-#endif
-                                                    const avx3f &org,
-                                                    const avx3f &dir,
-                                                    const avx3f &idir,
-                                                    const int near_x,
-                                                    const int near_y,
-                                                    const int near_z,
-                                                    const int far_x,
-                                                    const int far_y,
-                                                    const int far_z,
-                                                    const int node_addr,
-                                                    avxf *ccl_restrict dist)
-{
-  const int offset = node_addr;
-  const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2);
-  const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4);
-  const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6);
-
-  const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8);
-  const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10);
-  const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12);
-
-  const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14);
-  const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16);
-  const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18);
-
-  const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20);
-  const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22);
-  const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24);
-
-  const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
-             aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
-             aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
-
-  const avxf aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
-             aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
-             aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
-
-  const avxf neg_one(-1.0f);
-  const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
-             nrdir_z = neg_one / aligned_dir_z;
-
-  const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
-             tlower_z = aligned_P_z * nrdir_z;
-
-  const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
-             tupper_z = tlower_z - nrdir_z;
-
-  const avxf tnear_x = min(tlower_x, tupper_x);
-  const avxf tnear_y = min(tlower_y, tupper_y);
-  const avxf tnear_z = min(tlower_z, tupper_z);
-  const avxf tfar_x = max(tlower_x, tupper_x);
-  const avxf tfar_y = max(tlower_y, tupper_y);
-  const avxf tfar_z = max(tlower_z, tupper_z);
-  const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-  const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-  const avxb vmask = tnear <= tfar;
-  *dist = tnear;
-  return movemask(vmask);
-}
-
-/* Intersectors wrappers.
- *
- * They'll check node type and call appropriate intersection code.
- */
-
-ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg,
-                                          const avxf &isect_near,
-                                          const avxf &isect_far,
-#ifdef __KERNEL_AVX2__
-                                          const avx3f &org_idir,
-#endif
-                                          const avx3f &org,
-                                          const avx3f &dir,
-                                          const avx3f &idir,
-                                          const int near_x,
-                                          const int near_y,
-                                          const int near_z,
-                                          const int far_x,
-                                          const int far_y,
-                                          const int far_z,
-                                          const int node_addr,
-                                          avxf *ccl_restrict dist)
-{
-  const int offset = node_addr;
-  const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
-  if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
-    return obvh_unaligned_node_intersect(kg,
-                                         isect_near,
-                                         isect_far,
-#ifdef __KERNEL_AVX2__
-                                         org_idir,
-#endif
-                                         org,
-                                         dir,
-                                         idir,
-                                         near_x,
-                                         near_y,
-                                         near_z,
-                                         far_x,
-                                         far_y,
-                                         far_z,
-                                         node_addr,
-                                         dist);
-  }
-  else {
-    return obvh_aligned_node_intersect(kg,
-                                       isect_near,
-                                       isect_far,
-#ifdef __KERNEL_AVX2__
-                                       org_idir,
-#else
-                                       org,
-#endif
-                                       idir,
-                                       near_x,
-                                       near_y,
-                                       near_z,
-                                       far_x,
-                                       far_y,
-                                       far_z,
-                                       node_addr,
-                                       dist);
-  }
-}
diff --git a/intern/cycles/kernel/bvh/obvh_shadow_all.h b/intern/cycles/kernel/bvh/obvh_shadow_all.h
deleted file mode 100644
index ff82d5105a4..00000000000
--- a/intern/cycles/kernel/bvh/obvh_shadow_all.h
+++ /dev/null
@@ -1,651 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-#  define NODE_INTERSECT obvh_node_intersect
-#else
-#  define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
-                                             const Ray *ray,
-                                             Intersection *isect_array,
-                                             const int skip_object,
-                                             const uint max_hits,
-                                             uint *num_hits)
-{
-  /* TODO(sergey):
-   *  - Test if pushing distance on the stack helps.
-   * - Likely and unlikely for if() statements.
-   * - Test restrict attribute for pointers.
-   */
-
-  /* Traversal stack in CUDA thread-local memory. */
-  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
-  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-  /* Traversal variables in registers. */
-  int stack_ptr = 0;
-  int node_addr = kernel_data.bvh.root;
-
-  /* Ray parameters in registers. */
-  const float tmax = ray->t;
-  float3 P = ray->P;
-  float3 dir = bvh_clamp_direction(ray->D);
-  float3 idir = bvh_inverse_direction(dir);
-  int object = OBJECT_NONE;
-  float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
-  Transform ob_itfm;
-#endif
-
-  *num_hits = 0;
-  isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
-  int num_hits_in_instance = 0;
-#endif
-
-  avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
-  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
-  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
-  float3 P_idir = P * idir;
-  avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-  avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
-  /* Offsets to select the side that becomes the lower or upper bound. */
-  int near_x, near_y, near_z;
-  int far_x, far_y, far_z;
-  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
-  /* Traversal loop. */
-  do {
-    do {
-      /* Traverse internal nodes. */
-      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-        (void)inodes;
-
-        if (false
-#ifdef __VISIBILITY_FLAG__
-            || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0)
-#endif
-#if BVH_FEATURE(BVH_MOTION)
-            || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
-        ) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-
-        avxf dist;
-        int child_mask = NODE_INTERSECT(kg,
-                                        tnear,
-                                        tfar,
-#ifdef __KERNEL_AVX2__
-                                        P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-                                        //#if !defined(__KERNEL_AVX2__)
-                                        org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-                                        dir4,
-#endif
-                                        idir4,
-                                        near_x,
-                                        near_y,
-                                        near_z,
-                                        far_x,
-                                        far_y,
-                                        far_z,
-                                        node_addr,
-                                        &dist);
-
-        if (child_mask != 0) {
-          avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
-          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
-          }
-          else
-#endif
-          {
-            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
-          }
-
-          /* One child is hit, continue with that child. */
-          int r = __bscf(child_mask);
-          if (child_mask == 0) {
-            node_addr = __float_as_int(cnodes[r]);
-            continue;
-          }
-
-          /* Two children are hit, push far child, and continue with
-           * closer child.
-           */
-          int c0 = __float_as_int(cnodes[r]);
-          float d0 = ((float *)&dist)[r];
-          r = __bscf(child_mask);
-          int c1 = __float_as_int(cnodes[r]);
-          float d1 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            if (d1 < d0) {
-              node_addr = c1;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c0;
-              traversal_stack[stack_ptr].dist = d0;
-              continue;
-            }
-            else {
-              node_addr = c0;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c1;
-              traversal_stack[stack_ptr].dist = d1;
-              continue;
-            }
-          }
-
-          /* Here starts the slow path for 3 or 4 hit children. We push
-           * all nodes onto the stack to sort them there.
-           */
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c1;
-          traversal_stack[stack_ptr].dist = d1;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c0;
-          traversal_stack[stack_ptr].dist = d0;
-
-          /* Three children are hit, push all onto stack and sort 3
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c2 = __float_as_int(cnodes[r]);
-          float d2 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Four children are hit, push all onto stack and sort 4
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c3 = __float_as_int(cnodes[r]);
-          float d3 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c3;
-            traversal_stack[stack_ptr].dist = d3;
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c3;
-          traversal_stack[stack_ptr].dist = d3;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c2;
-          traversal_stack[stack_ptr].dist = d2;
-
-          /* Five children are hit, push all onto stack and sort 5
-           * stack items, continue with closest child
-           */
-          r = __bscf(child_mask);
-          int c4 = __float_as_int(cnodes[r]);
-          float d4 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c4;
-            traversal_stack[stack_ptr].dist = d4;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Six children are hit, push all onto stack and sort 6
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c5 = __float_as_int(cnodes[r]);
-          float d5 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c5;
-            traversal_stack[stack_ptr].dist = d5;
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c4;
-            traversal_stack[stack_ptr].dist = d4;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4],
-                            &traversal_stack[stack_ptr - 5]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c5;
-          traversal_stack[stack_ptr].dist = d5;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c4;
-          traversal_stack[stack_ptr].dist = d4;
-
-          /* Seven children are hit, push all onto stack and sort 7
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c6 = __float_as_int(cnodes[r]);
-          float d6 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c6;
-            traversal_stack[stack_ptr].dist = d6;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4],
-                            &traversal_stack[stack_ptr - 5],
-                            &traversal_stack[stack_ptr - 6]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Eight children are hit, push all onto stack and sort 8
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c7 = __float_as_int(cnodes[r]);
-          float d7 = ((float *)&dist)[r];
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c7;
-          traversal_stack[stack_ptr].dist = d7;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c6;
-          traversal_stack[stack_ptr].dist = d6;
-          obvh_stack_sort(&traversal_stack[stack_ptr],
-                          &traversal_stack[stack_ptr - 1],
-                          &traversal_stack[stack_ptr - 2],
-                          &traversal_stack[stack_ptr - 3],
-                          &traversal_stack[stack_ptr - 4],
-                          &traversal_stack[stack_ptr - 5],
-                          &traversal_stack[stack_ptr - 6],
-                          &traversal_stack[stack_ptr - 7]);
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-
-        node_addr = traversal_stack[stack_ptr].addr;
-        --stack_ptr;
-      }
-
-      /* If node is leaf, fetch triangle list. */
-      if (node_addr < 0) {
-        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-#ifdef __VISIBILITY_FLAG__
-        if ((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-#endif
-
-        int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-        if (prim_addr >= 0) {
-#endif
-          int prim_addr2 = __float_as_int(leaf.y);
-          const uint type = __float_as_int(leaf.w);
-          const uint p_type = type & PRIMITIVE_ALL;
-
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-
-          /* Primitive intersection. */
-          if (p_type == PRIMITIVE_TRIANGLE) {
-            int prim_count = prim_addr2 - prim_addr;
-            if (prim_count < 3) {
-              while (prim_addr < prim_addr2) {
-                kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) ==
-                              p_type);
-                int hit = triangle_intersect(
-                    kg, isect_array, P, dir, PATH_RAY_SHADOW, object, prim_addr);
-                /* Shadow ray early termination. */
-                if (hit) {
-                  /* detect if this surface has a shader with transparent shadows */
-
-                  /* todo: optimize so primitive visibility flag indicates if
-                   * the primitive has a transparent shadow shader? */
-                  int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
-                  int shader = 0;
-
-#ifdef __HAIR__
-                  if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
-                  {
-                    shader = kernel_tex_fetch(__tri_shader, prim);
-                  }
-#ifdef __HAIR__
-                  else {
-                    float4 str = kernel_tex_fetch(__curves, prim);
-                    shader = __float_as_int(str.z);
-                  }
-#endif
-                  int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
-                  /* if no transparent shadows, all light is blocked */
-                  if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
-                    return true;
-                  }
-                  /* if maximum number of hits reached, block all light */
-                  else if (*num_hits == max_hits) {
-                    return true;
-                  }
-
-                  /* move on to next entry in intersections array */
-                  isect_array++;
-                  (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
-                  num_hits_in_instance++;
-#endif
-
-                  isect_array->t = isect_t;
-                }
-
-                prim_addr++;
-              }  // while
-            }
-            else {
-              kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) ==
-                            p_type);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-              int *nhiptr = &num_hits_in_instance;
-#else
-            int nhi = 0;
-            int *nhiptr = &nhi;
-#endif
-
-              int result = triangle_intersect8(kg,
-                                               &isect_array,
-                                               P,
-                                               dir,
-                                               PATH_RAY_SHADOW,
-                                               object,
-                                               prim_addr,
-                                               prim_count,
-                                               num_hits,
-                                               max_hits,
-                                               nhiptr,
-                                               isect_t);
-              if (result == 2) {
-                return true;
-              }
-            }  // prim_count
-          }    // PRIMITIVE_TRIANGLE
-          else {
-            while (prim_addr < prim_addr2) {
-              kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
-
-#ifdef __SHADOW_TRICKS__
-              uint tri_object = (object == OBJECT_NONE) ?
-                                    kernel_tex_fetch(__prim_object, prim_addr) :
-                                    object;
-              if (tri_object == skip_object) {
-                ++prim_addr;
-                continue;
-              }
-#endif
-
-              bool hit;
-
-              /* todo: specialized intersect functions which don't fill in
-               * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
-               * might give a few % performance improvement */
-
-              switch (p_type) {
-
-#if BVH_FEATURE(BVH_MOTION)
-                case PRIMITIVE_MOTION_TRIANGLE: {
-                  hit = motion_triangle_intersect(
-                      kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, prim_addr);
-                  break;
-                }
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-                case PRIMITIVE_CURVE:
-                case PRIMITIVE_MOTION_CURVE: {
-                  const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-                  hit = curve_intersect(kg,
-                                                 isect_array,
-                                                 P,
-                                                 dir,
-                                                 PATH_RAY_SHADOW,
-                                                 object,
-                                                 prim_addr,
-                                                 ray->time,
-                                                 curve_type);
-                  break;
-                }
-#endif
-                default: {
-                  hit = false;
-                  break;
-                }
-              }
-
-              /* Shadow ray early termination. */
-              if (hit) {
-                /* detect if this surface has a shader with transparent shadows */
-
-                /* todo: optimize so primitive visibility flag indicates if
-                 * the primitive has a transparent shadow shader? */
-                int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
-                int shader = 0;
-
-#ifdef __HAIR__
-                if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
-                {
-                  shader = kernel_tex_fetch(__tri_shader, prim);
-                }
-#ifdef __HAIR__
-                else {
-                  float4 str = kernel_tex_fetch(__curves, prim);
-                  shader = __float_as_int(str.z);
-                }
-#endif
-                int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
-                /* if no transparent shadows, all light is blocked */
-                if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
-                  return true;
-                }
-                /* if maximum number of hits reached, block all light */
-                else if (*num_hits == max_hits) {
-                  return true;
-                }
-
-                /* move on to next entry in intersections array */
-                isect_array++;
-                (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
-                num_hits_in_instance++;
-#endif
-
-                isect_array->t = isect_t;
-              }
-
-              prim_addr++;
-            }  // while prim
-          }
-        }
-#if BVH_FEATURE(BVH_INSTANCING)
-        else {
-          /* Instance push. */
-          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-#  if BVH_FEATURE(BVH_MOTION)
-          isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#  else
-          isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-#  endif
-
-          num_hits_in_instance = 0;
-          isect_array->t = isect_t;
-
-          obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-          tfar = avxf(isect_t);
-#  if BVH_FEATURE(BVH_HAIR)
-          dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#  endif
-          idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-#  ifdef __KERNEL_AVX2__
-          P_idir = P * idir;
-          P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-          org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#  endif
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-          node_addr = kernel_tex_fetch(__object_node, object);
-        }
-      }
-#endif /* FEATURE(BVH_INSTANCING) */
-    } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-    if (stack_ptr >= 0) {
-      kernel_assert(object != OBJECT_NONE);
-
-      /* Instance pop. */
-      if (num_hits_in_instance) {
-        float t_fac;
-#  if BVH_FEATURE(BVH_MOTION)
-        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-#  else
-        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-#  endif
-        /* Scale isect->t to adjust for instancing. */
-        for (int i = 0; i < num_hits_in_instance; i++) {
-          (isect_array - i - 1)->t *= t_fac;
-        }
-      }
-      else {
-#  if BVH_FEATURE(BVH_MOTION)
-        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-#  else
-        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-#  endif
-      }
-
-      isect_t = tmax;
-      isect_array->t = isect_t;
-
-      obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-      tfar = avxf(isect_t);
-#  if BVH_FEATURE(BVH_HAIR)
-      dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#  endif
-      idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-#  ifdef __KERNEL_AVX2__
-      P_idir = P * idir;
-      P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-      org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#  endif
-
-      object = OBJECT_NONE;
-      node_addr = traversal_stack[stack_ptr].addr;
-      --stack_ptr;
-    }
-#endif /* FEATURE(BVH_INSTANCING) */
-  } while (node_addr != ENTRYPOINT_SENTINEL);
-
-  return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_traversal.h b/intern/cycles/kernel/bvh/obvh_traversal.h
deleted file mode 100644
index 3f1e03693c3..00000000000
--- a/intern/cycles/kernel/bvh/obvh_traversal.h
+++ /dev/null
@@ -1,550 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-#  define NODE_INTERSECT obvh_node_intersect
-#else
-#  define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
-                                             const Ray *ray,
-                                             Intersection *isect,
-                                             const uint visibility)
-{
-  /* Traversal stack in CUDA thread-local memory. */
-  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
-  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-  traversal_stack[0].dist = -FLT_MAX;
-
-  /* Traversal variables in registers. */
-  int stack_ptr = 0;
-  int node_addr = kernel_data.bvh.root;
-  float node_dist = -FLT_MAX;
-
-  /* Ray parameters in registers. */
-  float3 P = ray->P;
-  float3 dir = bvh_clamp_direction(ray->D);
-  float3 idir = bvh_inverse_direction(dir);
-  int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
-  Transform ob_itfm;
-#endif
-
-  isect->t = ray->t;
-  isect->u = 0.0f;
-  isect->v = 0.0f;
-  isect->prim = PRIM_NONE;
-  isect->object = OBJECT_NONE;
-
-  BVH_DEBUG_INIT();
-  avxf tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
-  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
-  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
-  float3 P_idir = P * idir;
-  avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-  avx3f org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
-  /* Offsets to select the side that becomes the lower or upper bound. */
-  int near_x, near_y, near_z;
-  int far_x, far_y, far_z;
-  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-  /* Traversal loop. */
-  do {
-    do {
-      /* Traverse internal nodes. */
-      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-        (void)inodes;
-
-        if (UNLIKELY(node_dist > isect->t)
-#if BVH_FEATURE(BVH_MOTION)
-            || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
-#ifdef __VISIBILITY_FLAG__
-            || (__float_as_uint(inodes.x) & visibility) == 0
-#endif
-        ) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          node_dist = traversal_stack[stack_ptr].dist;
-          --stack_ptr;
-          continue;
-        }
-
-        int child_mask;
-        avxf dist;
-
-        BVH_DEBUG_NEXT_NODE();
-
-        {
-          child_mask = NODE_INTERSECT(kg,
-                                      tnear,
-                                      tfar,
-#ifdef __KERNEL_AVX2__
-                                      P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-                                      org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-                                      dir4,
-#endif
-                                      idir4,
-                                      near_x,
-                                      near_y,
-                                      near_z,
-                                      far_x,
-                                      far_y,
-                                      far_z,
-                                      node_addr,
-                                      &dist);
-        }
-
-        if (child_mask != 0) {
-          avxf cnodes;
-          /* TODO(sergey): Investigate whether moving cnodes upwards
-           * gives a speedup (will be different cache pattern but will
-           * avoid extra check here).
-           */
-#if BVH_FEATURE(BVH_HAIR)
-          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
-          }
-          else
-#endif
-          {
-            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
-          }
-
-          /* One child is hit, continue with that child. */
-          int r = __bscf(child_mask);
-          float d0 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            node_addr = __float_as_int(cnodes[r]);
-            node_dist = d0;
-            continue;
-          }
-
-          /* Two children are hit, push far child, and continue with
-           * closer child.
-           */
-          int c0 = __float_as_int(cnodes[r]);
-          r = __bscf(child_mask);
-          int c1 = __float_as_int(cnodes[r]);
-          float d1 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            if (d1 < d0) {
-              node_addr = c1;
-              node_dist = d1;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c0;
-              traversal_stack[stack_ptr].dist = d0;
-              continue;
-            }
-            else {
-              node_addr = c0;
-              node_dist = d0;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c1;
-              traversal_stack[stack_ptr].dist = d1;
-              continue;
-            }
-          }
-
-          /* Here starts the slow path for 3 or 4 hit children. We push
-           * all nodes onto the stack to sort them there.
-           */
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c1;
-          traversal_stack[stack_ptr].dist = d1;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c0;
-          traversal_stack[stack_ptr].dist = d0;
-
-          /* Three children are hit, push all onto stack and sort 3
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c2 = __float_as_int(cnodes[r]);
-          float d2 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            node_dist = traversal_stack[stack_ptr].dist;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Four children are hit, push all onto stack and sort 4
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c3 = __float_as_int(cnodes[r]);
-          float d3 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c3;
-            traversal_stack[stack_ptr].dist = d3;
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            node_dist = traversal_stack[stack_ptr].dist;
-            --stack_ptr;
-            continue;
-          }
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c3;
-          traversal_stack[stack_ptr].dist = d3;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c2;
-          traversal_stack[stack_ptr].dist = d2;
-
-          /* Five children are hit, push all onto stack and sort 5
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c4 = __float_as_int(cnodes[r]);
-          float d4 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c4;
-            traversal_stack[stack_ptr].dist = d4;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            node_dist = traversal_stack[stack_ptr].dist;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Six children are hit, push all onto stack and sort 6
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c5 = __float_as_int(cnodes[r]);
-          float d5 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c5;
-            traversal_stack[stack_ptr].dist = d5;
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c4;
-            traversal_stack[stack_ptr].dist = d4;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4],
-                            &traversal_stack[stack_ptr - 5]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            node_dist = traversal_stack[stack_ptr].dist;
-            --stack_ptr;
-            continue;
-          }
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c5;
-          traversal_stack[stack_ptr].dist = d5;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c4;
-          traversal_stack[stack_ptr].dist = d4;
-
-          /* Seven children are hit, push all onto stack and sort 7
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c6 = __float_as_int(cnodes[r]);
-          float d6 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c6;
-            traversal_stack[stack_ptr].dist = d6;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4],
-                            &traversal_stack[stack_ptr - 5],
-                            &traversal_stack[stack_ptr - 6]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            node_dist = traversal_stack[stack_ptr].dist;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Eight children are hit, push all onto stack and sort 8
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c7 = __float_as_int(cnodes[r]);
-          float d7 = ((float *)&dist)[r];
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c7;
-          traversal_stack[stack_ptr].dist = d7;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c6;
-          traversal_stack[stack_ptr].dist = d6;
-          obvh_stack_sort(&traversal_stack[stack_ptr],
-                          &traversal_stack[stack_ptr - 1],
-                          &traversal_stack[stack_ptr - 2],
-                          &traversal_stack[stack_ptr - 3],
-                          &traversal_stack[stack_ptr - 4],
-                          &traversal_stack[stack_ptr - 5],
-                          &traversal_stack[stack_ptr - 6],
-                          &traversal_stack[stack_ptr - 7]);
-          node_addr = traversal_stack[stack_ptr].addr;
-          node_dist = traversal_stack[stack_ptr].dist;
-          --stack_ptr;
-          continue;
-        }
-
-        node_addr = traversal_stack[stack_ptr].addr;
-        node_dist = traversal_stack[stack_ptr].dist;
-        --stack_ptr;
-      }
-
-      /* If node is leaf, fetch triangle list. */
-      if (node_addr < 0) {
-        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-#ifdef __VISIBILITY_FLAG__
-        if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
-#else
-        if (UNLIKELY((node_dist > isect->t)))
-#endif
-        {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          node_dist = traversal_stack[stack_ptr].dist;
-          --stack_ptr;
-          continue;
-        }
-        int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-        if (prim_addr >= 0) {
-#endif
-          int prim_addr2 = __float_as_int(leaf.y);
-          const uint type = __float_as_int(leaf.w);
-
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          node_dist = traversal_stack[stack_ptr].dist;
-          --stack_ptr;
-
-          /* Primitive intersection. */
-          switch (type & PRIMITIVE_ALL) {
-            case PRIMITIVE_TRIANGLE: {
-              int prim_count = prim_addr2 - prim_addr;
-              if (prim_count < 3) {
-                for (; prim_addr < prim_addr2; prim_addr++) {
-                  BVH_DEBUG_NEXT_INTERSECTION();
-                  kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                  if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
-                    tfar = avxf(isect->t);
-                    /* Shadow ray early termination. */
-                    if (visibility == PATH_RAY_SHADOW_OPAQUE) {
-                      return true;
-                    }
-                  }
-                }  // for
-              }
-              else {
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                if (triangle_intersect8(kg,
-                                        &isect,
-                                        P,
-                                        dir,
-                                        visibility,
-                                        object,
-                                        prim_addr,
-                                        prim_count,
-                                        0,
-                                        0,
-                                        NULL,
-                                        0.0f)) {
-                  tfar = avxf(isect->t);
-                  if (visibility == PATH_RAY_SHADOW_OPAQUE) {
-                    return true;
-                  }
-                }
-              }  // prim count
-              break;
-            }
-#if BVH_FEATURE(BVH_MOTION)
-            case PRIMITIVE_MOTION_TRIANGLE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                BVH_DEBUG_NEXT_INTERSECTION();
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                if (motion_triangle_intersect(
-                        kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
-                  tfar = avxf(isect->t);
-                  /* Shadow ray early termination. */
-                  if (visibility == PATH_RAY_SHADOW_OPAQUE) {
-                    return true;
-                  }
-                }
-              }
-              break;
-            }
-#endif /* BVH_FEATURE(BVH_MOTION) */
-#if BVH_FEATURE(BVH_HAIR)
-            case PRIMITIVE_CURVE:
-            case PRIMITIVE_MOTION_CURVE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                BVH_DEBUG_NEXT_INTERSECTION();
-                const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-                kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
-                bool hit = curve_intersect(
-                    kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
-                if (hit) {
-                  tfar = avxf(isect->t);
-                  /* Shadow ray early termination. */
-                  if (visibility == PATH_RAY_SHADOW_OPAQUE) {
-                    return true;
-                  }
-                }
-              }
-              break;
-            }
-#endif /* BVH_FEATURE(BVH_HAIR) */
-          }
-        }
-#if BVH_FEATURE(BVH_INSTANCING)
-        else {
-          /* Instance push. */
-          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-#  if BVH_FEATURE(BVH_MOTION)
-          qbvh_instance_motion_push(
-              kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
-#  else
-          qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
-#  endif
-
-          obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-          tfar = avxf(isect->t);
-#  if BVH_FEATURE(BVH_HAIR)
-          dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#  endif
-          idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-#  ifdef __KERNEL_AVX2__
-          P_idir = P * idir;
-          P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-          org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#  endif
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-          traversal_stack[stack_ptr].dist = -FLT_MAX;
-
-          node_addr = kernel_tex_fetch(__object_node, object);
-
-          BVH_DEBUG_NEXT_INSTANCE();
-        }
-      }
-#endif /* FEATURE(BVH_INSTANCING) */
-    } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-    if (stack_ptr >= 0) {
-      kernel_assert(object != OBJECT_NONE);
-
-      /* Instance pop. */
-#  if BVH_FEATURE(BVH_MOTION)
-      isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-#  else
-      isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-#  endif
-
-      obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-      tfar = avxf(isect->t);
-#  if BVH_FEATURE(BVH_HAIR)
-      dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#  endif
-      idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-#  ifdef __KERNEL_AVX2__
-      P_idir = P * idir;
-      P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-      org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#  endif
-
-      object = OBJECT_NONE;
-      node_addr = traversal_stack[stack_ptr].addr;
-      node_dist = traversal_stack[stack_ptr].dist;
-      --stack_ptr;
-    }
-#endif /* FEATURE(BVH_INSTANCING) */
-  } while (node_addr != ENTRYPOINT_SENTINEL);
-
-  return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_volume.h b/intern/cycles/kernel/bvh/obvh_volume.h
deleted file mode 100644
index fb41ae783ab..00000000000
--- a/intern/cycles/kernel/bvh/obvh_volume.h
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-#  define NODE_INTERSECT obvh_node_intersect
-#else
-#  define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
-                                             const Ray *ray,
-                                             Intersection *isect,
-                                             const uint visibility)
-{
-  /* Traversal stack in CUDA thread-local memory. */
-  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
-  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-  /* Traversal variables in registers. */
-  int stack_ptr = 0;
-  int node_addr = kernel_data.bvh.root;
-
-  /* Ray parameters in registers. */
-  float3 P = ray->P;
-  float3 dir = bvh_clamp_direction(ray->D);
-  float3 idir = bvh_inverse_direction(dir);
-  int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
-  Transform ob_itfm;
-#endif
-
-  isect->t = ray->t;
-  isect->u = 0.0f;
-  isect->v = 0.0f;
-  isect->prim = PRIM_NONE;
-  isect->object = OBJECT_NONE;
-
-  avxf tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
-  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
-  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
-  float3 P_idir = P * idir;
-  avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-  avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
-  /* Offsets to select the side that becomes the lower or upper bound. */
-  int near_x, near_y, near_z;
-  int far_x, far_y, far_z;
-  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
-  /* Traversal loop. */
-  do {
-    do {
-      /* Traverse internal nodes. */
-      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
-        if ((__float_as_uint(inodes.x) & visibility) == 0) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-#endif
-
-        avxf dist;
-        int child_mask = NODE_INTERSECT(kg,
-                                        tnear,
-                                        tfar,
-#ifdef __KERNEL_AVX2__
-                                        P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-                                        org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-                                        dir4,
-#endif
-                                        idir4,
-                                        near_x,
-                                        near_y,
-                                        near_z,
-                                        far_x,
-                                        far_y,
-                                        far_z,
-                                        node_addr,
-                                        &dist);
-
-        if (child_mask != 0) {
-          avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
-          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
-          }
-          else
-#endif
-          {
-            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
-          }
-
-          /* One child is hit, continue with that child. */
-          int r = __bscf(child_mask);
-          if (child_mask == 0) {
-            node_addr = __float_as_int(cnodes[r]);
-            continue;
-          }
-
-          /* Two children are hit, push far child, and continue with
-           * closer child.
-           */
-          int c0 = __float_as_int(cnodes[r]);
-          float d0 = ((float *)&dist)[r];
-          r = __bscf(child_mask);
-          int c1 = __float_as_int(cnodes[r]);
-          float d1 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            if (d1 < d0) {
-              node_addr = c1;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c0;
-              traversal_stack[stack_ptr].dist = d0;
-              continue;
-            }
-            else {
-              node_addr = c0;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c1;
-              traversal_stack[stack_ptr].dist = d1;
-              continue;
-            }
-          }
-
-          /* Here starts the slow path for 3 or 4 hit children. We push
-           * all nodes onto the stack to sort them there.
-           */
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c1;
-          traversal_stack[stack_ptr].dist = d1;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c0;
-          traversal_stack[stack_ptr].dist = d0;
-
-          /* Three children are hit, push all onto stack and sort 3
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c2 = __float_as_int(cnodes[r]);
-          float d2 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Four children are hit, push all onto stack and sort 4
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c3 = __float_as_int(cnodes[r]);
-          float d3 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c3;
-            traversal_stack[stack_ptr].dist = d3;
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c3;
-          traversal_stack[stack_ptr].dist = d3;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c2;
-          traversal_stack[stack_ptr].dist = d2;
-
-          /* Five children are hit, push all onto stack and sort 5
-           * stack items, continue with closest child
-           */
-          r = __bscf(child_mask);
-          int c4 = __float_as_int(cnodes[r]);
-          float d4 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c4;
-            traversal_stack[stack_ptr].dist = d4;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Six children are hit, push all onto stack and sort 6
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c5 = __float_as_int(cnodes[r]);
-          float d5 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c5;
-            traversal_stack[stack_ptr].dist = d5;
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c4;
-            traversal_stack[stack_ptr].dist = d4;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4],
-                            &traversal_stack[stack_ptr - 5]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c5;
-          traversal_stack[stack_ptr].dist = d5;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c4;
-          traversal_stack[stack_ptr].dist = d4;
-
-          /* Seven children are hit, push all onto stack and sort 7
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c6 = __float_as_int(cnodes[r]);
-          float d6 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c6;
-            traversal_stack[stack_ptr].dist = d6;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4],
-                            &traversal_stack[stack_ptr - 5],
-                            &traversal_stack[stack_ptr - 6]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Eight children are hit, push all onto stack and sort 8
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c7 = __float_as_int(cnodes[r]);
-          float d7 = ((float *)&dist)[r];
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c7;
-          traversal_stack[stack_ptr].dist = d7;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c6;
-          traversal_stack[stack_ptr].dist = d6;
-          obvh_stack_sort(&traversal_stack[stack_ptr],
-                          &traversal_stack[stack_ptr - 1],
-                          &traversal_stack[stack_ptr - 2],
-                          &traversal_stack[stack_ptr - 3],
-                          &traversal_stack[stack_ptr - 4],
-                          &traversal_stack[stack_ptr - 5],
-                          &traversal_stack[stack_ptr - 6],
-                          &traversal_stack[stack_ptr - 7]);
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-
-        node_addr = traversal_stack[stack_ptr].addr;
-        --stack_ptr;
-      }
-
-      /* If node is leaf, fetch triangle list. */
-      if (node_addr < 0) {
-        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-        if ((__float_as_uint(leaf.z) & visibility) == 0) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-
-        int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-        if (prim_addr >= 0) {
-#endif
-          int prim_addr2 = __float_as_int(leaf.y);
-          const uint type = __float_as_int(leaf.w);
-          const uint p_type = type & PRIMITIVE_ALL;
-
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-
-          /* Primitive intersection. */
-          switch (p_type) {
-            case PRIMITIVE_TRIANGLE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                /* Only primitives from volume object. */
-                uint tri_object = (object == OBJECT_NONE) ?
-                                      kernel_tex_fetch(__prim_object, prim_addr) :
-                                      object;
-                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-                  continue;
-                }
-                /* Intersect ray against primitive. */
-                triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
-              }
-              break;
-            }
-#if BVH_FEATURE(BVH_MOTION)
-            case PRIMITIVE_MOTION_TRIANGLE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                /* Only primitives from volume object. */
-                uint tri_object = (object == OBJECT_NONE) ?
-                                      kernel_tex_fetch(__prim_object, prim_addr) :
-                                      object;
-                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-                  continue;
-                }
-                /* Intersect ray against primitive. */
-                motion_triangle_intersect(
-                    kg, isect, P, dir, ray->time, visibility, object, prim_addr);
-              }
-              break;
-            }
-#endif
-          }
-        }
-#if BVH_FEATURE(BVH_INSTANCING)
-        else {
-          /* Instance push. */
-          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-          int object_flag = kernel_tex_fetch(__object_flag, object);
-          if (object_flag & SD_OBJECT_HAS_VOLUME) {
-#  if BVH_FEATURE(BVH_MOTION)
-            isect->t = bvh_instance_motion_push(
-                kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-#  else
-            isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-#  endif
-
-            obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-            tfar = avxf(isect->t);
-#  if BVH_FEATURE(BVH_HAIR)
-            dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#  endif
-            idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-#  ifdef __KERNEL_AVX2__
-            P_idir = P * idir;
-            P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-            org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#  endif
-
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-            node_addr = kernel_tex_fetch(__object_node, object);
-          }
-          else {
-            /* Pop. */
-            object = OBJECT_NONE;
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-          }
-        }
-      }
-#endif /* FEATURE(BVH_INSTANCING) */
-    } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-    if (stack_ptr >= 0) {
-      kernel_assert(object != OBJECT_NONE);
-
-      /* Instance pop. */
-#  if BVH_FEATURE(BVH_MOTION)
-      isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-#  else
-      isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-#  endif
-
-      obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-      tfar = avxf(isect->t);
-#  if BVH_FEATURE(BVH_HAIR)
-      dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#  endif
-      idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-#  ifdef __KERNEL_AVX2__
-      P_idir = P * idir;
-      P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-      org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#  endif
-
-      object = OBJECT_NONE;
-      node_addr = traversal_stack[stack_ptr].addr;
-      --stack_ptr;
-    }
-#endif /* FEATURE(BVH_INSTANCING) */
-  } while (node_addr != ENTRYPOINT_SENTINEL);
-
-  return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/obvh_volume_all.h b/intern/cycles/kernel/bvh/obvh_volume_all.h
deleted file mode 100644
index 56e2afd4a11..00000000000
--- a/intern/cycles/kernel/bvh/obvh_volume_all.h
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-#  define NODE_INTERSECT obvh_node_intersect
-#else
-#  define NODE_INTERSECT obvh_aligned_node_intersect
-#endif
-
-ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
-                                             const Ray *ray,
-                                             Intersection *isect_array,
-                                             const uint max_hits,
-                                             const uint visibility)
-{
-  /* Traversal stack in CUDA thread-local memory. */
-  OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
-  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-  /* Traversal variables in registers. */
-  int stack_ptr = 0;
-  int node_addr = kernel_data.bvh.root;
-
-  /* Ray parameters in registers. */
-  const float tmax = ray->t;
-  float3 P = ray->P;
-  float3 dir = bvh_clamp_direction(ray->D);
-  float3 idir = bvh_inverse_direction(dir);
-  int object = OBJECT_NONE;
-  float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
-  Transform ob_itfm;
-#endif
-
-  uint num_hits = 0;
-  isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
-  int num_hits_in_instance = 0;
-#endif
-
-  avxf tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
-  avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#endif
-  avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-
-#ifdef __KERNEL_AVX2__
-  float3 P_idir = P * idir;
-  avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-  avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
-#endif
-
-  /* Offsets to select the side that becomes the lower or upper bound. */
-  int near_x, near_y, near_z;
-  int far_x, far_y, far_z;
-  obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
-  /* Traversal loop. */
-  do {
-    do {
-      /* Traverse internal nodes. */
-      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
-        if ((__float_as_uint(inodes.x) & visibility) == 0) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-#endif
-
-        avxf dist;
-        int child_mask = NODE_INTERSECT(kg,
-                                        tnear,
-                                        tfar,
-#ifdef __KERNEL_AVX2__
-                                        P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-                                        org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-                                        dir4,
-#endif
-                                        idir4,
-                                        near_x,
-                                        near_y,
-                                        near_z,
-                                        far_x,
-                                        far_y,
-                                        far_z,
-                                        node_addr,
-                                        &dist);
-
-        if (child_mask != 0) {
-          avxf cnodes;
-#if BVH_FEATURE(BVH_HAIR)
-          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
-          }
-          else
-#endif
-          {
-            cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
-          }
-
-          /* One child is hit, continue with that child. */
-          int r = __bscf(child_mask);
-          if (child_mask == 0) {
-            node_addr = __float_as_int(cnodes[r]);
-            continue;
-          }
-
-          /* Two children are hit, push far child, and continue with
-           * closer child.
-           */
-          int c0 = __float_as_int(cnodes[r]);
-          float d0 = ((float *)&dist)[r];
-          r = __bscf(child_mask);
-          int c1 = __float_as_int(cnodes[r]);
-          float d1 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            if (d1 < d0) {
-              node_addr = c1;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c0;
-              traversal_stack[stack_ptr].dist = d0;
-              continue;
-            }
-            else {
-              node_addr = c0;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c1;
-              traversal_stack[stack_ptr].dist = d1;
-              continue;
-            }
-          }
-
-          /* Here starts the slow path for 3 or 4 hit children. We push
-           * all nodes onto the stack to sort them there.
-           */
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c1;
-          traversal_stack[stack_ptr].dist = d1;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c0;
-          traversal_stack[stack_ptr].dist = d0;
-
-          /* Three children are hit, push all onto stack and sort 3
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c2 = __float_as_int(cnodes[r]);
-          float d2 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Four children are hit, push all onto stack and sort 4
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c3 = __float_as_int(cnodes[r]);
-          float d3 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c3;
-            traversal_stack[stack_ptr].dist = d3;
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c3;
-          traversal_stack[stack_ptr].dist = d3;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c2;
-          traversal_stack[stack_ptr].dist = d2;
-
-          /* Five children are hit, push all onto stack and sort 5
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c4 = __float_as_int(cnodes[r]);
-          float d4 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c4;
-            traversal_stack[stack_ptr].dist = d4;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Six children are hit, push all onto stack and sort 6
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c5 = __float_as_int(cnodes[r]);
-          float d5 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c5;
-            traversal_stack[stack_ptr].dist = d5;
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c4;
-            traversal_stack[stack_ptr].dist = d4;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4],
-                            &traversal_stack[stack_ptr - 5]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c5;
-          traversal_stack[stack_ptr].dist = d5;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c4;
-          traversal_stack[stack_ptr].dist = d4;
-
-          /* Seven children are hit, push all onto stack and sort 7
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c6 = __float_as_int(cnodes[r]);
-          float d6 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c6;
-            traversal_stack[stack_ptr].dist = d6;
-            obvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2],
-                            &traversal_stack[stack_ptr - 3],
-                            &traversal_stack[stack_ptr - 4],
-                            &traversal_stack[stack_ptr - 5],
-                            &traversal_stack[stack_ptr - 6]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Eight children are hit, push all onto stack and sort 8
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c7 = __float_as_int(cnodes[r]);
-          float d7 = ((float *)&dist)[r];
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c7;
-          traversal_stack[stack_ptr].dist = d7;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c6;
-          traversal_stack[stack_ptr].dist = d6;
-          obvh_stack_sort(&traversal_stack[stack_ptr],
-                          &traversal_stack[stack_ptr - 1],
-                          &traversal_stack[stack_ptr - 2],
-                          &traversal_stack[stack_ptr - 3],
-                          &traversal_stack[stack_ptr - 4],
-                          &traversal_stack[stack_ptr - 5],
-                          &traversal_stack[stack_ptr - 6],
-                          &traversal_stack[stack_ptr - 7]);
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-
-        node_addr = traversal_stack[stack_ptr].addr;
-        --stack_ptr;
-      }
-
-      /* If node is leaf, fetch triangle list. */
-      if (node_addr < 0) {
-        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-        if ((__float_as_uint(leaf.z) & visibility) == 0) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-
-        int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-        if (prim_addr >= 0) {
-#endif
-          int prim_addr2 = __float_as_int(leaf.y);
-          const uint type = __float_as_int(leaf.w);
-          const uint p_type = type & PRIMITIVE_ALL;
-          bool hit;
-
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-
-          /* Primitive intersection. */
-          switch (p_type) {
-            case PRIMITIVE_TRIANGLE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                /* Only primitives from volume object. */
-                uint tri_object = (object == OBJECT_NONE) ?
-                                      kernel_tex_fetch(__prim_object, prim_addr) :
-                                      object;
-                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-                  continue;
-                }
-                /* Intersect ray against primitive. */
-                hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
-                if (hit) {
-                  /* Move on to next entry in intersections array. */
-                  isect_array++;
-                  num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
-                  num_hits_in_instance++;
-#endif
-                  isect_array->t = isect_t;
-                  if (num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
-#  if BVH_FEATURE(BVH_MOTION)
-                    float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-#  else
-                    Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-                    float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-#  endif
-                    for (int i = 0; i < num_hits_in_instance; i++) {
-                      (isect_array - i - 1)->t *= t_fac;
-                    }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
-                    return num_hits;
-                  }
-                }
-              }
-              break;
-            }
-#if BVH_FEATURE(BVH_MOTION)
-            case PRIMITIVE_MOTION_TRIANGLE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                /* Only primitives from volume object. */
-                uint tri_object = (object == OBJECT_NONE) ?
-                                      kernel_tex_fetch(__prim_object, prim_addr) :
-                                      object;
-                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-                  continue;
-                }
-                /* Intersect ray against primitive. */
-                hit = motion_triangle_intersect(
-                    kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
-                if (hit) {
-                  /* Move on to next entry in intersections array. */
-                  isect_array++;
-                  num_hits++;
-#  if BVH_FEATURE(BVH_INSTANCING)
-                  num_hits_in_instance++;
-#  endif
-                  isect_array->t = isect_t;
-                  if (num_hits == max_hits) {
-#  if BVH_FEATURE(BVH_INSTANCING)
-#    if BVH_FEATURE(BVH_MOTION)
-                    float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-#    else
-                    Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-                    float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-#    endif
-                    for (int i = 0; i < num_hits_in_instance; i++) {
-                      (isect_array - i - 1)->t *= t_fac;
-                    }
-#  endif /* BVH_FEATURE(BVH_INSTANCING) */
-                    return num_hits;
-                  }
-                }
-              }
-              break;
-            }
-#endif
-          }
-        }
-#if BVH_FEATURE(BVH_INSTANCING)
-        else {
-          /* Instance push. */
-          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-          int object_flag = kernel_tex_fetch(__object_flag, object);
-          if (object_flag & SD_OBJECT_HAS_VOLUME) {
-#  if BVH_FEATURE(BVH_MOTION)
-            isect_t = bvh_instance_motion_push(
-                kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#  else
-            isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-#  endif
-
-            obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-            tfar = avxf(isect_t);
-            idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-#  if BVH_FEATURE(BVH_HAIR)
-            dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#  endif
-#  ifdef __KERNEL_AVX2__
-            P_idir = P * idir;
-            P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-            org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#  endif
-
-            num_hits_in_instance = 0;
-            isect_array->t = isect_t;
-
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-            node_addr = kernel_tex_fetch(__object_node, object);
-          }
-          else {
-            /* Pop. */
-            object = OBJECT_NONE;
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-          }
-        }
-      }
-#endif /* FEATURE(BVH_INSTANCING) */
-    } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-    if (stack_ptr >= 0) {
-      kernel_assert(object != OBJECT_NONE);
-
-      /* Instance pop. */
-      if (num_hits_in_instance) {
-        float t_fac;
-#  if BVH_FEATURE(BVH_MOTION)
-        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-#  else
-        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-#  endif
-        /* Scale isect->t to adjust for instancing. */
-        for (int i = 0; i < num_hits_in_instance; i++) {
-          (isect_array - i - 1)->t *= t_fac;
-        }
-      }
-      else {
-#  if BVH_FEATURE(BVH_MOTION)
-        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-#  else
-        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-#  endif
-      }
-
-      isect_t = tmax;
-      isect_array->t = isect_t;
-
-      obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-      tfar = avxf(isect_t);
-#  if BVH_FEATURE(BVH_HAIR)
-      dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
-#  endif
-      idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
-#  ifdef __KERNEL_AVX2__
-      P_idir = P * idir;
-      P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-      org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
-#  endif
-
-      object = OBJECT_NONE;
-      node_addr = traversal_stack[stack_ptr].addr;
-      --stack_ptr;
-    }
-#endif /* FEATURE(BVH_INSTANCING) */
-  } while (node_addr != ENTRYPOINT_SENTINEL);
-
-  return num_hits;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_local.h b/intern/cycles/kernel/bvh/qbvh_local.h
deleted file mode 100644
index b21f79bd3a0..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_local.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for finding local intersections
- * around the shading point, for subsurface scattering and bevel. We disable
- * various features for performance, and for instanced objects avoid traversing
- * other parts of the scene.
- *
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-#  define NODE_INTERSECT qbvh_node_intersect
-#else
-#  define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
-                                             const Ray *ray,
-                                             LocalIntersection *local_isect,
-                                             int local_object,
-                                             uint *lcg_state,
-                                             int max_hits)
-{
-  /* TODO(sergey):
-   * - Test if pushing distance on the stack helps (for non shadow rays).
-   * - Separate version for shadow rays.
-   * - Likely and unlikely for if() statements.
-   * - SSE for hair.
-   * - Test restrict attribute for pointers.
-   */
-
-  /* Traversal stack in CUDA thread-local memory. */
-  QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
-  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-  /* Traversal variables in registers. */
-  int stack_ptr = 0;
-  int node_addr = kernel_tex_fetch(__object_node, local_object);
-
-  /* Ray parameters in registers. */
-  float3 P = ray->P;
-  float3 dir = bvh_clamp_direction(ray->D);
-  float3 idir = bvh_inverse_direction(dir);
-  int object = OBJECT_NONE;
-  float isect_t = ray->t;
-
-  if (local_isect != NULL) {
-    local_isect->num_hits = 0;
-  }
-  kernel_assert((local_isect == NULL) == (max_hits == 0));
-
-  const int object_flag = kernel_tex_fetch(__object_flag, local_object);
-  if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
-#if BVH_FEATURE(BVH_MOTION)
-    Transform ob_itfm;
-    isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#else
-    isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
-#endif
-    object = local_object;
-  }
-
-  ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
-  sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
-  sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
-  float3 P_idir = P * idir;
-  sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-  sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
-  /* Offsets to select the side that becomes the lower or upper bound. */
-  int near_x, near_y, near_z;
-  int far_x, far_y, far_z;
-  qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
-  /* Traversal loop. */
-  do {
-    do {
-      /* Traverse internal nodes. */
-      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-        ssef dist;
-        int child_mask = NODE_INTERSECT(kg,
-                                        tnear,
-                                        tfar,
-#ifdef __KERNEL_AVX2__
-                                        P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-                                        org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-                                        dir4,
-#endif
-                                        idir4,
-                                        near_x,
-                                        near_y,
-                                        near_z,
-                                        far_x,
-                                        far_y,
-                                        far_z,
-                                        node_addr,
-                                        &dist);
-
-        if (child_mask != 0) {
-          float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-          float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
-          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
-          }
-          else
-#endif
-          {
-            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
-          }
-
-          /* One child is hit, continue with that child. */
-          int r = __bscf(child_mask);
-          if (child_mask == 0) {
-            node_addr = __float_as_int(cnodes[r]);
-            continue;
-          }
-
-          /* Two children are hit, push far child, and continue with
-           * closer child.
-           */
-          int c0 = __float_as_int(cnodes[r]);
-          float d0 = ((float *)&dist)[r];
-          r = __bscf(child_mask);
-          int c1 = __float_as_int(cnodes[r]);
-          float d1 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            if (d1 < d0) {
-              node_addr = c1;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c0;
-              traversal_stack[stack_ptr].dist = d0;
-              continue;
-            }
-            else {
-              node_addr = c0;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c1;
-              traversal_stack[stack_ptr].dist = d1;
-              continue;
-            }
-          }
-
-          /* Here starts the slow path for 3 or 4 hit children. We push
-           * all nodes onto the stack to sort them there.
-           */
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c1;
-          traversal_stack[stack_ptr].dist = d1;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c0;
-          traversal_stack[stack_ptr].dist = d0;
-
-          /* Three children are hit, push all onto stack and sort 3
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c2 = __float_as_int(cnodes[r]);
-          float d2 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            qbvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Four children are hit, push all onto stack and sort 4
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c3 = __float_as_int(cnodes[r]);
-          float d3 = ((float *)&dist)[r];
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c3;
-          traversal_stack[stack_ptr].dist = d3;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c2;
-          traversal_stack[stack_ptr].dist = d2;
-          qbvh_stack_sort(&traversal_stack[stack_ptr],
-                          &traversal_stack[stack_ptr - 1],
-                          &traversal_stack[stack_ptr - 2],
-                          &traversal_stack[stack_ptr - 3]);
-        }
-
-        node_addr = traversal_stack[stack_ptr].addr;
-        --stack_ptr;
-      }
-
-      /* If node is leaf, fetch triangle list. */
-      if (node_addr < 0) {
-        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-        int prim_addr = __float_as_int(leaf.x);
-
-        int prim_addr2 = __float_as_int(leaf.y);
-        const uint type = __float_as_int(leaf.w);
-
-        /* Pop. */
-        node_addr = traversal_stack[stack_ptr].addr;
-        --stack_ptr;
-
-        /* Primitive intersection. */
-        switch (type & PRIMITIVE_ALL) {
-          case PRIMITIVE_TRIANGLE: {
-            /* Intersect ray against primitive, */
-            for (; prim_addr < prim_addr2; prim_addr++) {
-              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-              if (triangle_intersect_local(kg,
-                                           local_isect,
-                                           P,
-                                           dir,
-                                           object,
-                                           local_object,
-                                           prim_addr,
-                                           isect_t,
-                                           lcg_state,
-                                           max_hits)) {
-                return true;
-              }
-            }
-            break;
-          }
-#if BVH_FEATURE(BVH_MOTION)
-          case PRIMITIVE_MOTION_TRIANGLE: {
-            /* Intersect ray against primitive. */
-            for (; prim_addr < prim_addr2; prim_addr++) {
-              kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-              if (motion_triangle_intersect_local(kg,
-                                                  local_isect,
-                                                  P,
-                                                  dir,
-                                                  ray->time,
-                                                  object,
-                                                  local_object,
-                                                  prim_addr,
-                                                  isect_t,
-                                                  lcg_state,
-                                                  max_hits)) {
-                return true;
-              }
-            }
-            break;
-          }
-#endif
-          default:
-            break;
-        }
-      }
-    } while (node_addr != ENTRYPOINT_SENTINEL);
-  } while (node_addr != ENTRYPOINT_SENTINEL);
-
-  return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h
deleted file mode 100644
index 070406fb18a..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_nodes.h
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * Copyright 2011-2014, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Aligned nodes intersection SSE code is adopted from Embree,
- */
-
-struct QBVHStackItem {
-  int addr;
-  float dist;
-};
-
-ccl_device_inline void qbvh_near_far_idx_calc(const float3 &idir,
-                                              int *ccl_restrict near_x,
-                                              int *ccl_restrict near_y,
-                                              int *ccl_restrict near_z,
-                                              int *ccl_restrict far_x,
-                                              int *ccl_restrict far_y,
-                                              int *ccl_restrict far_z)
-
-{
-#ifdef __KERNEL_SSE__
-  *near_x = 0;
-  *far_x = 1;
-  *near_y = 2;
-  *far_y = 3;
-  *near_z = 4;
-  *far_z = 5;
-
-  const size_t mask = movemask(ssef(idir.m128));
-
-  const int mask_x = mask & 1;
-  const int mask_y = (mask & 2) >> 1;
-  const int mask_z = (mask & 4) >> 2;
-
-  *near_x += mask_x;
-  *far_x -= mask_x;
-  *near_y += mask_y;
-  *far_y -= mask_y;
-  *near_z += mask_z;
-  *far_z -= mask_z;
-#else
-  if (idir.x >= 0.0f) {
-    *near_x = 0;
-    *far_x = 1;
-  }
-  else {
-    *near_x = 1;
-    *far_x = 0;
-  }
-  if (idir.y >= 0.0f) {
-    *near_y = 2;
-    *far_y = 3;
-  }
-  else {
-    *near_y = 3;
-    *far_y = 2;
-  }
-  if (idir.z >= 0.0f) {
-    *near_z = 4;
-    *far_z = 5;
-  }
-  else {
-    *near_z = 5;
-    *far_z = 4;
-  }
-#endif
-}
-
-/* TOOD(sergey): Investigate if using intrinsics helps for both
- * stack item swap and float comparison.
- */
-ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a, QBVHStackItem *ccl_restrict b)
-{
-  QBVHStackItem tmp = *a;
-  *a = *b;
-  *b = tmp;
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
-                                       QBVHStackItem *ccl_restrict s2,
-                                       QBVHStackItem *ccl_restrict s3)
-{
-  if (s2->dist < s1->dist) {
-    qbvh_item_swap(s2, s1);
-  }
-  if (s3->dist < s2->dist) {
-    qbvh_item_swap(s3, s2);
-  }
-  if (s2->dist < s1->dist) {
-    qbvh_item_swap(s2, s1);
-  }
-}
-
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
-                                       QBVHStackItem *ccl_restrict s2,
-                                       QBVHStackItem *ccl_restrict s3,
-                                       QBVHStackItem *ccl_restrict s4)
-{
-  if (s2->dist < s1->dist) {
-    qbvh_item_swap(s2, s1);
-  }
-  if (s4->dist < s3->dist) {
-    qbvh_item_swap(s4, s3);
-  }
-  if (s3->dist < s1->dist) {
-    qbvh_item_swap(s3, s1);
-  }
-  if (s4->dist < s2->dist) {
-    qbvh_item_swap(s4, s2);
-  }
-  if (s3->dist < s2->dist) {
-    qbvh_item_swap(s3, s2);
-  }
-}
-
-/* Axis-aligned nodes intersection */
-
-// ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
-static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
-                                       const ssef &isect_near,
-                                       const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
-                                       const sse3f &org_idir,
-#else
-                                       const sse3f &org,
-#endif
-                                       const sse3f &idir,
-                                       const int near_x,
-                                       const int near_y,
-                                       const int near_z,
-                                       const int far_x,
-                                       const int far_y,
-                                       const int far_z,
-                                       const int node_addr,
-                                       ssef *ccl_restrict dist)
-{
-  const int offset = node_addr + 1;
-#ifdef __KERNEL_AVX2__
-  const ssef tnear_x = msub(
-      kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, org_idir.x);
-  const ssef tnear_y = msub(
-      kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, org_idir.y);
-  const ssef tnear_z = msub(
-      kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, org_idir.z);
-  const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, org_idir.x);
-  const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, org_idir.y);
-  const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, org_idir.z);
-#else
-  const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - org.x) * idir.x;
-  const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - org.y) * idir.y;
-  const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - org.z) * idir.z;
-  const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - org.x) * idir.x;
-  const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - org.y) * idir.y;
-  const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - org.z) * idir.z;
-#endif
-
-#ifdef __KERNEL_SSE41__
-  const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near));
-  const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far));
-  const sseb vmask = cast(tnear) > cast(tfar);
-  int mask = (int)movemask(vmask) ^ 0xf;
-#else
-  const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-  const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-  const sseb vmask = tnear <= tfar;
-  int mask = (int)movemask(vmask);
-#endif
-  *dist = tnear;
-  return mask;
-}
-
-/* Unaligned nodes intersection */
-
-ccl_device_inline int qbvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg,
-                                                    const ssef &isect_near,
-                                                    const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
-                                                    const sse3f &org_idir,
-#endif
-                                                    const sse3f &org,
-                                                    const sse3f &dir,
-                                                    const sse3f &idir,
-                                                    const int near_x,
-                                                    const int near_y,
-                                                    const int near_z,
-                                                    const int far_x,
-                                                    const int far_y,
-                                                    const int far_z,
-                                                    const int node_addr,
-                                                    ssef *ccl_restrict dist)
-{
-  const int offset = node_addr;
-  const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1);
-  const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2);
-  const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3);
-
-  const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4);
-  const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5);
-  const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6);
-
-  const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7);
-  const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8);
-  const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9);
-
-  const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10);
-  const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11);
-  const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12);
-
-  const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z,
-             aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z,
-             aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z;
-
-  const ssef aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x,
-             aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y,
-             aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z;
-
-  const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f);
-  const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y,
-             nrdir_z = neg_one / aligned_dir_z;
-
-  const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y,
-             tlower_z = aligned_P_z * nrdir_z;
-
-  const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y,
-             tupper_z = tlower_z - nrdir_z;
-
-#ifdef __KERNEL_SSE41__
-  const ssef tnear_x = mini(tlower_x, tupper_x);
-  const ssef tnear_y = mini(tlower_y, tupper_y);
-  const ssef tnear_z = mini(tlower_z, tupper_z);
-  const ssef tfar_x = maxi(tlower_x, tupper_x);
-  const ssef tfar_y = maxi(tlower_y, tupper_y);
-  const ssef tfar_z = maxi(tlower_z, tupper_z);
-  const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-  const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-  const sseb vmask = tnear <= tfar;
-  *dist = tnear;
-  return movemask(vmask);
-#else
-  const ssef tnear_x = min(tlower_x, tupper_x);
-  const ssef tnear_y = min(tlower_y, tupper_y);
-  const ssef tnear_z = min(tlower_z, tupper_z);
-  const ssef tfar_x = max(tlower_x, tupper_x);
-  const ssef tfar_y = max(tlower_y, tupper_y);
-  const ssef tfar_z = max(tlower_z, tupper_z);
-  const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z);
-  const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z);
-  const sseb vmask = tnear <= tfar;
-  *dist = tnear;
-  return movemask(vmask);
-#endif
-}
-
-/* Intersectors wrappers.
- *
- * They'll check node type and call appropriate intersection code.
- */
-
-ccl_device_inline int qbvh_node_intersect(KernelGlobals *ccl_restrict kg,
-                                          const ssef &isect_near,
-                                          const ssef &isect_far,
-#ifdef __KERNEL_AVX2__
-                                          const sse3f &org_idir,
-#endif
-                                          const sse3f &org,
-                                          const sse3f &dir,
-                                          const sse3f &idir,
-                                          const int near_x,
-                                          const int near_y,
-                                          const int near_z,
-                                          const int far_x,
-                                          const int far_y,
-                                          const int far_z,
-                                          const int node_addr,
-                                          ssef *ccl_restrict dist)
-{
-  const int offset = node_addr;
-  const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
-  if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) {
-    return qbvh_unaligned_node_intersect(kg,
-                                         isect_near,
-                                         isect_far,
-#ifdef __KERNEL_AVX2__
-                                         org_idir,
-#endif
-                                         org,
-                                         dir,
-                                         idir,
-                                         near_x,
-                                         near_y,
-                                         near_z,
-                                         far_x,
-                                         far_y,
-                                         far_z,
-                                         node_addr,
-                                         dist);
-  }
-  else {
-    return qbvh_aligned_node_intersect(kg,
-                                       isect_near,
-                                       isect_far,
-#ifdef __KERNEL_AVX2__
-                                       org_idir,
-#else
-                                       org,
-#endif
-                                       idir,
-                                       near_x,
-                                       near_y,
-                                       near_z,
-                                       far_x,
-                                       far_y,
-                                       far_z,
-                                       node_addr,
-                                       dist);
-  }
-}
diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
deleted file mode 100644
index 9d428c3e1a7..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-#  define NODE_INTERSECT qbvh_node_intersect
-#else
-#  define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
-                                             const Ray *ray,
-                                             Intersection *isect_array,
-                                             const uint visibility,
-                                             const uint max_hits,
-                                             uint *num_hits)
-{
-  /* TODO(sergey):
-   *  - Test if pushing distance on the stack helps.
-   * - Likely and unlikely for if() statements.
-   * - Test restrict attribute for pointers.
-   */
-
-  /* Traversal stack in CUDA thread-local memory. */
-  QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
-  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-  /* Traversal variables in registers. */
-  int stack_ptr = 0;
-  int node_addr = kernel_data.bvh.root;
-
-  /* Ray parameters in registers. */
-  const float tmax = ray->t;
-  float3 P = ray->P;
-  float3 dir = bvh_clamp_direction(ray->D);
-  float3 idir = bvh_inverse_direction(dir);
-  int object = OBJECT_NONE;
-  float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
-  Transform ob_itfm;
-#endif
-
-  *num_hits = 0;
-  isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
-  int num_hits_in_instance = 0;
-#endif
-
-  ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
-  sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
-  sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
-  float3 P_idir = P * idir;
-  sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-  sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
-  /* Offsets to select the side that becomes the lower or upper bound. */
-  int near_x, near_y, near_z;
-  int far_x, far_y, far_z;
-  qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
-  /* Traversal loop. */
-  do {
-    do {
-      /* Traverse internal nodes. */
-      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-        (void)inodes;
-
-        if (false
-#ifdef __VISIBILITY_FLAG__
-            || ((__float_as_uint(inodes.x) & visibility) == 0)
-#endif
-#if BVH_FEATURE(BVH_MOTION)
-            || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
-        ) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-
-        ssef dist;
-        int child_mask = NODE_INTERSECT(kg,
-                                        tnear,
-                                        tfar,
-#ifdef __KERNEL_AVX2__
-                                        P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-                                        org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-                                        dir4,
-#endif
-                                        idir4,
-                                        near_x,
-                                        near_y,
-                                        near_z,
-                                        far_x,
-                                        far_y,
-                                        far_z,
-                                        node_addr,
-                                        &dist);
-
-        if (child_mask != 0) {
-          float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
-          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
-          }
-          else
-#endif
-          {
-            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
-          }
-
-          /* One child is hit, continue with that child. */
-          int r = __bscf(child_mask);
-          if (child_mask == 0) {
-            node_addr = __float_as_int(cnodes[r]);
-            continue;
-          }
-
-          /* Two children are hit, push far child, and continue with
-           * closer child.
-           */
-          int c0 = __float_as_int(cnodes[r]);
-          float d0 = ((float *)&dist)[r];
-          r = __bscf(child_mask);
-          int c1 = __float_as_int(cnodes[r]);
-          float d1 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            if (d1 < d0) {
-              node_addr = c1;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c0;
-              traversal_stack[stack_ptr].dist = d0;
-              continue;
-            }
-            else {
-              node_addr = c0;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c1;
-              traversal_stack[stack_ptr].dist = d1;
-              continue;
-            }
-          }
-
-          /* Here starts the slow path for 3 or 4 hit children. We push
-           * all nodes onto the stack to sort them there.
-           */
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c1;
-          traversal_stack[stack_ptr].dist = d1;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c0;
-          traversal_stack[stack_ptr].dist = d0;
-
-          /* Three children are hit, push all onto stack and sort 3
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c2 = __float_as_int(cnodes[r]);
-          float d2 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            qbvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Four children are hit, push all onto stack and sort 4
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c3 = __float_as_int(cnodes[r]);
-          float d3 = ((float *)&dist)[r];
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c3;
-          traversal_stack[stack_ptr].dist = d3;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c2;
-          traversal_stack[stack_ptr].dist = d2;
-          qbvh_stack_sort(&traversal_stack[stack_ptr],
-                          &traversal_stack[stack_ptr - 1],
-                          &traversal_stack[stack_ptr - 2],
-                          &traversal_stack[stack_ptr - 3]);
-        }
-
-        node_addr = traversal_stack[stack_ptr].addr;
-        --stack_ptr;
-      }
-
-      /* If node is leaf, fetch triangle list. */
-      if (node_addr < 0) {
-        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-#ifdef __VISIBILITY_FLAG__
-        if ((__float_as_uint(leaf.z) & visibility) == 0) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-#endif
-
-        int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-        if (prim_addr >= 0) {
-#endif
-          int prim_addr2 = __float_as_int(leaf.y);
-          const uint type = __float_as_int(leaf.w);
-          const uint p_type = type & PRIMITIVE_ALL;
-
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-
-          /* Primitive intersection. */
-          while (prim_addr < prim_addr2) {
-            kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
-            bool hit;
-
-            /* todo: specialized intersect functions which don't fill in
-             * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
-             * might give a few % performance improvement */
-
-            switch (p_type) {
-              case PRIMITIVE_TRIANGLE: {
-                hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
-                break;
-              }
-#if BVH_FEATURE(BVH_MOTION)
-              case PRIMITIVE_MOTION_TRIANGLE: {
-                hit = motion_triangle_intersect(
-                    kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
-                break;
-              }
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-              case PRIMITIVE_CURVE:
-              case PRIMITIVE_MOTION_CURVE: {
-                const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-                hit = curve_intersect(
-                    kg, isect_array, P, dir, visibility, object, prim_addr, ray->time, curve_type);
-                break;
-              }
-#endif
-              default: {
-                hit = false;
-                break;
-              }
-            }
-
-            /* Shadow ray early termination. */
-            if (hit) {
-              /* detect if this surface has a shader with transparent shadows */
-
-              /* todo: optimize so primitive visibility flag indicates if
-               * the primitive has a transparent shadow shader? */
-              int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
-              int shader = 0;
-
-#ifdef __HAIR__
-              if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
-#endif
-              {
-                shader = kernel_tex_fetch(__tri_shader, prim);
-              }
-#ifdef __HAIR__
-              else {
-                float4 str = kernel_tex_fetch(__curves, prim);
-                shader = __float_as_int(str.z);
-              }
-#endif
-              int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-
-              /* if no transparent shadows, all light is blocked */
-              if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
-                return true;
-              }
-              /* if maximum number of hits reached, block all light */
-              else if (*num_hits == max_hits) {
-                return true;
-              }
-
-              /* move on to next entry in intersections array */
-              isect_array++;
-              (*num_hits)++;
-#if BVH_FEATURE(BVH_INSTANCING)
-              num_hits_in_instance++;
-#endif
-
-              isect_array->t = isect_t;
-            }
-
-            prim_addr++;
-          }
-        }
-#if BVH_FEATURE(BVH_INSTANCING)
-        else {
-          /* Instance push. */
-          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-#  if BVH_FEATURE(BVH_MOTION)
-          isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#  else
-          isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-#  endif
-
-          num_hits_in_instance = 0;
-          isect_array->t = isect_t;
-
-          qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-          tfar = ssef(isect_t);
-#  if BVH_FEATURE(BVH_HAIR)
-          dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#  endif
-          idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-#  ifdef __KERNEL_AVX2__
-          P_idir = P * idir;
-          P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-          org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#  endif
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-          node_addr = kernel_tex_fetch(__object_node, object);
-        }
-      }
-#endif /* FEATURE(BVH_INSTANCING) */
-    } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-    if (stack_ptr >= 0) {
-      kernel_assert(object != OBJECT_NONE);
-
-      /* Instance pop. */
-      if (num_hits_in_instance) {
-        float t_fac;
-#  if BVH_FEATURE(BVH_MOTION)
-        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-#  else
-        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-#  endif
-        /* Scale isect->t to adjust for instancing. */
-        for (int i = 0; i < num_hits_in_instance; i++) {
-          (isect_array - i - 1)->t *= t_fac;
-        }
-      }
-      else {
-#  if BVH_FEATURE(BVH_MOTION)
-        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-#  else
-        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-#  endif
-      }
-
-      isect_t = tmax;
-      isect_array->t = isect_t;
-
-      qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-      tfar = ssef(isect_t);
-#  if BVH_FEATURE(BVH_HAIR)
-      dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#  endif
-      idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-#  ifdef __KERNEL_AVX2__
-      P_idir = P * idir;
-      P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-      org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#  endif
-
-      object = OBJECT_NONE;
-      node_addr = traversal_stack[stack_ptr].addr;
-      --stack_ptr;
-    }
-#endif /* FEATURE(BVH_INSTANCING) */
-  } while (node_addr != ENTRYPOINT_SENTINEL);
-
-  return false;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h
deleted file mode 100644
index f68579b4d69..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_traversal.h
+++ /dev/null
@@ -1,413 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function, where various features can be
- * enabled/disabled. This way we can compile optimized versions for each case
- * without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_HAIR: hair curve rendering
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-#  define NODE_INTERSECT qbvh_node_intersect
-#else
-#  define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
-                                             const Ray *ray,
-                                             Intersection *isect,
-                                             const uint visibility)
-{
-  /* TODO(sergey):
-   * - Test if pushing distance on the stack helps (for non shadow rays).
-   * - Separate version for shadow rays.
-   * - Likely and unlikely for if() statements.
-   * - Test restrict attribute for pointers.
-   */
-
-  /* Traversal stack in CUDA thread-local memory. */
-  QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
-  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-  traversal_stack[0].dist = -FLT_MAX;
-
-  /* Traversal variables in registers. */
-  int stack_ptr = 0;
-  int node_addr = kernel_data.bvh.root;
-  float node_dist = -FLT_MAX;
-
-  /* Ray parameters in registers. */
-  float3 P = ray->P;
-  float3 dir = bvh_clamp_direction(ray->D);
-  float3 idir = bvh_inverse_direction(dir);
-  int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
-  Transform ob_itfm;
-#endif
-
-  isect->t = ray->t;
-  isect->u = 0.0f;
-  isect->v = 0.0f;
-  isect->prim = PRIM_NONE;
-  isect->object = OBJECT_NONE;
-
-  BVH_DEBUG_INIT();
-
-  ssef tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
-  sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
-  sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
-  float3 P_idir = P * idir;
-  sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-  sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
-  /* Offsets to select the side that becomes the lower or upper bound. */
-  int near_x, near_y, near_z;
-  int far_x, far_y, far_z;
-  qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
-  /* Traversal loop. */
-  do {
-    do {
-      /* Traverse internal nodes. */
-      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-        (void)inodes;
-
-        if (UNLIKELY(node_dist > isect->t)
-#if BVH_FEATURE(BVH_MOTION)
-            || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
-#endif
-#ifdef __VISIBILITY_FLAG__
-            || (__float_as_uint(inodes.x) & visibility) == 0
-#endif
-        ) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          node_dist = traversal_stack[stack_ptr].dist;
-          --stack_ptr;
-          continue;
-        }
-
-        int child_mask;
-        ssef dist;
-
-        BVH_DEBUG_NEXT_NODE();
-
-        {
-          child_mask = NODE_INTERSECT(kg,
-                                      tnear,
-                                      tfar,
-#ifdef __KERNEL_AVX2__
-                                      P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-                                      org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-                                      dir4,
-#endif
-                                      idir4,
-                                      near_x,
-                                      near_y,
-                                      near_z,
-                                      far_x,
-                                      far_y,
-                                      far_z,
-                                      node_addr,
-                                      &dist);
-        }
-
-        if (child_mask != 0) {
-          float4 cnodes;
-          /* TODO(sergey): Investigate whether moving cnodes upwards
-           * gives a speedup (will be different cache pattern but will
-           * avoid extra check here).
-           */
-#if BVH_FEATURE(BVH_HAIR)
-          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
-          }
-          else
-#endif
-          {
-            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
-          }
-
-          /* One child is hit, continue with that child. */
-          int r = __bscf(child_mask);
-          float d0 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            node_addr = __float_as_int(cnodes[r]);
-            node_dist = d0;
-            continue;
-          }
-
-          /* Two children are hit, push far child, and continue with
-           * closer child.
-           */
-          int c0 = __float_as_int(cnodes[r]);
-          r = __bscf(child_mask);
-          int c1 = __float_as_int(cnodes[r]);
-          float d1 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            if (d1 < d0) {
-              node_addr = c1;
-              node_dist = d1;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c0;
-              traversal_stack[stack_ptr].dist = d0;
-              continue;
-            }
-            else {
-              node_addr = c0;
-              node_dist = d0;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c1;
-              traversal_stack[stack_ptr].dist = d1;
-              continue;
-            }
-          }
-
-          /* Here starts the slow path for 3 or 4 hit children. We push
-           * all nodes onto the stack to sort them there.
-           */
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c1;
-          traversal_stack[stack_ptr].dist = d1;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c0;
-          traversal_stack[stack_ptr].dist = d0;
-
-          /* Three children are hit, push all onto stack and sort 3
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c2 = __float_as_int(cnodes[r]);
-          float d2 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            qbvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            node_dist = traversal_stack[stack_ptr].dist;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Four children are hit, push all onto stack and sort 4
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c3 = __float_as_int(cnodes[r]);
-          float d3 = ((float *)&dist)[r];
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c3;
-          traversal_stack[stack_ptr].dist = d3;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c2;
-          traversal_stack[stack_ptr].dist = d2;
-          qbvh_stack_sort(&traversal_stack[stack_ptr],
-                          &traversal_stack[stack_ptr - 1],
-                          &traversal_stack[stack_ptr - 2],
-                          &traversal_stack[stack_ptr - 3]);
-        }
-
-        node_addr = traversal_stack[stack_ptr].addr;
-        node_dist = traversal_stack[stack_ptr].dist;
-        --stack_ptr;
-      }
-
-      /* If node is leaf, fetch triangle list. */
-      if (node_addr < 0) {
-        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-#ifdef __VISIBILITY_FLAG__
-        if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
-#else
-        if (UNLIKELY((node_dist > isect->t)))
-#endif
-        {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          node_dist = traversal_stack[stack_ptr].dist;
-          --stack_ptr;
-          continue;
-        }
-
-        int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-        if (prim_addr >= 0) {
-#endif
-          int prim_addr2 = __float_as_int(leaf.y);
-          const uint type = __float_as_int(leaf.w);
-
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          node_dist = traversal_stack[stack_ptr].dist;
-          --stack_ptr;
-
-          /* Primitive intersection. */
-          switch (type & PRIMITIVE_ALL) {
-            case PRIMITIVE_TRIANGLE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                BVH_DEBUG_NEXT_INTERSECTION();
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
-                  tfar = ssef(isect->t);
-                  /* Shadow ray early termination. */
-                  if (visibility & PATH_RAY_SHADOW_OPAQUE) {
-                    return true;
-                  }
-                }
-              }
-              break;
-            }
-#if BVH_FEATURE(BVH_MOTION)
-            case PRIMITIVE_MOTION_TRIANGLE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                BVH_DEBUG_NEXT_INTERSECTION();
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                if (motion_triangle_intersect(
-                        kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
-                  tfar = ssef(isect->t);
-                  /* Shadow ray early termination. */
-                  if (visibility & PATH_RAY_SHADOW_OPAQUE) {
-                    return true;
-                  }
-                }
-              }
-              break;
-            }
-#endif /* BVH_FEATURE(BVH_MOTION) */
-#if BVH_FEATURE(BVH_HAIR)
-            case PRIMITIVE_CURVE:
-            case PRIMITIVE_MOTION_CURVE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                BVH_DEBUG_NEXT_INTERSECTION();
-                const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
-                kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
-                bool hit = curve_intersect(
-                    kg, isect, P, dir, visibility, object, prim_addr, ray->time, curve_type);
-                if (hit) {
-                  tfar = ssef(isect->t);
-                  /* Shadow ray early termination. */
-                  if (visibility & PATH_RAY_SHADOW_OPAQUE) {
-                    return true;
-                  }
-                }
-              }
-              break;
-            }
-#endif /* BVH_FEATURE(BVH_HAIR) */
-          }
-        }
-#if BVH_FEATURE(BVH_INSTANCING)
-        else {
-          /* Instance push. */
-          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-
-#  if BVH_FEATURE(BVH_MOTION)
-          qbvh_instance_motion_push(
-              kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
-#  else
-          qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
-#  endif
-
-          qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-          tfar = ssef(isect->t);
-#  if BVH_FEATURE(BVH_HAIR)
-          dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#  endif
-          idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-#  ifdef __KERNEL_AVX2__
-          P_idir = P * idir;
-          P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-          org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#  endif
-
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-          traversal_stack[stack_ptr].dist = -FLT_MAX;
-
-          node_addr = kernel_tex_fetch(__object_node, object);
-
-          BVH_DEBUG_NEXT_INSTANCE();
-        }
-      }
-#endif /* FEATURE(BVH_INSTANCING) */
-    } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-    if (stack_ptr >= 0) {
-      kernel_assert(object != OBJECT_NONE);
-
-      /* Instance pop. */
-#  if BVH_FEATURE(BVH_MOTION)
-      isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-#  else
-      isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-#  endif
-
-      qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-      tfar = ssef(isect->t);
-#  if BVH_FEATURE(BVH_HAIR)
-      dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#  endif
-      idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-#  ifdef __KERNEL_AVX2__
-      P_idir = P * idir;
-      P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-      org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#  endif
-
-      object = OBJECT_NONE;
-      node_addr = traversal_stack[stack_ptr].addr;
-      node_dist = traversal_stack[stack_ptr].dist;
-      --stack_ptr;
-    }
-#endif /* FEATURE(BVH_INSTANCING) */
-  } while (node_addr != ENTRYPOINT_SENTINEL);
-
-  return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h
deleted file mode 100644
index e4eaed04467..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_volume.h
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-#  define NODE_INTERSECT qbvh_node_intersect
-#else
-#  define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
-                                             const Ray *ray,
-                                             Intersection *isect,
-                                             const uint visibility)
-{
-  /* TODO(sergey):
-   * - Test if pushing distance on the stack helps.
-   * - Likely and unlikely for if() statements.
-   * - Test restrict attribute for pointers.
-   */
-
-  /* Traversal stack in CUDA thread-local memory. */
-  QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
-  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-  /* Traversal variables in registers. */
-  int stack_ptr = 0;
-  int node_addr = kernel_data.bvh.root;
-
-  /* Ray parameters in registers. */
-  float3 P = ray->P;
-  float3 dir = bvh_clamp_direction(ray->D);
-  float3 idir = bvh_inverse_direction(dir);
-  int object = OBJECT_NONE;
-
-#if BVH_FEATURE(BVH_MOTION)
-  Transform ob_itfm;
-#endif
-
-  isect->t = ray->t;
-  isect->u = 0.0f;
-  isect->v = 0.0f;
-  isect->prim = PRIM_NONE;
-  isect->object = OBJECT_NONE;
-
-  ssef tnear(0.0f), tfar(ray->t);
-#if BVH_FEATURE(BVH_HAIR)
-  sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
-  sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
-  float3 P_idir = P * idir;
-  sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-  sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
-  /* Offsets to select the side that becomes the lower or upper bound. */
-  int near_x, near_y, near_z;
-  int far_x, far_y, far_z;
-  qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
-  /* Traversal loop. */
-  do {
-    do {
-      /* Traverse internal nodes. */
-      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
-        if ((__float_as_uint(inodes.x) & visibility) == 0) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-#endif
-
-        ssef dist;
-        int child_mask = NODE_INTERSECT(kg,
-                                        tnear,
-                                        tfar,
-#ifdef __KERNEL_AVX2__
-                                        P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-                                        org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-                                        dir4,
-#endif
-                                        idir4,
-                                        near_x,
-                                        near_y,
-                                        near_z,
-                                        far_x,
-                                        far_y,
-                                        far_z,
-                                        node_addr,
-                                        &dist);
-
-        if (child_mask != 0) {
-          float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
-          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
-          }
-          else
-#endif
-          {
-            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
-          }
-
-          /* One child is hit, continue with that child. */
-          int r = __bscf(child_mask);
-          if (child_mask == 0) {
-            node_addr = __float_as_int(cnodes[r]);
-            continue;
-          }
-
-          /* Two children are hit, push far child, and continue with
-           * closer child.
-           */
-          int c0 = __float_as_int(cnodes[r]);
-          float d0 = ((float *)&dist)[r];
-          r = __bscf(child_mask);
-          int c1 = __float_as_int(cnodes[r]);
-          float d1 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            if (d1 < d0) {
-              node_addr = c1;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c0;
-              traversal_stack[stack_ptr].dist = d0;
-              continue;
-            }
-            else {
-              node_addr = c0;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c1;
-              traversal_stack[stack_ptr].dist = d1;
-              continue;
-            }
-          }
-
-          /* Here starts the slow path for 3 or 4 hit children. We push
-           * all nodes onto the stack to sort them there.
-           */
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c1;
-          traversal_stack[stack_ptr].dist = d1;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c0;
-          traversal_stack[stack_ptr].dist = d0;
-
-          /* Three children are hit, push all onto stack and sort 3
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c2 = __float_as_int(cnodes[r]);
-          float d2 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            qbvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Four children are hit, push all onto stack and sort 4
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c3 = __float_as_int(cnodes[r]);
-          float d3 = ((float *)&dist)[r];
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c3;
-          traversal_stack[stack_ptr].dist = d3;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c2;
-          traversal_stack[stack_ptr].dist = d2;
-          qbvh_stack_sort(&traversal_stack[stack_ptr],
-                          &traversal_stack[stack_ptr - 1],
-                          &traversal_stack[stack_ptr - 2],
-                          &traversal_stack[stack_ptr - 3]);
-        }
-
-        node_addr = traversal_stack[stack_ptr].addr;
-        --stack_ptr;
-      }
-
-      /* If node is leaf, fetch triangle list. */
-      if (node_addr < 0) {
-        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-        if ((__float_as_uint(leaf.z) & visibility) == 0) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-
-        int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-        if (prim_addr >= 0) {
-#endif
-          int prim_addr2 = __float_as_int(leaf.y);
-          const uint type = __float_as_int(leaf.w);
-          const uint p_type = type & PRIMITIVE_ALL;
-
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-
-          /* Primitive intersection. */
-          switch (p_type) {
-            case PRIMITIVE_TRIANGLE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                /* Only primitives from volume object. */
-                uint tri_object = (object == OBJECT_NONE) ?
-                                      kernel_tex_fetch(__prim_object, prim_addr) :
-                                      object;
-                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-                  continue;
-                }
-                /* Intersect ray against primitive. */
-                triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
-              }
-              break;
-            }
-#if BVH_FEATURE(BVH_MOTION)
-            case PRIMITIVE_MOTION_TRIANGLE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                /* Only primitives from volume object. */
-                uint tri_object = (object == OBJECT_NONE) ?
-                                      kernel_tex_fetch(__prim_object, prim_addr) :
-                                      object;
-                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-                  continue;
-                }
-                /* Intersect ray against primitive. */
-                motion_triangle_intersect(
-                    kg, isect, P, dir, ray->time, visibility, object, prim_addr);
-              }
-              break;
-            }
-#endif
-          }
-        }
-#if BVH_FEATURE(BVH_INSTANCING)
-        else {
-          /* Instance push. */
-          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-          int object_flag = kernel_tex_fetch(__object_flag, object);
-          if (object_flag & SD_OBJECT_HAS_VOLUME) {
-#  if BVH_FEATURE(BVH_MOTION)
-            isect->t = bvh_instance_motion_push(
-                kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-#  else
-            isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
-#  endif
-
-            qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-            tfar = ssef(isect->t);
-#  if BVH_FEATURE(BVH_HAIR)
-            dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#  endif
-            idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-#  ifdef __KERNEL_AVX2__
-            P_idir = P * idir;
-            P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-            org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#  endif
-
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-            node_addr = kernel_tex_fetch(__object_node, object);
-          }
-          else {
-            /* Pop. */
-            object = OBJECT_NONE;
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-          }
-        }
-      }
-#endif /* FEATURE(BVH_INSTANCING) */
-    } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-    if (stack_ptr >= 0) {
-      kernel_assert(object != OBJECT_NONE);
-
-      /* Instance pop. */
-#  if BVH_FEATURE(BVH_MOTION)
-      isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-#  else
-      isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-#  endif
-
-      qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-      tfar = ssef(isect->t);
-#  if BVH_FEATURE(BVH_HAIR)
-      dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#  endif
-      idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-#  ifdef __KERNEL_AVX2__
-      P_idir = P * idir;
-      P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-      org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#  endif
-
-      object = OBJECT_NONE;
-      node_addr = traversal_stack[stack_ptr].addr;
-      --stack_ptr;
-    }
-#endif /* FEATURE(BVH_INSTANCING) */
-  } while (node_addr != ENTRYPOINT_SENTINEL);
-
-  return (isect->prim != PRIM_NONE);
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h
deleted file mode 100644
index eddc48c487e..00000000000
--- a/intern/cycles/kernel/bvh/qbvh_volume_all.h
+++ /dev/null
@@ -1,444 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* This is a template BVH traversal function for volumes, where
- * various features can be enabled/disabled. This way we can compile optimized
- * versions for each case without new features slowing things down.
- *
- * BVH_INSTANCING: object instancing
- * BVH_MOTION: motion blur rendering
- */
-
-#if BVH_FEATURE(BVH_HAIR)
-#  define NODE_INTERSECT qbvh_node_intersect
-#else
-#  define NODE_INTERSECT qbvh_aligned_node_intersect
-#endif
-
-ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
-                                             const Ray *ray,
-                                             Intersection *isect_array,
-                                             const uint max_hits,
-                                             const uint visibility)
-{
-  /* TODO(sergey):
-   * - Test if pushing distance on the stack helps.
-   * - Likely and unlikely for if() statements.
-   * - Test restrict attribute for pointers.
-   */
-
-  /* Traversal stack in CUDA thread-local memory. */
-  QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
-  traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
-
-  /* Traversal variables in registers. */
-  int stack_ptr = 0;
-  int node_addr = kernel_data.bvh.root;
-
-  /* Ray parameters in registers. */
-  const float tmax = ray->t;
-  float3 P = ray->P;
-  float3 dir = bvh_clamp_direction(ray->D);
-  float3 idir = bvh_inverse_direction(dir);
-  int object = OBJECT_NONE;
-  float isect_t = tmax;
-
-#if BVH_FEATURE(BVH_MOTION)
-  Transform ob_itfm;
-#endif
-
-  uint num_hits = 0;
-  isect_array->t = tmax;
-
-#if BVH_FEATURE(BVH_INSTANCING)
-  int num_hits_in_instance = 0;
-#endif
-
-  ssef tnear(0.0f), tfar(isect_t);
-#if BVH_FEATURE(BVH_HAIR)
-  sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#endif
-  sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-
-#ifdef __KERNEL_AVX2__
-  float3 P_idir = P * idir;
-  sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-  sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
-#endif
-
-  /* Offsets to select the side that becomes the lower or upper bound. */
-  int near_x, near_y, near_z;
-  int far_x, far_y, far_z;
-  qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-
-  /* Traversal loop. */
-  do {
-    do {
-      /* Traverse internal nodes. */
-      while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
-        float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
-
-#ifdef __VISIBILITY_FLAG__
-        if ((__float_as_uint(inodes.x) & visibility) == 0) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-#endif
-
-        ssef dist;
-        int child_mask = NODE_INTERSECT(kg,
-                                        tnear,
-                                        tfar,
-#ifdef __KERNEL_AVX2__
-                                        P_idir4,
-#endif
-#if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-                                        org4,
-#endif
-#if BVH_FEATURE(BVH_HAIR)
-                                        dir4,
-#endif
-                                        idir4,
-                                        near_x,
-                                        near_y,
-                                        near_z,
-                                        far_x,
-                                        far_y,
-                                        far_z,
-                                        node_addr,
-                                        &dist);
-
-        if (child_mask != 0) {
-          float4 cnodes;
-#if BVH_FEATURE(BVH_HAIR)
-          if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
-            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
-          }
-          else
-#endif
-          {
-            cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
-          }
-
-          /* One child is hit, continue with that child. */
-          int r = __bscf(child_mask);
-          if (child_mask == 0) {
-            node_addr = __float_as_int(cnodes[r]);
-            continue;
-          }
-
-          /* Two children are hit, push far child, and continue with
-           * closer child.
-           */
-          int c0 = __float_as_int(cnodes[r]);
-          float d0 = ((float *)&dist)[r];
-          r = __bscf(child_mask);
-          int c1 = __float_as_int(cnodes[r]);
-          float d1 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            if (d1 < d0) {
-              node_addr = c1;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c0;
-              traversal_stack[stack_ptr].dist = d0;
-              continue;
-            }
-            else {
-              node_addr = c0;
-              ++stack_ptr;
-              kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-              traversal_stack[stack_ptr].addr = c1;
-              traversal_stack[stack_ptr].dist = d1;
-              continue;
-            }
-          }
-
-          /* Here starts the slow path for 3 or 4 hit children. We push
-           * all nodes onto the stack to sort them there.
-           */
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c1;
-          traversal_stack[stack_ptr].dist = d1;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c0;
-          traversal_stack[stack_ptr].dist = d0;
-
-          /* Three children are hit, push all onto stack and sort 3
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c2 = __float_as_int(cnodes[r]);
-          float d2 = ((float *)&dist)[r];
-          if (child_mask == 0) {
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = c2;
-            traversal_stack[stack_ptr].dist = d2;
-            qbvh_stack_sort(&traversal_stack[stack_ptr],
-                            &traversal_stack[stack_ptr - 1],
-                            &traversal_stack[stack_ptr - 2]);
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-            continue;
-          }
-
-          /* Four children are hit, push all onto stack and sort 4
-           * stack items, continue with closest child.
-           */
-          r = __bscf(child_mask);
-          int c3 = __float_as_int(cnodes[r]);
-          float d3 = ((float *)&dist)[r];
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c3;
-          traversal_stack[stack_ptr].dist = d3;
-          ++stack_ptr;
-          kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-          traversal_stack[stack_ptr].addr = c2;
-          traversal_stack[stack_ptr].dist = d2;
-          qbvh_stack_sort(&traversal_stack[stack_ptr],
-                          &traversal_stack[stack_ptr - 1],
-                          &traversal_stack[stack_ptr - 2],
-                          &traversal_stack[stack_ptr - 3]);
-        }
-
-        node_addr = traversal_stack[stack_ptr].addr;
-        --stack_ptr;
-      }
-
-      /* If node is leaf, fetch triangle list. */
-      if (node_addr < 0) {
-        float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
-
-        if ((__float_as_uint(leaf.z) & visibility) == 0) {
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-          continue;
-        }
-
-        int prim_addr = __float_as_int(leaf.x);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-        if (prim_addr >= 0) {
-#endif
-          int prim_addr2 = __float_as_int(leaf.y);
-          const uint type = __float_as_int(leaf.w);
-          const uint p_type = type & PRIMITIVE_ALL;
-          bool hit;
-
-          /* Pop. */
-          node_addr = traversal_stack[stack_ptr].addr;
-          --stack_ptr;
-
-          /* Primitive intersection. */
-          switch (p_type) {
-            case PRIMITIVE_TRIANGLE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                /* Only primitives from volume object. */
-                uint tri_object = (object == OBJECT_NONE) ?
-                                      kernel_tex_fetch(__prim_object, prim_addr) :
-                                      object;
-                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-                  continue;
-                }
-                /* Intersect ray against primitive. */
-                hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
-                if (hit) {
-                  /* Move on to next entry in intersections array. */
-                  isect_array++;
-                  num_hits++;
-#if BVH_FEATURE(BVH_INSTANCING)
-                  num_hits_in_instance++;
-#endif
-                  isect_array->t = isect_t;
-                  if (num_hits == max_hits) {
-#if BVH_FEATURE(BVH_INSTANCING)
-                    if (object != OBJECT_NONE) {
-#  if BVH_FEATURE(BVH_MOTION)
-                      float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-#  else
-                      Transform itfm = object_fetch_transform(
-                          kg, object, OBJECT_INVERSE_TRANSFORM);
-                      float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-#  endif
-                      for (int i = 0; i < num_hits_in_instance; i++) {
-                        (isect_array - i - 1)->t *= t_fac;
-                      }
-                    }
-#endif /* BVH_FEATURE(BVH_INSTANCING) */
-                    return num_hits;
-                  }
-                }
-              }
-              break;
-            }
-#if BVH_FEATURE(BVH_MOTION)
-            case PRIMITIVE_MOTION_TRIANGLE: {
-              for (; prim_addr < prim_addr2; prim_addr++) {
-                kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
-                /* Only primitives from volume object. */
-                uint tri_object = (object == OBJECT_NONE) ?
-                                      kernel_tex_fetch(__prim_object, prim_addr) :
-                                      object;
-                int object_flag = kernel_tex_fetch(__object_flag, tri_object);
-                if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
-                  continue;
-                }
-                /* Intersect ray against primitive. */
-                hit = motion_triangle_intersect(
-                    kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
-                if (hit) {
-                  /* Move on to next entry in intersections array. */
-                  isect_array++;
-                  num_hits++;
-#  if BVH_FEATURE(BVH_INSTANCING)
-                  num_hits_in_instance++;
-#  endif
-                  isect_array->t = isect_t;
-                  if (num_hits == max_hits) {
-#  if BVH_FEATURE(BVH_INSTANCING)
-                    if (object != OBJECT_NONE) {
-#    if BVH_FEATURE(BVH_MOTION)
-                      float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-#    else
-                      Transform itfm = object_fetch_transform(
-                          kg, object, OBJECT_INVERSE_TRANSFORM);
-                      float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-#    endif
-                      for (int i = 0; i < num_hits_in_instance; i++) {
-                        (isect_array - i - 1)->t *= t_fac;
-                      }
-                    }
-#  endif /* BVH_FEATURE(BVH_INSTANCING) */
-                    return num_hits;
-                  }
-                }
-              }
-              break;
-            }
-#endif
-          }
-        }
-#if BVH_FEATURE(BVH_INSTANCING)
-        else {
-          /* Instance push. */
-          object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
-          int object_flag = kernel_tex_fetch(__object_flag, object);
-          if (object_flag & SD_OBJECT_HAS_VOLUME) {
-#  if BVH_FEATURE(BVH_MOTION)
-            isect_t = bvh_instance_motion_push(
-                kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
-#  else
-            isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
-#  endif
-
-            qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-            tfar = ssef(isect_t);
-            idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-#  if BVH_FEATURE(BVH_HAIR)
-            dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#  endif
-#  ifdef __KERNEL_AVX2__
-            P_idir = P * idir;
-            P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-            org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#  endif
-
-            num_hits_in_instance = 0;
-            isect_array->t = isect_t;
-
-            ++stack_ptr;
-            kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
-            traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
-
-            node_addr = kernel_tex_fetch(__object_node, object);
-          }
-          else {
-            /* Pop. */
-            object = OBJECT_NONE;
-            node_addr = traversal_stack[stack_ptr].addr;
-            --stack_ptr;
-          }
-        }
-      }
-#endif /* FEATURE(BVH_INSTANCING) */
-    } while (node_addr != ENTRYPOINT_SENTINEL);
-
-#if BVH_FEATURE(BVH_INSTANCING)
-    if (stack_ptr >= 0) {
-      kernel_assert(object != OBJECT_NONE);
-
-      /* Instance pop. */
-      if (num_hits_in_instance) {
-        float t_fac;
-#  if BVH_FEATURE(BVH_MOTION)
-        bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-#  else
-        bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-#  endif
-        /* Scale isect->t to adjust for instancing. */
-        for (int i = 0; i < num_hits_in_instance; i++) {
-          (isect_array - i - 1)->t *= t_fac;
-        }
-      }
-      else {
-#  if BVH_FEATURE(BVH_MOTION)
-        bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-#  else
-        bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-#  endif
-      }
-
-      isect_t = tmax;
-      isect_array->t = isect_t;
-
-      qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
-      tfar = ssef(isect_t);
-#  if BVH_FEATURE(BVH_HAIR)
-      dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
-#  endif
-      idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
-#  ifdef __KERNEL_AVX2__
-      P_idir = P * idir;
-      P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
-#  endif
-#  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
-      org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
-#  endif
-
-      object = OBJECT_NONE;
-      node_addr = traversal_stack[stack_ptr].addr;
-      --stack_ptr;
-    }
-#endif /* FEATURE(BVH_INSTANCING) */
-  } while (node_addr != ENTRYPOINT_SENTINEL);
-
-  return num_hits;
-}
-
-#undef NODE_INTERSECT
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index 88963bea6ef..87ed0bf201f 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -18,13 +18,6 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __HAIR__
 
-#  ifdef __KERNEL_SSE2__
-ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a)
-{
-  return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2]));
-}
-#  endif
-
 /* On CPU pass P and dir by reference to aligned vector. */
 ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
                                             Intersection *isect,
@@ -55,108 +48,6 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
   int flags = kernel_data.curve.curveflags;
   int prim = kernel_tex_fetch(__prim_index, curveAddr);
 
-#  ifdef __KERNEL_SSE2__
-  ssef vdir = load4f(dir);
-  ssef vcurve_coef[4];
-  const float3 *curve_coef = (float3 *)vcurve_coef;
-
-  {
-    ssef dtmp = vdir * vdir;
-    ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp));
-    ssef rd_ss = load1f_first(1.0f) / d_ss;
-
-    ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]);
-    int2 &v00 = (int2 &)v00vec;
-
-    int k0 = v00.x + segment;
-    int k1 = k0 + 1;
-    int ka = max(k0 - 1, v00.x);
-    int kb = min(k1 + 1, v00.x + v00.y - 1);
-
-#    if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \
-        (!defined(_MSC_VER) || _MSC_VER > 1800)
-    avxf P_curve_0_1, P_curve_2_3;
-    if (is_curve_primitive) {
-      P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
-      P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
-    }
-    else {
-      int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
-      motion_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1, &P_curve_2_3);
-    }
-#    else  /* __KERNEL_AVX2__ */
-    ssef P_curve[4];
-
-    if (is_curve_primitive) {
-      P_curve[0] = load4f(&kg->__curve_keys.data[ka].x);
-      P_curve[1] = load4f(&kg->__curve_keys.data[k0].x);
-      P_curve[2] = load4f(&kg->__curve_keys.data[k1].x);
-      P_curve[3] = load4f(&kg->__curve_keys.data[kb].x);
-    }
-    else {
-      int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
-      motion_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4 *)&P_curve);
-    }
-#    endif /* __KERNEL_AVX2__ */
-
-    ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
-    ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
-    ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy;
-    ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz);
-    ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0));
-
-    ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0);
-    ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
-    ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
-
-#    if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \
-        (!defined(_MSC_VER) || _MSC_VER > 1800)
-    const avxf vPP = _mm256_broadcast_ps(&P.m128);
-    const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
-    const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
-    const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
-
-    const avxf p01 = madd(
-        shuffle<0>(P_curve_0_1 - vPP),
-        htfm00,
-        madd(shuffle<1>(P_curve_0_1 - vPP), htfm11, shuffle<2>(P_curve_0_1 - vPP) * htfm22));
-    const avxf p23 = madd(
-        shuffle<0>(P_curve_2_3 - vPP),
-        htfm00,
-        madd(shuffle<1>(P_curve_2_3 - vPP), htfm11, shuffle<2>(P_curve_2_3 - vPP) * htfm22));
-
-    const ssef p0 = _mm256_castps256_ps128(p01);
-    const ssef p1 = _mm256_extractf128_ps(p01, 1);
-    const ssef p2 = _mm256_castps256_ps128(p23);
-    const ssef p3 = _mm256_extractf128_ps(p23, 1);
-
-    const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
-    r_st = ((float4 &)P_curve_1).w;
-    const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
-    r_en = ((float4 &)P_curve_2).w;
-#    else  /* __KERNEL_AVX2__ */
-    ssef htfm[] = {htfm0, htfm1, htfm2};
-    ssef vP = load4f(P);
-    ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
-    ssef p1 = transform_point_T3(htfm, P_curve[1] - vP);
-    ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
-    ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
-
-    r_st = ((float4 &)P_curve[1]).w;
-    r_en = ((float4 &)P_curve[2]).w;
-#    endif /* __KERNEL_AVX2__ */
-
-    float fc = 0.71f;
-    ssef vfc = ssef(fc);
-    ssef vfcxp3 = vfc * p3;
-
-    vcurve_coef[0] = p1;
-    vcurve_coef[1] = vfc * (p2 - p0);
-    vcurve_coef[2] = madd(
-        ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
-    vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
-  }
-#  else
   float3 curve_coef[4];
 
   /* curve Intersection check */
@@ -212,7 +103,6 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
     r_st = P_curve[1].w;
     r_en = P_curve[2].w;
   }
-#  endif
 
   float r_curr = max(r_st, r_en);
 
@@ -275,23 +165,6 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
     const float i_st = tree * resol;
     const float i_en = i_st + (level * resol);
 
-#  ifdef __KERNEL_SSE2__
-    ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
-    ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]),
-                      vi_st,
-                      vcurve_coef[0]);
-    ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]),
-                      vi_en,
-                      vcurve_coef[0]);
-
-    ssef vbmin = min(vp_st, vp_en);
-    ssef vbmax = max(vp_st, vp_en);
-
-    float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax;
-    float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z;
-    float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z;
-    float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en;
-#  else
     float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st +
                   curve_coef[0];
     float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en +
@@ -303,7 +176,6 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
     float bmaxy = max(p_st.y, p_en.y);
     float bminz = min(p_st.z, p_en.z);
     float bmaxz = max(p_st.z, p_en.z);
-#  endif
 
     if (xextrem[0] >= i_st && xextrem[0] <= i_en) {
       bminx = min(bminx, xextrem[1]);
@@ -351,23 +223,13 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg,
 
       if (flags & CURVE_KN_RIBBONS) {
         float3 tg = (p_en - p_st);
-#  ifdef __KERNEL_SSE__
-        const float3 tg_sq = tg * tg;
-        float w = tg_sq.x + tg_sq.y;
-#  else
         float w = tg.x * tg.x + tg.y * tg.y;
-#  endif
         if (w == 0) {
           tree++;
           level = tree & -tree;
           continue;
         }
-#  ifdef __KERNEL_SSE__
-        const float3 p_sttg = p_st * tg;
-        w = -(p_sttg.x + p_sttg.y) / w;
-#  else
         w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
-#  endif
         w = saturate(w);
 
         /* compute u on the curve segment */
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index dd7429c02bd..0e7a05eaac2 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -106,15 +106,15 @@ ccl_device_inline void motion_curve_keys(
 }
 
 ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
-                                                           int offset,
-                                                           int numkeys,
-                                                           int numsteps,
-                                                           int step,
-                                                           int k0,
-                                                           int k1,
-                                                           int k2,
-                                                           int k3,
-                                                           float4 keys[4])
+                                                  int offset,
+                                                  int numkeys,
+                                                  int numsteps,
+                                                  int step,
+                                                  int k0,
+                                                  int k1,
+                                                  int k2,
+                                                  int k3,
+                                                  float4 keys[4])
 {
   if (step == numsteps) {
     /* center step: regular key location */
@@ -139,14 +139,14 @@ ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg,
 
 /* return 2 curve key locations */
 ccl_device_inline void motion_curve_keys(KernelGlobals *kg,
-                                                  int object,
-                                                  int prim,
-                                                  float time,
-                                                  int k0,
-                                                  int k1,
-                                                  int k2,
-                                                  int k3,
-                                                  float4 keys[4])
+                                         int object,
+                                         int prim,
+                                         float time,
+                                         int k0,
+                                         int k1,
+                                         int k2,
+                                         int k3,
+                                         float4 keys[4])
 {
   /* get motion info */
   int numsteps, numkeys;
@@ -166,8 +166,7 @@ ccl_device_inline void motion_curve_keys(KernelGlobals *kg,
   float4 next_keys[4];
 
   motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
-  motion_curve_keys_for_step(
-      kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
+  motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
 
   /* interpolate between steps */
   keys[0] = (1.0f - t) * keys[0] + t * next_keys[0];
@@ -176,53 +175,6 @@ ccl_device_inline void motion_curve_keys(KernelGlobals *kg,
   keys[3] = (1.0f - t) * keys[3] + t * next_keys[3];
 }
 
-#  if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
-/* Similar to above, but returns keys as pair of two AVX registers with each
- * holding two float4.
- */
-ccl_device_inline void motion_curve_keys_avx(KernelGlobals *kg,
-                                                      int object,
-                                                      int prim,
-                                                      float time,
-                                                      int k0,
-                                                      int k1,
-                                                      int k2,
-                                                      int k3,
-                                                      avxf *out_keys_0_1,
-                                                      avxf *out_keys_2_3)
-{
-  /* Get motion info. */
-  int numsteps, numkeys;
-  object_motion_info(kg, object, &numsteps, NULL, &numkeys);
-
-  /* Figure out which steps we need to fetch and their interpolation factor. */
-  int maxstep = numsteps * 2;
-  int step = min((int)(time * maxstep), maxstep - 1);
-  float t = time * maxstep - step;
-
-  /* Find attribute. */
-  AttributeElement elem;
-  int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
-  kernel_assert(offset != ATTR_STD_NOT_FOUND);
-
-  /* Fetch key coordinates. */
-  float4 next_keys[4];
-  float4 keys[4];
-  motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
-  motion_curve_keys_for_step(
-      kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys);
-
-  const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
-  const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
-  const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
-  const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
-
-  /* Interpolate between steps. */
-  *out_keys_0_1 = (1.0f - t) * keys_0_1 + t * next_keys_0_1;
-  *out_keys_2_3 = (1.0f - t) * keys_2_3 + t * next_keys_2_3;
-}
-#  endif
-
 #endif
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 3aa68e1f84e..614e2e3b92b 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -411,25 +411,10 @@ ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
 
 ccl_device_inline float3 bvh_clamp_direction(float3 dir)
 {
-  /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse
-   * direction */
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
-  const ssef oopes(8.271806E-25f, 8.271806E-25f, 8.271806E-25f, 0.0f);
-  const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes);
-  const ssef signdir = signmsk(dir.m128) | oopes;
-#  ifndef __KERNEL_AVX__
-  ssef res = mask & ssef(dir);
-  res = _mm_or_ps(res, _mm_andnot_ps(mask, signdir));
-#  else
-  ssef res = _mm_blendv_ps(signdir, dir, mask);
-#  endif
-  return float3(res);
-#else  /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
   const float ooeps = 8.271806E-25f;
   return make_float3((fabsf(dir.x) > ooeps) ? dir.x : copysignf(ooeps, dir.x),
                      (fabsf(dir.y) > ooeps) ? dir.y : copysignf(ooeps, dir.y),
                      (fabsf(dir.z) > ooeps) ? dir.z : copysignf(ooeps, dir.z));
-#endif /* __KERNEL_SSE__ && __KERNEL_SSE2__ */
 }
 
 ccl_device_inline float3 bvh_inverse_direction(float3 dir)
@@ -457,38 +442,6 @@ ccl_device_inline float bvh_instance_push(
   return t;
 }
 
-#ifdef __QBVH__
-/* Same as above, but optimized for QBVH scene intersection,
- * which needs to modify two max distances.
- *
- * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
- * so we can avoid having this duplication.
- */
-ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
-                                          int object,
-                                          const Ray *ray,
-                                          float3 *P,
-                                          float3 *dir,
-                                          float3 *idir,
-                                          float *t,
-                                          float *t1)
-{
-  Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-
-  *P = transform_point(&tfm, ray->P);
-
-  float len;
-  *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
-  *idir = bvh_inverse_direction(*dir);
-
-  if (*t != FLT_MAX)
-    *t *= len;
-
-  if (*t1 != -FLT_MAX)
-    *t1 *= len;
-}
-#endif
-
 /* Transorm ray to exit static object in BVH */
 
 ccl_device_inline float bvh_instance_pop(
@@ -551,39 +504,6 @@ ccl_device_inline float bvh_instance_motion_push(KernelGlobals *kg,
   return t;
 }
 
-#  ifdef __QBVH__
-/* Same as above, but optimized for QBVH scene intersection,
- * which needs to modify two max distances.
- *
- * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
- * so we can avoid having this duplication.
- */
-ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg,
-                                                 int object,
-                                                 const Ray *ray,
-                                                 float3 *P,
-                                                 float3 *dir,
-                                                 float3 *idir,
-                                                 float *t,
-                                                 float *t1,
-                                                 Transform *itfm)
-{
-  object_fetch_transform_motion_test(kg, object, ray->time, itfm);
-
-  *P = transform_point(itfm, ray->P);
-
-  float len;
-  *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
-  *idir = bvh_inverse_direction(*dir);
-
-  if (*t != FLT_MAX)
-    *t *= len;
-
-  if (*t1 != -FLT_MAX)
-    *t1 *= len;
-}
-#  endif
-
 /* Transorm ray to exit motion blurred object in BVH */
 
 ccl_device_inline float bvh_instance_motion_pop(KernelGlobals *kg,
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 6604806f73b..b0cce274b94 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -71,433 +71,6 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
   return false;
 }
 
-#ifdef __KERNEL_AVX2__
-#  define cross256(A, B, C, D) _mm256_fmsub_ps(A, B, _mm256_mul_ps(C, D))
-ccl_device_inline int ray_triangle_intersect8(KernelGlobals *kg,
-                                              float3 ray_P,
-                                              float3 ray_dir,
-                                              Intersection **isect,
-                                              uint visibility,
-                                              int object,
-                                              __m256 *triA,
-                                              __m256 *triB,
-                                              __m256 *triC,
-                                              int prim_addr,
-                                              int prim_num,
-                                              uint *num_hits,
-                                              uint max_hits,
-                                              int *num_hits_in_instance,
-                                              float isect_t)
-{
-
-  const unsigned char prim_num_mask = (1 << prim_num) - 1;
-
-  const __m256i zero256 = _mm256_setzero_si256();
-
-  const __m256 Px256 = _mm256_set1_ps(ray_P.x);
-  const __m256 Py256 = _mm256_set1_ps(ray_P.y);
-  const __m256 Pz256 = _mm256_set1_ps(ray_P.z);
-
-  const __m256 dirx256 = _mm256_set1_ps(ray_dir.x);
-  const __m256 diry256 = _mm256_set1_ps(ray_dir.y);
-  const __m256 dirz256 = _mm256_set1_ps(ray_dir.z);
-
-  /* Calculate vertices relative to ray origin. */
-  __m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256);
-  __m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256);
-  __m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256);
-
-  __m256 v1_x_256 = _mm256_sub_ps(triA[0], Px256);
-  __m256 v1_y_256 = _mm256_sub_ps(triA[1], Py256);
-  __m256 v1_z_256 = _mm256_sub_ps(triA[2], Pz256);
-
-  __m256 v2_x_256 = _mm256_sub_ps(triB[0], Px256);
-  __m256 v2_y_256 = _mm256_sub_ps(triB[1], Py256);
-  __m256 v2_z_256 = _mm256_sub_ps(triB[2], Pz256);
-
-  __m256 v0_v1_x_256 = _mm256_add_ps(v0_x_256, v1_x_256);
-  __m256 v0_v1_y_256 = _mm256_add_ps(v0_y_256, v1_y_256);
-  __m256 v0_v1_z_256 = _mm256_add_ps(v0_z_256, v1_z_256);
-
-  __m256 v0_v2_x_256 = _mm256_add_ps(v0_x_256, v2_x_256);
-  __m256 v0_v2_y_256 = _mm256_add_ps(v0_y_256, v2_y_256);
-  __m256 v0_v2_z_256 = _mm256_add_ps(v0_z_256, v2_z_256);
-
-  __m256 v1_v2_x_256 = _mm256_add_ps(v1_x_256, v2_x_256);
-  __m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256);
-  __m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256);
-
-  /* Calculate triangle edges. */
-  __m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256);
-  __m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256);
-  __m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256);
-
-  __m256 e1_x_256 = _mm256_sub_ps(v0_x_256, v1_x_256);
-  __m256 e1_y_256 = _mm256_sub_ps(v0_y_256, v1_y_256);
-  __m256 e1_z_256 = _mm256_sub_ps(v0_z_256, v1_z_256);
-
-  __m256 e2_x_256 = _mm256_sub_ps(v1_x_256, v2_x_256);
-  __m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256);
-  __m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256);
-
-  /* Perform edge tests. */
-  /* cross (AyBz - AzBy, AzBx -AxBz,  AxBy - AyBx) */
-  __m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256);
-  __m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256);
-  __m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256);
-  /* vertical dot */
-  __m256 U_256 = _mm256_mul_ps(U_x_256, dirx256);
-  U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256);
-  U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256);
-
-  __m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256);
-  __m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256);
-  __m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256);
-  /* vertical dot */
-  __m256 V_256 = _mm256_mul_ps(V_x_256, dirx256);
-  V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256);
-  V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256);
-
-  __m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256);
-  __m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256);
-  __m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256);
-  /* vertical dot */
-  __m256 W_256 = _mm256_mul_ps(W_x_256, dirx256);
-  W_256 = _mm256_fmadd_ps(W_y_256, diry256, W_256);
-  W_256 = _mm256_fmadd_ps(W_z_256, dirz256, W_256);
-
-  __m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31);
-  __m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31);
-  __m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31);
-  __m256i UVW_256_1 = _mm256_add_epi32(_mm256_add_epi32(U_256_1, V_256_1), W_256_1);
-
-  const __m256i one256 = _mm256_set1_epi32(1);
-  const __m256i two256 = _mm256_set1_epi32(2);
-
-  __m256i mask_minmaxUVW_256 = _mm256_or_si256(_mm256_cmpeq_epi32(one256, UVW_256_1),
-                                               _mm256_cmpeq_epi32(two256, UVW_256_1));
-
-  unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256));
-  if ((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) {  // all bits set
-    return false;
-  }
-
-  /* Calculate geometry normal and denominator. */
-  __m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256);
-  __m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256);
-  __m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256);
-
-  Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256);
-  Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256);
-  Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256);
-
-  /* vertical dot */
-  __m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256);
-  den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256, den_256);
-  den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256, den_256);
-
-  /* Perform depth test. */
-  __m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256);
-  T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256, T_256);
-  T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256, T_256);
-
-  const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000);
-  __m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000);
-
-  __m256 sign_T_256 = _mm256_castsi256_ps(
-      _mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256));
-
-  unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256);
-  if (((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) {
-    return false;
-  }
-
-  __m256 xor_signmask_256 = _mm256_castsi256_ps(
-      _mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256));
-
-  ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8];
-  ccl_align(32) unsigned int mask_minmaxUVW8[8];
-
-  if (visibility == PATH_RAY_SHADOW_OPAQUE) {
-    __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256);
-    __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
-    __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256));
-    __m256 rayt_256 = _mm256_set1_ps((*isect)->t);
-    __m256i mask1 = _mm256_cmpgt_epi32(
-        _mm256_castps_si256(sign_T_256),
-        _mm256_castps_si256(_mm256_mul_ps(
-            _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)),
-            rayt_256)));
-    mask0 = _mm256_or_si256(mask1, mask0);
-    mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256);  //(~mask_minmaxUVW_pos) &(~mask)
-    mask_final_256 = _mm256_andnot_si256(
-        maskden256, mask_final_256);  //(~mask_minmaxUVW_pos) &(~mask) & (~maskden)
-    int mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256));
-    if ((mask_final & prim_num_mask) == 0) {
-      return false;
-    }
-    while (mask_final != 0) {
-      const int i = __bscf(mask_final);
-      if (i >= prim_num) {
-        return false;
-      }
-#  ifdef __VISIBILITY_FLAG__
-      if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
-        continue;
-      }
-#  endif
-      __m256 inv_den_256 = _mm256_rcp_ps(den_256);
-      U_256 = _mm256_mul_ps(U_256, inv_den_256);
-      V_256 = _mm256_mul_ps(V_256, inv_den_256);
-      T_256 = _mm256_mul_ps(T_256, inv_den_256);
-      _mm256_store_ps(U8, U_256);
-      _mm256_store_ps(V8, V_256);
-      _mm256_store_ps(T8, T_256);
-      (*isect)->u = U8[i];
-      (*isect)->v = V8[i];
-      (*isect)->t = T8[i];
-      (*isect)->prim = (prim_addr + i);
-      (*isect)->object = object;
-      (*isect)->type = PRIMITIVE_TRIANGLE;
-      return true;
-    }
-    return false;
-  }
-  else {
-    _mm256_store_ps(den8, den_256);
-    _mm256_store_ps(U8, U_256);
-    _mm256_store_ps(V8, V_256);
-    _mm256_store_ps(T8, T_256);
-
-    _mm256_store_ps(sign_T8, sign_T_256);
-    _mm256_store_ps(xor_signmask8, xor_signmask_256);
-    _mm256_store_si256((__m256i *)mask_minmaxUVW8, mask_minmaxUVW_256);
-
-    int ret = false;
-
-    if (visibility == PATH_RAY_SHADOW) {
-      for (int i = 0; i < prim_num; i++) {
-        if (mask_minmaxUVW8[i]) {
-          continue;
-        }
-#  ifdef __VISIBILITY_FLAG__
-        if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
-          continue;
-        }
-#  endif
-        if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) {
-          continue;
-        }
-        if (!den8[i]) {
-          continue;
-        }
-        const float inv_den = 1.0f / den8[i];
-        (*isect)->u = U8[i] * inv_den;
-        (*isect)->v = V8[i] * inv_den;
-        (*isect)->t = T8[i] * inv_den;
-        (*isect)->prim = (prim_addr + i);
-        (*isect)->object = object;
-        (*isect)->type = PRIMITIVE_TRIANGLE;
-        const int prim = kernel_tex_fetch(__prim_index, (*isect)->prim);
-        int shader = 0;
-#  ifdef __HAIR__
-        if (kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE)
-#  endif
-        {
-          shader = kernel_tex_fetch(__tri_shader, prim);
-        }
-#  ifdef __HAIR__
-        else {
-          float4 str = kernel_tex_fetch(__curves, prim);
-          shader = __float_as_int(str.z);
-        }
-#  endif
-        const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
-        /* If no transparent shadows, all light is blocked. */
-        if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
-          return 2;
-        }
-        /* If maximum number of hits reached, block all light. */
-        else if (num_hits == NULL || *num_hits == max_hits) {
-          return 2;
-        }
-        /* Move on to next entry in intersections array. */
-        ret = true;
-        (*isect)++;
-        (*num_hits)++;
-        (*num_hits_in_instance)++;
-        (*isect)->t = isect_t;
-      }
-    }
-    else {
-      for (int i = 0; i < prim_num; i++) {
-        if (mask_minmaxUVW8[i]) {
-          continue;
-        }
-#  ifdef __VISIBILITY_FLAG__
-        if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
-          continue;
-        }
-#  endif
-        if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) {
-          continue;
-        }
-        if (!den8[i]) {
-          continue;
-        }
-        const float inv_den = 1.0f / den8[i];
-        (*isect)->u = U8[i] * inv_den;
-        (*isect)->v = V8[i] * inv_den;
-        (*isect)->t = T8[i] * inv_den;
-        (*isect)->prim = (prim_addr + i);
-        (*isect)->object = object;
-        (*isect)->type = PRIMITIVE_TRIANGLE;
-        ret = true;
-      }
-    }
-    return ret;
-  }
-}
-
-ccl_device_inline int triangle_intersect8(KernelGlobals *kg,
-                                          Intersection **isect,
-                                          float3 P,
-                                          float3 dir,
-                                          uint visibility,
-                                          int object,
-                                          int prim_addr,
-                                          int prim_num,
-                                          uint *num_hits,
-                                          uint max_hits,
-                                          int *num_hits_in_instance,
-                                          float isect_t)
-{
-  __m128 tri_a[8], tri_b[8], tri_c[8];
-  __m256 tritmp[12], tri[12];
-  __m256 triA[3], triB[3], triC[3];
-
-  int i, r;
-
-  uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
-  for (i = 0; i < prim_num; i++) {
-    tri_a[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
-    tri_b[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
-    tri_c[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++];
-  }
-  // create 9 or  12 placeholders
-  tri[0] = _mm256_castps128_ps256(tri_a[0]);  //_mm256_zextps128_ps256
-  tri[1] = _mm256_castps128_ps256(tri_b[0]);  //_mm256_zextps128_ps256
-  tri[2] = _mm256_castps128_ps256(tri_c[0]);  //_mm256_zextps128_ps256
-
-  tri[3] = _mm256_castps128_ps256(tri_a[1]);  //_mm256_zextps128_ps256
-  tri[4] = _mm256_castps128_ps256(tri_b[1]);  //_mm256_zextps128_ps256
-  tri[5] = _mm256_castps128_ps256(tri_c[1]);  //_mm256_zextps128_ps256
-
-  tri[6] = _mm256_castps128_ps256(tri_a[2]);  //_mm256_zextps128_ps256
-  tri[7] = _mm256_castps128_ps256(tri_b[2]);  //_mm256_zextps128_ps256
-  tri[8] = _mm256_castps128_ps256(tri_c[2]);  //_mm256_zextps128_ps256
-
-  if (prim_num > 3) {
-    tri[9] = _mm256_castps128_ps256(tri_a[3]);   //_mm256_zextps128_ps256
-    tri[10] = _mm256_castps128_ps256(tri_b[3]);  //_mm256_zextps128_ps256
-    tri[11] = _mm256_castps128_ps256(tri_c[3]);  //_mm256_zextps128_ps256
-  }
-
-  for (i = 4, r = 0; i < prim_num; i++, r += 3) {
-    tri[r] = _mm256_insertf128_ps(tri[r], tri_a[i], 1);
-    tri[r + 1] = _mm256_insertf128_ps(tri[r + 1], tri_b[i], 1);
-    tri[r + 2] = _mm256_insertf128_ps(tri[r + 2], tri_c[i], 1);
-  }
-
-  //------------------------------------------------
-  // 0!  Xa0 Ya0 Za0 1 Xa4 Ya4 Za4  1
-  // 1!  Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1
-  // 2!  Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1
-
-  // 3!  Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1
-  // 4!  Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5  1
-  // 5!  Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1
-
-  // 6!  Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1
-  // 7!  Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6  1
-  // 8!  Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1
-
-  // 9!  Xa3 Ya3 Za3 1 Xa7 Ya7 Za7  1
-  // 10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7  1
-  // 11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7  1
-
-  //"transpose"
-  tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]);  // 0!  Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5
-  tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]);  // 1!  Za0 Za1 1   1   Za4 Za5  1   1
-
-  tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]);  // 2!  Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7
-  tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]);  // 3!  Za2 Za3  1   1  Za6 Za7  1   1
-
-  tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]);  // 4!  Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5
-  tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]);  // 5!  Zb0 Zb1  1  1   Zb4 Zb5  1   1
-
-  tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]);  // 6!  Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7
-  tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]);  // 7!  Zb2 Zb3  1    1 Zb6 Zb7  1   1
-
-  tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]);  // 8!  Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5
-  tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]);  // 9!  Zc0 Zc1  1   1  Zc4 Zc5  1   1
-
-  tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]);  // 10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7
-  tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]);  // 11! Zc2 Zc3  1   1  Zc6 Zc7  1   1
-
-  /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
-  triA[0] = _mm256_castpd_ps(
-      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[0]),
-                         _mm256_castps_pd(tritmp[2])));  //  Xa0 Xa1 Xa2 Xa3 Xa4 Xa5 Xa6 Xa7
-  triA[1] = _mm256_castpd_ps(
-      _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[0]),
-                         _mm256_castps_pd(tritmp[2])));  //  Ya0 Ya1 Ya2 Ya3 Ya4 Ya5 Ya6 Ya7
-  triA[2] = _mm256_castpd_ps(
-      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[1]),
-                         _mm256_castps_pd(tritmp[3])));  //  Za0 Za1 Za2 Za3 Za4 Za5 Za6 Za7
-
-  triB[0] = _mm256_castpd_ps(
-      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[4]),
-                         _mm256_castps_pd(tritmp[6])));  //  Xb0 Xb1  Xb2 Xb3 Xb4 Xb5 Xb5 Xb7
-  triB[1] = _mm256_castpd_ps(
-      _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[4]),
-                         _mm256_castps_pd(tritmp[6])));  //  Yb0 Yb1  Yb2 Yb3 Yb4 Yb5 Yb5 Yb7
-  triB[2] = _mm256_castpd_ps(
-      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[5]),
-                         _mm256_castps_pd(tritmp[7])));  //    Zb0 Zb1  Zb2 Zb3 Zb4 Zb5 Zb5 Zb7
-
-  triC[0] = _mm256_castpd_ps(
-      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]),
-                         _mm256_castps_pd(tritmp[10])));  // Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7
-  triC[1] = _mm256_castpd_ps(
-      _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]),
-                         _mm256_castps_pd(tritmp[10])));  // Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7
-  triC[2] = _mm256_castpd_ps(
-      _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]),
-                         _mm256_castps_pd(tritmp[11])));  // Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7
-
-  /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
-
-  int result = ray_triangle_intersect8(kg,
-                                       P,
-                                       dir,
-                                       isect,
-                                       visibility,
-                                       object,
-                                       triA,
-                                       triB,
-                                       triC,
-                                       prim_addr,
-                                       prim_num,
-                                       num_hits,
-                                       max_hits,
-                                       num_hits_in_instance,
-                                       isect_t);
-  return result;
-}
-
-#endif /* __KERNEL_AVX2__ */
-
 /* Special ray intersection routines for subsurface scattering. In that case we
  * only want to intersect with primitives in the same object, and if case of
  * multiple hits we pick a single random primitive as the intersection point.
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 799daa68450..02de2db8bd2 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -123,9 +123,6 @@ CCL_NAMESPACE_BEGIN
 
 /* Device specific features */
 #ifdef __KERNEL_CPU__
-#  ifdef __KERNEL_SSE2__
-#    define __QBVH__
-#  endif
 #  ifdef WITH_OSL
 #    define __OSL__
 #  endif
@@ -1389,13 +1386,10 @@ typedef enum KernelBVHLayout {
   BVH_LAYOUT_NONE = 0,
 
   BVH_LAYOUT_BVH2 = (1 << 0),
-  BVH_LAYOUT_BVH4 = (1 << 1),
-  BVH_LAYOUT_BVH8 = (1 << 2),
-
-  BVH_LAYOUT_EMBREE = (1 << 3),
-  BVH_LAYOUT_OPTIX = (1 << 4),
+  BVH_LAYOUT_EMBREE = (1 << 1),
+  BVH_LAYOUT_OPTIX = (1 << 2),
 
-  BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8,
+  BVH_LAYOUT_AUTO = BVH_LAYOUT_BVH2,
   BVH_LAYOUT_ALL = (unsigned int)(~0u),
 } KernelBVHLayout;
 
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index 3ce65802cff..6ad4f709ab5 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -31,7 +31,7 @@ DebugFlags::CPU::CPU()
       sse41(true),
       sse3(true),
       sse2(true),
-      bvh_layout(BVH_LAYOUT_DEFAULT),
+      bvh_layout(BVH_LAYOUT_AUTO),
       split_kernel(false)
 {
   reset();
@@ -57,18 +57,7 @@ void DebugFlags::CPU::reset()
 #undef STRINGIFY
 #undef CHECK_CPU_FLAGS
 
-  if (getenv("CYCLES_BVH2") != NULL) {
-    bvh_layout = BVH_LAYOUT_BVH2;
-  }
-  else if (getenv("CYCLES_BVH4") != NULL) {
-    bvh_layout = BVH_LAYOUT_BVH4;
-  }
-  else if (getenv("CYCLES_BVH8") != NULL) {
-    bvh_layout = BVH_LAYOUT_BVH8;
-  }
-  else {
-    bvh_layout = BVH_LAYOUT_DEFAULT;
-  }
+  bvh_layout = BVH_LAYOUT_AUTO;
 
   split_kernel = false;
 }
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index cf6b442b878..da9f5408b59 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -73,10 +73,10 @@ class DebugFlags {
       return sse2;
     }
 
-    /* Requested BVH size.
+    /* Requested BVH layout.
      *
-     * Rendering will use widest possible BVH which is below or equal
-     * this one.
+     * By default the fastest layout will be used. For debugging, the BVH layout
+     * used by other CPUs and GPUs can be selected here instead.
      */
     BVHLayout bvh_layout;
author	Brecht Van Lommel <brecht@blender.org>	2020-06-10 19:55:33 +0300
committer	Brecht Van Lommel <brecht@blender.org>	2020-06-22 14:28:01 +0300
commit	d1ef5146d72d40f97fdcbf28e96da49193c21dea (patch)
tree	7a19a24bd6b809c7de72b4e2499d62b8740e639a /intern
parent	1de0e13af619e405f351bf42924f819dc3a9bc44 (diff)