1 files changed, 1014 insertions, 1087 deletions
diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp
index c0b3d683e37..1d9b006e8cb 100644
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -41,72 +41,65 @@ CCL_NAMESPACE_BEGIN
 /* BVH Build Task */
 
 class BVHBuildTask : public Task {
-public:
-	BVHBuildTask(BVHBuild *build,
-	             InnerNode *node,
-	             int child,
-	             const BVHObjectBinning& range,
-	             int level)
-	: range_(range)
-	{
-		run = function_bind(&BVHBuild::thread_build_node,
-		                    build,
-		                    node,
-		                    child,
-		                    &range_,
-		                    level);
-	}
-private:
-	BVHObjectBinning range_;
+ public:
+  BVHBuildTask(
+      BVHBuild *build, InnerNode *node, int child, const BVHObjectBinning &range, int level)
+      : range_(range)
+  {
+    run = function_bind(&BVHBuild::thread_build_node, build, node, child, &range_, level);
+  }
+
+ private:
+  BVHObjectBinning range_;
 };
 
 class BVHSpatialSplitBuildTask : public Task {
-public:
-	BVHSpatialSplitBuildTask(BVHBuild *build,
-	                         InnerNode *node,
-	                         int child,
-	                         const BVHRange& range,
-	                         const vector<BVHReference>& references,
-	                         int level)
-	: range_(range),
-	  references_(references.begin() + range.start(),
-	              references.begin() + range.end())
-	{
-		range_.set_start(0);
-		run = function_bind(&BVHBuild::thread_build_spatial_split_node,
-		                    build,
-		                    node,
-		                    child,
-		                    &range_,
-		                    &references_,
-		                    level,
-		                    _1);
-	}
-private:
-	BVHRange range_;
-	vector<BVHReference> references_;
+ public:
+  BVHSpatialSplitBuildTask(BVHBuild *build,
+                           InnerNode *node,
+                           int child,
+                           const BVHRange &range,
+                           const vector<BVHReference> &references,
+                           int level)
+      : range_(range),
+        references_(references.begin() + range.start(), references.begin() + range.end())
+  {
+    range_.set_start(0);
+    run = function_bind(&BVHBuild::thread_build_spatial_split_node,
+                        build,
+                        node,
+                        child,
+                        &range_,
+                        &references_,
+                        level,
+                        _1);
+  }
+
+ private:
+  BVHRange range_;
+  vector<BVHReference> references_;
 };
 
 /* Constructor / Destructor */
 
-BVHBuild::BVHBuild(const vector<Object*>& objects_,
-                   array<int>& prim_type_,
-                   array<int>& prim_index_,
-                   array<int>& prim_object_,
-                   array<float2>& prim_time_,
-                   const BVHParams& params_,
-                   Progress& progress_)
- : objects(objects_),
-   prim_type(prim_type_),
-   prim_index(prim_index_),
-   prim_object(prim_object_),
-   prim_time(prim_time_),
-   params(params_),
-   progress(progress_),
-   progress_start_time(0.0),
-   unaligned_heuristic(objects_)
+BVHBuild::BVHBuild(const vector<Object *> &objects_,
+                   array<int> &prim_type_,
+                   array<int> &prim_index_,
+                   array<int> &prim_object_,
+                   array<float2> &prim_time_,
+                   const BVHParams &params_,
+                   Progress &progress_)
+    : objects(objects_),
+      prim_type(prim_type_),
+      prim_index(prim_index_),
+      prim_object(prim_object_),
+      prim_time(prim_time_),
+      params(params_),
+      progress(progress_),
+      progress_start_time(0.0),
+      unaligned_heuristic(objects_)
 {
-	spatial_min_overlap = 0.0f;
+  spatial_min_overlap = 0.0f;
 }
 
 BVHBuild::~BVHBuild()
@@ -115,467 +108,440 @@ BVHBuild::~BVHBuild()
 
 /* Adding References */
 
-void BVHBuild::add_reference_triangles(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
+void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox &center, Mesh *mesh, int i)
 {
-	const Attribute *attr_mP = NULL;
-	if(mesh->has_motion_blur()) {
-		attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-	}
-	const size_t num_triangles = mesh->num_triangles();
-	for(uint j = 0; j < num_triangles; j++) {
-		Mesh::Triangle t = mesh->get_triangle(j);
-		const float3 *verts = &mesh->verts[0];
-		if(attr_mP == NULL) {
-			BoundBox bounds = BoundBox::empty;
-			t.bounds_grow(verts, bounds);
-			if(bounds.valid() && t.valid(verts)) {
-				references.push_back(BVHReference(bounds,
-				                                  j,
-				                                  i,
-				                                  PRIMITIVE_TRIANGLE));
-				root.grow(bounds);
-				center.grow(bounds.center2());
-			}
-		}
-		else if(params.num_motion_triangle_steps == 0 || params.use_spatial_split) {
-			/* Motion triangles, simple case: single node for the whole
-			 * primitive. Lowest memory footprint and faster BVH build but
-			 * least optimal ray-tracing.
-			 */
-			/* TODO(sergey): Support motion steps for spatially split BVH. */
-			const size_t num_verts = mesh->verts.size();
-			const size_t num_steps = mesh->motion_steps;
-			const float3 *vert_steps = attr_mP->data_float3();
-			BoundBox bounds = BoundBox::empty;
-			t.bounds_grow(verts, bounds);
-			for(size_t step = 0; step < num_steps - 1; step++) {
-				t.bounds_grow(vert_steps + step*num_verts, bounds);
-			}
-			if(bounds.valid()) {
-				references.push_back(
-				        BVHReference(bounds,
-				                     j,
-				                     i,
-				                     PRIMITIVE_MOTION_TRIANGLE));
-				root.grow(bounds);
-				center.grow(bounds.center2());
-			}
-		}
-		else {
-			/* Motion triangles, trace optimized case:  we split triangle
-			 * primitives into separate nodes for each of the time steps.
-			 * This way we minimize overlap of neighbor curve primitives.
-			 */
-			const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
-			const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
-			const size_t num_verts = mesh->verts.size();
-			const size_t num_steps = mesh->motion_steps;
-			const float3 *vert_steps = attr_mP->data_float3();
-			/* Calculate bounding box of the previous time step.
-			 * Will be reused later to avoid duplicated work on
-			 * calculating BVH time step boundbox.
-			 */
-			float3 prev_verts[3];
-			t.motion_verts(verts,
-			               vert_steps,
-			               num_verts,
-			               num_steps,
-			               0.0f,
-			               prev_verts);
-			BoundBox prev_bounds = BoundBox::empty;
-			prev_bounds.grow(prev_verts[0]);
-			prev_bounds.grow(prev_verts[1]);
-			prev_bounds.grow(prev_verts[2]);
-			/* Create all primitive time steps, */
-			for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
-				const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
-				float3 curr_verts[3];
-				t.motion_verts(verts,
-				               vert_steps,
-				               num_verts,
-				               num_steps,
-				               curr_time,
-				               curr_verts);
-				BoundBox curr_bounds = BoundBox::empty;
-				curr_bounds.grow(curr_verts[0]);
-				curr_bounds.grow(curr_verts[1]);
-				curr_bounds.grow(curr_verts[2]);
-				BoundBox bounds = prev_bounds;
-				bounds.grow(curr_bounds);
-				if(bounds.valid()) {
-					const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
-					references.push_back(
-					        BVHReference(bounds,
-					                     j,
-					                     i,
-					                     PRIMITIVE_MOTION_TRIANGLE,
-					                     prev_time,
-					                     curr_time));
-					root.grow(bounds);
-					center.grow(bounds.center2());
-				}
-				/* Current time boundbox becomes previous one for the
-				 * next time step.
-				 */
-				prev_bounds = curr_bounds;
-			}
-		}
-	}
+  const Attribute *attr_mP = NULL;
+  if (mesh->has_motion_blur()) {
+    attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+  }
+  const size_t num_triangles = mesh->num_triangles();
+  for (uint j = 0; j < num_triangles; j++) {
+    Mesh::Triangle t = mesh->get_triangle(j);
+    const float3 *verts = &mesh->verts[0];
+    if (attr_mP == NULL) {
+      BoundBox bounds = BoundBox::empty;
+      t.bounds_grow(verts, bounds);
+      if (bounds.valid() && t.valid(verts)) {
+        references.push_back(BVHReference(bounds, j, i, PRIMITIVE_TRIANGLE));
+        root.grow(bounds);
+        center.grow(bounds.center2());
+      }
+    }
+    else if (params.num_motion_triangle_steps == 0 || params.use_spatial_split) {
+      /* Motion triangles, simple case: single node for the whole
+       * primitive. Lowest memory footprint and faster BVH build but
+       * least optimal ray-tracing.
+       */
+      /* TODO(sergey): Support motion steps for spatially split BVH. */
+      const size_t num_verts = mesh->verts.size();
+      const size_t num_steps = mesh->motion_steps;
+      const float3 *vert_steps = attr_mP->data_float3();
+      BoundBox bounds = BoundBox::empty;
+      t.bounds_grow(verts, bounds);
+      for (size_t step = 0; step < num_steps - 1; step++) {
+        t.bounds_grow(vert_steps + step * num_verts, bounds);
+      }
+      if (bounds.valid()) {
+        references.push_back(BVHReference(bounds, j, i, PRIMITIVE_MOTION_TRIANGLE));
+        root.grow(bounds);
+        center.grow(bounds.center2());
+      }
+    }
+    else {
+      /* Motion triangles, trace optimized case:  we split triangle
+       * primitives into separate nodes for each of the time steps.
+       * This way we minimize overlap of neighbor curve primitives.
+       */
+      const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
+      const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
+      const size_t num_verts = mesh->verts.size();
+      const size_t num_steps = mesh->motion_steps;
+      const float3 *vert_steps = attr_mP->data_float3();
+      /* Calculate bounding box of the previous time step.
+       * Will be reused later to avoid duplicated work on
+       * calculating BVH time step boundbox.
+       */
+      float3 prev_verts[3];
+      t.motion_verts(verts, vert_steps, num_verts, num_steps, 0.0f, prev_verts);
+      BoundBox prev_bounds = BoundBox::empty;
+      prev_bounds.grow(prev_verts[0]);
+      prev_bounds.grow(prev_verts[1]);
+      prev_bounds.grow(prev_verts[2]);
+      /* Create all primitive time steps, */
+      for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
+        const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
+        float3 curr_verts[3];
+        t.motion_verts(verts, vert_steps, num_verts, num_steps, curr_time, curr_verts);
+        BoundBox curr_bounds = BoundBox::empty;
+        curr_bounds.grow(curr_verts[0]);
+        curr_bounds.grow(curr_verts[1]);
+        curr_bounds.grow(curr_verts[2]);
+        BoundBox bounds = prev_bounds;
+        bounds.grow(curr_bounds);
+        if (bounds.valid()) {
+          const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
+          references.push_back(
+              BVHReference(bounds, j, i, PRIMITIVE_MOTION_TRIANGLE, prev_time, curr_time));
+          root.grow(bounds);
+          center.grow(bounds.center2());
+        }
+        /* Current time boundbox becomes previous one for the
+         * next time step.
+         */
+        prev_bounds = curr_bounds;
+      }
+    }
+  }
 }
 
-void BVHBuild::add_reference_curves(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
+void BVHBuild::add_reference_curves(BoundBox &root, BoundBox &center, Mesh *mesh, int i)
 {
-	const Attribute *curve_attr_mP = NULL;
-	if(mesh->has_motion_blur()) {
-		curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-	}
-	const size_t num_curves = mesh->num_curves();
-	for(uint j = 0; j < num_curves; j++) {
-		const Mesh::Curve curve = mesh->get_curve(j);
-		const float *curve_radius = &mesh->curve_radius[0];
-		for(int k = 0; k < curve.num_keys - 1; k++) {
-			if(curve_attr_mP == NULL) {
-				/* Really simple logic for static hair. */
-				BoundBox bounds = BoundBox::empty;
-				curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
-				if(bounds.valid()) {
-					int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
-					references.push_back(BVHReference(bounds, j, i, packed_type));
-					root.grow(bounds);
-					center.grow(bounds.center2());
-				}
-			}
-			else if(params.num_motion_curve_steps == 0 || params.use_spatial_split) {
-				/* Simple case of motion curves: single node for the while
-				 * shutter time. Lowest memory usage but less optimal
-				 * rendering.
-				 */
-				/* TODO(sergey): Support motion steps for spatially split BVH. */
-				BoundBox bounds = BoundBox::empty;
-				curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
-				const size_t num_keys = mesh->curve_keys.size();
-				const size_t num_steps = mesh->motion_steps;
-				const float3 *key_steps = curve_attr_mP->data_float3();
-				for(size_t step = 0; step < num_steps - 1; step++) {
-					curve.bounds_grow(k,
-					                  key_steps + step*num_keys,
-					                  curve_radius,
-					                  bounds);
-				}
-				if(bounds.valid()) {
-					int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
-					references.push_back(BVHReference(bounds,
-					                                  j,
-					                                  i,
-					                                  packed_type));
-					root.grow(bounds);
-					center.grow(bounds.center2());
-				}
-			}
-			else {
-				/* Motion curves, trace optimized case:  we split curve keys
-				 * primitives into separate nodes for each of the time steps.
-				 * This way we minimize overlap of neighbor curve primitives.
-				 */
-				const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
-				const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
-				const size_t num_steps = mesh->motion_steps;
-				const float3 *curve_keys = &mesh->curve_keys[0];
-				const float3 *key_steps = curve_attr_mP->data_float3();
-				const size_t num_keys = mesh->curve_keys.size();
-				/* Calculate bounding box of the previous time step.
-				 * Will be reused later to avoid duplicated work on
-				 * calculating BVH time step boundbox.
-				 */
-				float4 prev_keys[4];
-				curve.cardinal_motion_keys(curve_keys,
-				                           curve_radius,
-				                           key_steps,
-				                           num_keys,
-				                           num_steps,
-				                           0.0f,
-				                           k - 1, k, k + 1, k + 2,
-				                           prev_keys);
-				BoundBox prev_bounds = BoundBox::empty;
-				curve.bounds_grow(prev_keys, prev_bounds);
-				/* Create all primitive time steps, */
-				for(int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
-					const float curr_time = (float)(bvh_step) * num_bvh_steps_inv_1;
-					float4 curr_keys[4];
-					curve.cardinal_motion_keys(curve_keys,
-					                           curve_radius,
-					                           key_steps,
-					                           num_keys,
-					                           num_steps,
-					                           curr_time,
-					                           k - 1, k, k + 1, k + 2,
-					                           curr_keys);
-					BoundBox curr_bounds = BoundBox::empty;
-					curve.bounds_grow(curr_keys, curr_bounds);
-					BoundBox bounds = prev_bounds;
-					bounds.grow(curr_bounds);
-					if(bounds.valid()) {
-						const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
-						int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
-						references.push_back(BVHReference(bounds,
-						                                  j,
-						                                  i,
-						                                  packed_type,
-						                                  prev_time,
-						                                  curr_time));
-						root.grow(bounds);
-						center.grow(bounds.center2());
-					}
-					/* Current time boundbox becomes previous one for the
-					 * next time step.
-					 */
-					prev_bounds = curr_bounds;
-				}
-			}
-		}
-	}
+  const Attribute *curve_attr_mP = NULL;
+  if (mesh->has_motion_blur()) {
+    curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+  }
+  const size_t num_curves = mesh->num_curves();
+  for (uint j = 0; j < num_curves; j++) {
+    const Mesh::Curve curve = mesh->get_curve(j);
+    const float *curve_radius = &mesh->curve_radius[0];
+    for (int k = 0; k < curve.num_keys - 1; k++) {
+      if (curve_attr_mP == NULL) {
+        /* Really simple logic for static hair. */
+        BoundBox bounds = BoundBox::empty;
+        curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
+        if (bounds.valid()) {
+          int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE, k);
+          references.push_back(BVHReference(bounds, j, i, packed_type));
+          root.grow(bounds);
+          center.grow(bounds.center2());
+        }
+      }
+      else if (params.num_motion_curve_steps == 0 || params.use_spatial_split) {
+        /* Simple case of motion curves: single node for the while
+         * shutter time. Lowest memory usage but less optimal
+         * rendering.
+         */
+        /* TODO(sergey): Support motion steps for spatially split BVH. */
+        BoundBox bounds = BoundBox::empty;
+        curve.bounds_grow(k, &mesh->curve_keys[0], curve_radius, bounds);
+        const size_t num_keys = mesh->curve_keys.size();
+        const size_t num_steps = mesh->motion_steps;
+        const float3 *key_steps = curve_attr_mP->data_float3();
+        for (size_t step = 0; step < num_steps - 1; step++) {
+          curve.bounds_grow(k, key_steps + step * num_keys, curve_radius, bounds);
+        }
+        if (bounds.valid()) {
+          int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
+          references.push_back(BVHReference(bounds, j, i, packed_type));
+          root.grow(bounds);
+          center.grow(bounds.center2());
+        }
+      }
+      else {
+        /* Motion curves, trace optimized case:  we split curve keys
+         * primitives into separate nodes for each of the time steps.
+         * This way we minimize overlap of neighbor curve primitives.
+         */
+        const int num_bvh_steps = params.num_motion_curve_steps * 2 + 1;
+        const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
+        const size_t num_steps = mesh->motion_steps;
+        const float3 *curve_keys = &mesh->curve_keys[0];
+        const float3 *key_steps = curve_attr_mP->data_float3();
+        const size_t num_keys = mesh->curve_keys.size();
+        /* Calculate bounding box of the previous time step.
+         * Will be reused later to avoid duplicated work on
+         * calculating BVH time step boundbox.
+         */
+        float4 prev_keys[4];
+        curve.cardinal_motion_keys(curve_keys,
+                                   curve_radius,
+                                   key_steps,
+                                   num_keys,
+                                   num_steps,
+                                   0.0f,
+                                   k - 1,
+                                   k,
+                                   k + 1,
+                                   k + 2,
+                                   prev_keys);
+        BoundBox prev_bounds = BoundBox::empty;
+        curve.bounds_grow(prev_keys, prev_bounds);
+        /* Create all primitive time steps, */
+        for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
+          const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
+          float4 curr_keys[4];
+          curve.cardinal_motion_keys(curve_keys,
+                                     curve_radius,
+                                     key_steps,
+                                     num_keys,
+                                     num_steps,
+                                     curr_time,
+                                     k - 1,
+                                     k,
+                                     k + 1,
+                                     k + 2,
+                                     curr_keys);
+          BoundBox curr_bounds = BoundBox::empty;
+          curve.bounds_grow(curr_keys, curr_bounds);
+          BoundBox bounds = prev_bounds;
+          bounds.grow(curr_bounds);
+          if (bounds.valid()) {
+            const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
+            int packed_type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_MOTION_CURVE, k);
+            references.push_back(BVHReference(bounds, j, i, packed_type, prev_time, curr_time));
+            root.grow(bounds);
+            center.grow(bounds.center2());
+          }
+          /* Current time boundbox becomes previous one for the
+           * next time step.
+           */
+          prev_bounds = curr_bounds;
+        }
+      }
+    }
+  }
 }
 
-void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
+void BVHBuild::add_reference_mesh(BoundBox &root, BoundBox &center, Mesh *mesh, int i)
 {
-	if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
-		add_reference_triangles(root, center, mesh, i);
-	}
-	if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
-		add_reference_curves(root, center, mesh, i);
-	}
+  if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
+    add_reference_triangles(root, center, mesh, i);
+  }
+  if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
+    add_reference_curves(root, center, mesh, i);
+  }
 }
 
-void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i)
+void BVHBuild::add_reference_object(BoundBox &root, BoundBox &center, Object *ob, int i)
 {
-	references.push_back(BVHReference(ob->bounds, -1, i, 0));
-	root.grow(ob->bounds);
-	center.grow(ob->bounds.center2());
+  references.push_back(BVHReference(ob->bounds, -1, i, 0));
+  root.grow(ob->bounds);
+  center.grow(ob->bounds.center2());
 }
 
 static size_t count_curve_segments(Mesh *mesh)
 {
-	size_t num = 0, num_curves = mesh->num_curves();
+  size_t num = 0, num_curves = mesh->num_curves();
 
-	for(size_t i = 0; i < num_curves; i++)
-		num += mesh->get_curve(i).num_keys - 1;
+  for (size_t i = 0; i < num_curves; i++)
+    num += mesh->get_curve(i).num_keys - 1;
 
-	return num;
+  return num;
 }
 
-void BVHBuild::add_references(BVHRange& root)
+void BVHBuild::add_references(BVHRange &root)
 {
-	/* reserve space for references */
-	size_t num_alloc_references = 0;
-
-	foreach(Object *ob, objects) {
-		if(params.top_level) {
-			if(!ob->is_traceable()) {
-				continue;
-			}
-			if(!ob->mesh->is_instanced()) {
-				if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
-					num_alloc_references += ob->mesh->num_triangles();
-				}
-				if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
-					num_alloc_references += count_curve_segments(ob->mesh);
-				}
-			}
-			else
-				num_alloc_references++;
-		}
-		else {
-			if(params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
-				num_alloc_references += ob->mesh->num_triangles();
-			}
-			if(params.primitive_mask & PRIMITIVE_ALL_CURVE) {
-				num_alloc_references += count_curve_segments(ob->mesh);
-			}
-		}
-	}
-
-	references.reserve(num_alloc_references);
-
-	/* add references from objects */
-	BoundBox bounds = BoundBox::empty, center = BoundBox::empty;
-	int i = 0;
-
-	foreach(Object *ob, objects) {
-		if(params.top_level) {
-			if(!ob->is_traceable()) {
-				++i;
-				continue;
-			}
-			if(!ob->mesh->is_instanced())
-				add_reference_mesh(bounds, center, ob->mesh, i);
-			else
-				add_reference_object(bounds, center, ob, i);
-		}
-		else
-			add_reference_mesh(bounds, center, ob->mesh, i);
-
-		i++;
-
-		if(progress.get_cancel()) return;
-	}
-
-	/* happens mostly on empty meshes */
-	if(!bounds.valid())
-		bounds.grow(make_float3(0.0f, 0.0f, 0.0f));
-
-	root = BVHRange(bounds, center, 0, references.size());
+  /* reserve space for references */
+  size_t num_alloc_references = 0;
+
+  foreach (Object *ob, objects) {
+    if (params.top_level) {
+      if (!ob->is_traceable()) {
+        continue;
+      }
+      if (!ob->mesh->is_instanced()) {
+        if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
+          num_alloc_references += ob->mesh->num_triangles();
+        }
+        if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
+          num_alloc_references += count_curve_segments(ob->mesh);
+        }
+      }
+      else
+        num_alloc_references++;
+    }
+    else {
+      if (params.primitive_mask & PRIMITIVE_ALL_TRIANGLE) {
+        num_alloc_references += ob->mesh->num_triangles();
+      }
+      if (params.primitive_mask & PRIMITIVE_ALL_CURVE) {
+        num_alloc_references += count_curve_segments(ob->mesh);
+      }
+    }
+  }
+
+  references.reserve(num_alloc_references);
+
+  /* add references from objects */
+  BoundBox bounds = BoundBox::empty, center = BoundBox::empty;
+  int i = 0;
+
+  foreach (Object *ob, objects) {
+    if (params.top_level) {
+      if (!ob->is_traceable()) {
+        ++i;
+        continue;
+      }
+      if (!ob->mesh->is_instanced())
+        add_reference_mesh(bounds, center, ob->mesh, i);
+      else
+        add_reference_object(bounds, center, ob, i);
+    }
+    else
+      add_reference_mesh(bounds, center, ob->mesh, i);
+
+    i++;
+
+    if (progress.get_cancel())
+      return;
+  }
+
+  /* happens mostly on empty meshes */
+  if (!bounds.valid())
+    bounds.grow(make_float3(0.0f, 0.0f, 0.0f));
+
+  root = BVHRange(bounds, center, 0, references.size());
 }
 
 /* Build */
 
-BVHNode* BVHBuild::run()
+BVHNode *BVHBuild::run()
 {
-	BVHRange root;
-
-	/* add references */
-	add_references(root);
-
-	if(progress.get_cancel())
-		return NULL;
-
-	/* init spatial splits */
-	if(params.top_level) {
-		/* NOTE: Technically it is supported by the builder but it's not really
-		 * optimized for speed yet and not really clear yet if it has measurable
-		 * improvement on render time. Needs some extra investigation before
-		 * enabling spatial split for top level BVH.
-		 */
-		params.use_spatial_split = false;
-	}
-
-	spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
-	if(params.use_spatial_split) {
-		/* NOTE: The API here tries to be as much ready for multi-threaded build
-		 * as possible, but at the same time it tries not to introduce any
-		 * changes in behavior for until all refactoring needed for threading is
-		 * finished.
-		 *
-		 * So we currently allocate single storage for now, which is only used by
-		 * the only thread working on the spatial BVH build.
-		 */
-		spatial_storage.resize(TaskScheduler::num_threads() + 1);
-		size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1;
-		foreach(BVHSpatialStorage &storage, spatial_storage) {
-			storage.right_bounds.clear();
-		}
-		spatial_storage[0].right_bounds.resize(num_bins);
-	}
-	spatial_free_index = 0;
-
-	need_prim_time = params.num_motion_curve_steps > 0 ||
-	                 params.num_motion_triangle_steps > 0;
-
-	/* init progress updates */
-	double build_start_time;
-	build_start_time = progress_start_time = time_dt();
-	progress_count = 0;
-	progress_total = references.size();
-	progress_original_total = progress_total;
-
-	prim_type.resize(references.size());
-	prim_index.resize(references.size());
-	prim_object.resize(references.size());
-	if(need_prim_time) {
-		prim_time.resize(references.size());
-	}
-	else {
-		prim_time.resize(0);
-	}
-
-	/* build recursively */
-	BVHNode *rootnode;
-
-	if(params.use_spatial_split) {
-		/* Perform multithreaded spatial split build. */
-		rootnode = build_node(root, &references, 0, 0);
-		task_pool.wait_work();
-	}
-	else {
-		/* Perform multithreaded binning build. */
-		BVHObjectBinning rootbin(root, (references.size())? &references[0]: NULL);
-		rootnode = build_node(rootbin, 0);
-		task_pool.wait_work();
-	}
-
-	/* delete if we canceled */
-	if(rootnode) {
-		if(progress.get_cancel()) {
-			rootnode->deleteSubtree();
-			rootnode = NULL;
-			VLOG(1) << "BVH build cancelled.";
-		}
-		else {
-			/*rotate(rootnode, 4, 5);*/
-			rootnode->update_visibility();
-			rootnode->update_time();
-		}
-		if(rootnode != NULL) {
-			VLOG(1) << "BVH build statistics:\n"
-			        << "  Build time: " << time_dt() - build_start_time << "\n"
-			        << "  Total number of nodes: "
-			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT)) << "\n"
-			        << "  Number of inner nodes: "
-			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT)) << "\n"
-			        << "  Number of leaf nodes: "
-			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT)) << "\n"
-			        << "  Number of unaligned nodes: "
-			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT))  << "\n"
-			        << "  Allocation slop factor: "
-			               << ((prim_type.capacity() != 0)
-			                       ? (float)prim_type.size() / prim_type.capacity()
-			                       : 1.0f) << "\n"
-			        << "  Maximum depth: "
-			        << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_DEPTH))  << "\n";
-		}
-	}
-
-
-	return rootnode;
+  BVHRange root;
+
+  /* add references */
+  add_references(root);
+
+  if (progress.get_cancel())
+    return NULL;
+
+  /* init spatial splits */
+  if (params.top_level) {
+    /* NOTE: Technically it is supported by the builder but it's not really
+     * optimized for speed yet and not really clear yet if it has measurable
+     * improvement on render time. Needs some extra investigation before
+     * enabling spatial split for top level BVH.
+     */
+    params.use_spatial_split = false;
+  }
+
+  spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
+  if (params.use_spatial_split) {
+    /* NOTE: The API here tries to be as much ready for multi-threaded build
+     * as possible, but at the same time it tries not to introduce any
+     * changes in behavior for until all refactoring needed for threading is
+     * finished.
+     *
+     * So we currently allocate single storage for now, which is only used by
+     * the only thread working on the spatial BVH build.
+     */
+    spatial_storage.resize(TaskScheduler::num_threads() + 1);
+    size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1;
+    foreach (BVHSpatialStorage &storage, spatial_storage) {
+      storage.right_bounds.clear();
+    }
+    spatial_storage[0].right_bounds.resize(num_bins);
+  }
+  spatial_free_index = 0;
+
+  need_prim_time = params.num_motion_curve_steps > 0 || params.num_motion_triangle_steps > 0;
+
+  /* init progress updates */
+  double build_start_time;
+  build_start_time = progress_start_time = time_dt();
+  progress_count = 0;
+  progress_total = references.size();
+  progress_original_total = progress_total;
+
+  prim_type.resize(references.size());
+  prim_index.resize(references.size());
+  prim_object.resize(references.size());
+  if (need_prim_time) {
+    prim_time.resize(references.size());
+  }
+  else {
+    prim_time.resize(0);
+  }
+
+  /* build recursively */
+  BVHNode *rootnode;
+
+  if (params.use_spatial_split) {
+    /* Perform multithreaded spatial split build. */
+    rootnode = build_node(root, &references, 0, 0);
+    task_pool.wait_work();
+  }
+  else {
+    /* Perform multithreaded binning build. */
+    BVHObjectBinning rootbin(root, (references.size()) ? &references[0] : NULL);
+    rootnode = build_node(rootbin, 0);
+    task_pool.wait_work();
+  }
+
+  /* delete if we canceled */
+  if (rootnode) {
+    if (progress.get_cancel()) {
+      rootnode->deleteSubtree();
+      rootnode = NULL;
+      VLOG(1) << "BVH build cancelled.";
+    }
+    else {
+      /*rotate(rootnode, 4, 5);*/
+      rootnode->update_visibility();
+      rootnode->update_time();
+    }
+    if (rootnode != NULL) {
+      VLOG(1) << "BVH build statistics:\n"
+              << "  Build time: " << time_dt() - build_start_time << "\n"
+              << "  Total number of nodes: "
+              << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_NODE_COUNT))
+              << "\n"
+              << "  Number of inner nodes: "
+              << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_INNER_COUNT))
+              << "\n"
+              << "  Number of leaf nodes: "
+              << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_LEAF_COUNT))
+              << "\n"
+              << "  Number of unaligned nodes: "
+              << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_UNALIGNED_COUNT))
+              << "\n"
+              << "  Allocation slop factor: "
+              << ((prim_type.capacity() != 0) ? (float)prim_type.size() / prim_type.capacity() :
+                                                1.0f)
+              << "\n"
+              << "  Maximum depth: "
+              << string_human_readable_number(rootnode->getSubtreeSize(BVH_STAT_DEPTH)) << "\n";
+    }
+  }
+
+  return rootnode;
 }
 
 void BVHBuild::progress_update()
 {
-	if(time_dt() - progress_start_time < 0.25)
-		return;
+  if (time_dt() - progress_start_time < 0.25)
+    return;
 
-	double progress_start = (double)progress_count/(double)progress_total;
-	double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total;
+  double progress_start = (double)progress_count / (double)progress_total;
+  double duplicates = (double)(progress_total - progress_original_total) / (double)progress_total;
 
-	string msg = string_printf("Building BVH %.0f%%, duplicates %.0f%%",
-	                           progress_start * 100.0, duplicates * 100.0);
+  string msg = string_printf(
+      "Building BVH %.0f%%, duplicates %.0f%%", progress_start * 100.0, duplicates * 100.0);
 
-	progress.set_substatus(msg);
-	progress_start_time = time_dt();
+  progress.set_substatus(msg);
+  progress_start_time = time_dt();
 }
 
-void BVHBuild::thread_build_node(InnerNode *inner,
-                                 int child,
-                                 BVHObjectBinning *range,
-                                 int level)
+void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *range, int level)
 {
-	if(progress.get_cancel())
-		return;
+  if (progress.get_cancel())
+    return;
 
-	/* build nodes */
-	BVHNode *node = build_node(*range, level);
+  /* build nodes */
+  BVHNode *node = build_node(*range, level);
 
-	/* set child in inner node */
-	inner->children[child] = node;
+  /* set child in inner node */
+  inner->children[child] = node;
 
-	/* update progress */
-	if(range->size() < THREAD_TASK_SIZE) {
-		/*rotate(node, INT_MAX, 5);*/
+  /* update progress */
+  if (range->size() < THREAD_TASK_SIZE) {
+    /*rotate(node, INT_MAX, 5);*/
 
-		thread_scoped_lock lock(build_mutex);
+    thread_scoped_lock lock(build_mutex);
 
-		progress_count += range->size();
-		progress_update();
-	}
+    progress_count += range->size();
+    progress_update();
+  }
 }
 
 void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
@@ -585,567 +551,528 @@ void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
                                                int level,
                                                int thread_id)
 {
-	if(progress.get_cancel()) {
-		return;
-	}
+  if (progress.get_cancel()) {
+    return;
+  }
 
-	/* build nodes */
-	BVHNode *node = build_node(*range, references, level, thread_id);
+  /* build nodes */
+  BVHNode *node = build_node(*range, references, level, thread_id);
 
-	/* set child in inner node */
-	inner->children[child] = node;
+  /* set child in inner node */
+  inner->children[child] = node;
 }
 
-bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
-                                          const vector<BVHReference>& references) const
+bool BVHBuild::range_within_max_leaf_size(const BVHRange &range,
+                                          const vector<BVHReference> &references) const
 {
-	size_t size = range.size();
-	size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size);
-
-	if(size > max_leaf_size)
-		return false;
-
-	size_t num_triangles = 0;
-	size_t num_motion_triangles = 0;
-	size_t num_curves = 0;
-	size_t num_motion_curves = 0;
-
-	for(int i = 0; i < size; i++) {
-		const BVHReference& ref = references[range.start() + i];
-
-		if(ref.prim_type() & PRIMITIVE_CURVE)
-			num_curves++;
-		if(ref.prim_type() & PRIMITIVE_MOTION_CURVE)
-			num_motion_curves++;
-		else if(ref.prim_type() & PRIMITIVE_TRIANGLE)
-			num_triangles++;
-		else if(ref.prim_type() & PRIMITIVE_MOTION_TRIANGLE)
-			num_motion_triangles++;
-	}
-
-	return (num_triangles <= params.max_triangle_leaf_size) &&
-	       (num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
-	       (num_curves <= params.max_curve_leaf_size) &&
-	       (num_motion_curves <= params.max_motion_curve_leaf_size);
+  size_t size = range.size();
+  size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size);
+
+  if (size > max_leaf_size)
+    return false;
+
+  size_t num_triangles = 0;
+  size_t num_motion_triangles = 0;
+  size_t num_curves = 0;
+  size_t num_motion_curves = 0;
+
+  for (int i = 0; i < size; i++) {
+    const BVHReference &ref = references[range.start() + i];
+
+    if (ref.prim_type() & PRIMITIVE_CURVE)
+      num_curves++;
+    if (ref.prim_type() & PRIMITIVE_MOTION_CURVE)
+      num_motion_curves++;
+    else if (ref.prim_type() & PRIMITIVE_TRIANGLE)
+      num_triangles++;
+    else if (ref.prim_type() & PRIMITIVE_MOTION_TRIANGLE)
+      num_motion_triangles++;
+  }
+
+  return (num_triangles <= params.max_triangle_leaf_size) &&
+         (num_motion_triangles <= params.max_motion_triangle_leaf_size) &&
+         (num_curves <= params.max_curve_leaf_size) &&
+         (num_motion_curves <= params.max_motion_curve_leaf_size);
 }
 
 /* multithreaded binning builder */
-BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
+BVHNode *BVHBuild::build_node(const BVHObjectBinning &range, int level)
 {
-	size_t size = range.size();
-	float leafSAH = params.sah_primitive_cost * range.leafSAH;
-	float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_primitive_cost * range.splitSAH;
-
-	/* Have at least one inner node on top level, for performance and correct
-	 * visibility tests, since object instances do not check visibility flag.
-	 */
-	if(!(range.size() > 0 && params.top_level && level == 0)) {
-		/* Make leaf node when threshold reached or SAH tells us. */
-		if((params.small_enough_for_leaf(size, level)) ||
-		   (range_within_max_leaf_size(range, references) && leafSAH < splitSAH))
-		{
-			return create_leaf_node(range, references);
-		}
-	}
-
-	BVHObjectBinning unaligned_range;
-	float unalignedSplitSAH = FLT_MAX;
-	float unalignedLeafSAH = FLT_MAX;
-	Transform aligned_space;
-	bool do_unalinged_split = false;
-	if(params.use_unaligned_nodes &&
-	   splitSAH > params.unaligned_split_threshold*leafSAH)
-	{
-		aligned_space = unaligned_heuristic.compute_aligned_space(
-		        range, &references[0]);
-		unaligned_range = BVHObjectBinning(range,
-		                                   &references[0],
-		                                   &unaligned_heuristic,
-		                                   &aligned_space);
-		unalignedSplitSAH = params.sah_node_cost * unaligned_range.unaligned_bounds().half_area() +
-		                    params.sah_primitive_cost * unaligned_range.splitSAH;
-		unalignedLeafSAH = params.sah_primitive_cost * unaligned_range.leafSAH;
-		if(!(range.size() > 0 && params.top_level && level == 0)) {
-			if(unalignedLeafSAH < unalignedSplitSAH && unalignedSplitSAH < splitSAH &&
-			   range_within_max_leaf_size(range, references))
-			{
-				return create_leaf_node(range, references);
-			}
-		}
-		/* Check whether unaligned split is better than the regular one. */
-		if(unalignedSplitSAH < splitSAH) {
-			do_unalinged_split = true;
-		}
-	}
-
-	/* Perform split. */
-	BVHObjectBinning left, right;
-	if(do_unalinged_split) {
-		unaligned_range.split(&references[0], left, right);
-	}
-	else {
-		range.split(&references[0], left, right);
-	}
-
-	BoundBox bounds;
-	if(do_unalinged_split) {
-		bounds = unaligned_heuristic.compute_aligned_boundbox(
-		        range, &references[0], aligned_space);
-	}
-	else {
-		bounds = range.bounds();
-	}
-
-	/* Create inner node. */
-	InnerNode *inner;
-	if(range.size() < THREAD_TASK_SIZE) {
-		/* local build */
-		BVHNode *leftnode = build_node(left, level + 1);
-		BVHNode *rightnode = build_node(right, level + 1);
-
-		inner = new InnerNode(bounds, leftnode, rightnode);
-	}
-	else {
-		/* Threaded build */
-		inner = new InnerNode(bounds);
-
-		task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
-		task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
-	}
-
-	if(do_unalinged_split) {
-		inner->set_aligned_space(aligned_space);
-	}
-
-	return inner;
+  size_t size = range.size();
+  float leafSAH = params.sah_primitive_cost * range.leafSAH;
+  float splitSAH = params.sah_node_cost * range.bounds().half_area() +
+                   params.sah_primitive_cost * range.splitSAH;
+
+  /* Have at least one inner node on top level, for performance and correct
+   * visibility tests, since object instances do not check visibility flag.
+   */
+  if (!(range.size() > 0 && params.top_level && level == 0)) {
+    /* Make leaf node when threshold reached or SAH tells us. */
+    if ((params.small_enough_for_leaf(size, level)) ||
+        (range_within_max_leaf_size(range, references) && leafSAH < splitSAH)) {
+      return create_leaf_node(range, references);
+    }
+  }
+
+  BVHObjectBinning unaligned_range;
+  float unalignedSplitSAH = FLT_MAX;
+  float unalignedLeafSAH = FLT_MAX;
+  Transform aligned_space;
+  bool do_unalinged_split = false;
+  if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) {
+    aligned_space = unaligned_heuristic.compute_aligned_space(range, &references[0]);
+    unaligned_range = BVHObjectBinning(
+        range, &references[0], &unaligned_heuristic, &aligned_space);
+    unalignedSplitSAH = params.sah_node_cost * unaligned_range.unaligned_bounds().half_area() +
+                        params.sah_primitive_cost * unaligned_range.splitSAH;
+    unalignedLeafSAH = params.sah_primitive_cost * unaligned_range.leafSAH;
+    if (!(range.size() > 0 && params.top_level && level == 0)) {
+      if (unalignedLeafSAH < unalignedSplitSAH && unalignedSplitSAH < splitSAH &&
+          range_within_max_leaf_size(range, references)) {
+        return create_leaf_node(range, references);
+      }
+    }
+    /* Check whether unaligned split is better than the regular one. */
+    if (unalignedSplitSAH < splitSAH) {
+      do_unalinged_split = true;
+    }
+  }
+
+  /* Perform split. */
+  BVHObjectBinning left, right;
+  if (do_unalinged_split) {
+    unaligned_range.split(&references[0], left, right);
+  }
+  else {
+    range.split(&references[0], left, right);
+  }
+
+  BoundBox bounds;
+  if (do_unalinged_split) {
+    bounds = unaligned_heuristic.compute_aligned_boundbox(range, &references[0], aligned_space);
+  }
+  else {
+    bounds = range.bounds();
+  }
+
+  /* Create inner node. */
+  InnerNode *inner;
+  if (range.size() < THREAD_TASK_SIZE) {
+    /* local build */
+    BVHNode *leftnode = build_node(left, level + 1);
+    BVHNode *rightnode = build_node(right, level + 1);
+
+    inner = new InnerNode(bounds, leftnode, rightnode);
+  }
+  else {
+    /* Threaded build */
+    inner = new InnerNode(bounds);
+
+    task_pool.push(new BVHBuildTask(this, inner, 0, left, level + 1), true);
+    task_pool.push(new BVHBuildTask(this, inner, 1, right, level + 1), true);
+  }
+
+  if (do_unalinged_split) {
+    inner->set_aligned_space(aligned_space);
+  }
+
+  return inner;
 }
 
 /* multithreaded spatial split builder */
-BVHNode* BVHBuild::build_node(const BVHRange& range,
+BVHNode *BVHBuild::build_node(const BVHRange &range,
                               vector<BVHReference> *references,
                               int level,
                               int thread_id)
 {
-	/* Update progress.
-	 *
-	 * TODO(sergey): Currently it matches old behavior, but we can move it to the
-	 * task thread (which will mimic non=split builder) and save some CPU ticks
-	 * on checking cancel status.
-	 */
-	progress_update();
-	if(progress.get_cancel()) {
-		return NULL;
-	}
-
-	/* Small enough or too deep => create leaf. */
-	if(!(range.size() > 0 && params.top_level && level == 0)) {
-		if(params.small_enough_for_leaf(range.size(), level)) {
-			progress_count += range.size();
-			return create_leaf_node(range, *references);
-		}
-	}
-
-	/* Perform splitting test. */
-	BVHSpatialStorage *storage = &spatial_storage[thread_id];
-	BVHMixedSplit split(this, storage, range, references, level);
-
-	if(!(range.size() > 0 && params.top_level && level == 0)) {
-		if(split.no_split) {
-			progress_count += range.size();
-			return create_leaf_node(range, *references);
-		}
-	}
-	float leafSAH = params.sah_primitive_cost * split.leafSAH;
-	float splitSAH = params.sah_node_cost * range.bounds().half_area() +
-	                 params.sah_primitive_cost * split.nodeSAH;
-
-	BVHMixedSplit unaligned_split;
-	float unalignedSplitSAH = FLT_MAX;
-	/* float unalignedLeafSAH = FLT_MAX; */
-	Transform aligned_space;
-	bool do_unalinged_split = false;
-	if(params.use_unaligned_nodes &&
-	   splitSAH > params.unaligned_split_threshold*leafSAH)
-	{
-		aligned_space =
-		        unaligned_heuristic.compute_aligned_space(range, &references->at(0));
-		unaligned_split = BVHMixedSplit(this,
-		                                storage,
-		                                range,
-		                                references,
-		                                level,
-		                                &unaligned_heuristic,
-		                                &aligned_space);
-		/* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
-		unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() +
-		                    params.sah_primitive_cost * unaligned_split.nodeSAH;
-		/* TOOD(sergey): Check we can create leaf already. */
-		/* Check whether unaligned split is better than the regulat one. */
-		if(unalignedSplitSAH < splitSAH) {
-			do_unalinged_split = true;
-		}
-	}
-
-	/* Do split. */
-	BVHRange left, right;
-	if(do_unalinged_split) {
-		unaligned_split.split(this, left, right, range);
-	}
-	else {
-		split.split(this, left, right, range);
-	}
-
-	progress_total += left.size() + right.size() - range.size();
-
-	BoundBox bounds;
-	if(do_unalinged_split) {
-		bounds = unaligned_heuristic.compute_aligned_boundbox(
-		        range, &references->at(0), aligned_space);
-	}
-	else {
-		bounds = range.bounds();
-	}
-
-	/* Create inner node. */
-	InnerNode *inner;
-	if(range.size() < THREAD_TASK_SIZE) {
-		/* Local build. */
-
-		/* Build left node. */
-		vector<BVHReference> copy(references->begin() + right.start(),
-		                          references->begin() + right.end());
-		right.set_start(0);
-
-		BVHNode *leftnode = build_node(left, references, level + 1, thread_id);
-
-		/* Build right node. */
-		BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);
-
-		inner = new InnerNode(bounds, leftnode, rightnode);
-	}
-	else {
-		/* Threaded build. */
-		inner = new InnerNode(bounds);
-		task_pool.push(new BVHSpatialSplitBuildTask(this,
-		                                            inner,
-		                                            0,
-		                                            left,
-		                                            *references,
-		                                            level + 1),
-		               true);
-		task_pool.push(new BVHSpatialSplitBuildTask(this,
-		                                            inner,
-		                                            1,
-		                                            right,
-		                                            *references,
-		                                            level + 1),
-		               true);
-	}
-
-	if(do_unalinged_split) {
-		inner->set_aligned_space(aligned_space);
-	}
-
-	return inner;
+  /* Update progress.
+   *
+   * TODO(sergey): Currently it matches old behavior, but we can move it to the
+   * task thread (which will mimic non=split builder) and save some CPU ticks
+   * on checking cancel status.
+   */
+  progress_update();
+  if (progress.get_cancel()) {
+    return NULL;
+  }
+
+  /* Small enough or too deep => create leaf. */
+  if (!(range.size() > 0 && params.top_level && level == 0)) {
+    if (params.small_enough_for_leaf(range.size(), level)) {
+      progress_count += range.size();
+      return create_leaf_node(range, *references);
+    }
+  }
+
+  /* Perform splitting test. */
+  BVHSpatialStorage *storage = &spatial_storage[thread_id];
+  BVHMixedSplit split(this, storage, range, references, level);
+
+  if (!(range.size() > 0 && params.top_level && level == 0)) {
+    if (split.no_split) {
+      progress_count += range.size();
+      return create_leaf_node(range, *references);
+    }
+  }
+  float leafSAH = params.sah_primitive_cost * split.leafSAH;
+  float splitSAH = params.sah_node_cost * range.bounds().half_area() +
+                   params.sah_primitive_cost * split.nodeSAH;
+
+  BVHMixedSplit unaligned_split;
+  float unalignedSplitSAH = FLT_MAX;
+  /* float unalignedLeafSAH = FLT_MAX; */
+  Transform aligned_space;
+  bool do_unalinged_split = false;
+  if (params.use_unaligned_nodes && splitSAH > params.unaligned_split_threshold * leafSAH) {
+    aligned_space = unaligned_heuristic.compute_aligned_space(range, &references->at(0));
+    unaligned_split = BVHMixedSplit(
+        this, storage, range, references, level, &unaligned_heuristic, &aligned_space);
+    /* unalignedLeafSAH = params.sah_primitive_cost * split.leafSAH; */
+    unalignedSplitSAH = params.sah_node_cost * unaligned_split.bounds.half_area() +
+                        params.sah_primitive_cost * unaligned_split.nodeSAH;
+    /* TOOD(sergey): Check we can create leaf already. */
+    /* Check whether unaligned split is better than the regulat one. */
+    if (unalignedSplitSAH < splitSAH) {
+      do_unalinged_split = true;
+    }
+  }
+
+  /* Do split. */
+  BVHRange left, right;
+  if (do_unalinged_split) {
+    unaligned_split.split(this, left, right, range);
+  }
+  else {
+    split.split(this, left, right, range);
+  }
+
+  progress_total += left.size() + right.size() - range.size();
+
+  BoundBox bounds;
+  if (do_unalinged_split) {
+    bounds = unaligned_heuristic.compute_aligned_boundbox(
+        range, &references->at(0), aligned_space);
+  }
+  else {
+    bounds = range.bounds();
+  }
+
+  /* Create inner node. */
+  InnerNode *inner;
+  if (range.size() < THREAD_TASK_SIZE) {
+    /* Local build. */
+
+    /* Build left node. */
+    vector<BVHReference> copy(references->begin() + right.start(),
+                              references->begin() + right.end());
+    right.set_start(0);
+
+    BVHNode *leftnode = build_node(left, references, level + 1, thread_id);
+
+    /* Build right node. */
+    BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);
+
+    inner = new InnerNode(bounds, leftnode, rightnode);
+  }
+  else {
+    /* Threaded build. */
+    inner = new InnerNode(bounds);
+    task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 0, left, *references, level + 1),
+                   true);
+    task_pool.push(new BVHSpatialSplitBuildTask(this, inner, 1, right, *references, level + 1),
+                   true);
+  }
+
+  if (do_unalinged_split) {
+    inner->set_aligned_space(aligned_space);
+  }
+
+  return inner;
 }
 
 /* Create Nodes */
 
 BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, int num)
 {
-	if(num == 0) {
-		BoundBox bounds = BoundBox::empty;
-		return new LeafNode(bounds, 0, 0, 0);
-	}
-	else if(num == 1) {
-		assert(start < prim_type.size());
-		prim_type[start] = ref->prim_type();
-		prim_index[start] = ref->prim_index();
-		prim_object[start] = ref->prim_object();
-		if(need_prim_time) {
-			prim_time[start] = make_float2(ref->time_from(), ref->time_to());
-		}
-
-		const uint visibility = objects[ref->prim_object()]->visibility_for_tracing();
-		BVHNode *leaf_node =  new LeafNode(ref->bounds(), visibility, start, start+1);
-		leaf_node->time_from = ref->time_from();
-		leaf_node->time_to = ref->time_to();
-		return leaf_node;
-	}
-	else {
-		int mid = num/2;
-		BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
-		BVHNode *leaf1 = create_object_leaf_nodes(ref+mid, start+mid, num-mid);
-
-		BoundBox bounds = BoundBox::empty;
-		bounds.grow(leaf0->bounds);
-		bounds.grow(leaf1->bounds);
-
-		BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1);
-		inner_node->time_from = min(leaf0->time_from, leaf1->time_from);
-		inner_node->time_to = max(leaf0->time_to, leaf1->time_to);
-		return inner_node;
-	}
+  if (num == 0) {
+    BoundBox bounds = BoundBox::empty;
+    return new LeafNode(bounds, 0, 0, 0);
+  }
+  else if (num == 1) {
+    assert(start < prim_type.size());
+    prim_type[start] = ref->prim_type();
+    prim_index[start] = ref->prim_index();
+    prim_object[start] = ref->prim_object();
+    if (need_prim_time) {
+      prim_time[start] = make_float2(ref->time_from(), ref->time_to());
+    }
+
+    const uint visibility = objects[ref->prim_object()]->visibility_for_tracing();
+    BVHNode *leaf_node = new LeafNode(ref->bounds(), visibility, start, start + 1);
+    leaf_node->time_from = ref->time_from();
+    leaf_node->time_to = ref->time_to();
+    return leaf_node;
+  }
+  else {
+    int mid = num / 2;
+    BVHNode *leaf0 = create_object_leaf_nodes(ref, start, mid);
+    BVHNode *leaf1 = create_object_leaf_nodes(ref + mid, start + mid, num - mid);
+
+    BoundBox bounds = BoundBox::empty;
+    bounds.grow(leaf0->bounds);
+    bounds.grow(leaf1->bounds);
+
+    BVHNode *inner_node = new InnerNode(bounds, leaf0, leaf1);
+    inner_node->time_from = min(leaf0->time_from, leaf1->time_from);
+    inner_node->time_to = max(leaf0->time_to, leaf1->time_to);
+    return inner_node;
+  }
 }
 
-BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
-                                    const vector<BVHReference>& references)
+BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHReference> &references)
 {
-	/* This is a bit overallocating here (considering leaf size into account),
-	 * but chunk-based re-allocation in vector makes it difficult to use small
-	 * size of stack storage here. Some tweaks are possible tho.
-	 *
-	 * NOTES:
-	 *  - If the size is too big, we'll have inefficient stack usage,
-	 *    and lots of cache misses.
-	 *  - If the size is too small, then we can run out of memory
-	 *    allowed to be used by vector.
-	 *    In practice it wouldn't mean crash, just allocator will fallback
-	 *    to heap which is slower.
-	 *  - Optimistic re-allocation in STL could jump us out of stack usage
-	 *    because re-allocation happens in chunks and size of those chunks we
-	 *    can not control.
-	 */
-	typedef StackAllocator<256, int> LeafStackAllocator;
-	typedef StackAllocator<256, float2> LeafTimeStackAllocator;
-	typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
-
-	vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
-	vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
-	vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
-	vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM_TOTAL];
-	vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];
-
-	/* TODO(sergey): In theory we should be able to store references. */
-	vector<BVHReference, LeafReferenceStackAllocator> object_references;
-
-	uint visibility[PRIMITIVE_NUM_TOTAL] = {0};
-	/* NOTE: Keep initializtion in sync with actual number of primitives. */
-	BoundBox bounds[PRIMITIVE_NUM_TOTAL] = {BoundBox::empty,
-	                                        BoundBox::empty,
-	                                        BoundBox::empty,
-	                                        BoundBox::empty};
-	int ob_num = 0;
-	int num_new_prims = 0;
-	/* Fill in per-type type/index array. */
-	for(int i = 0; i < range.size(); i++) {
-		const BVHReference& ref = references[range.start() + i];
-		if(ref.prim_index() != -1) {
-			int type_index = bitscan(ref.prim_type() & PRIMITIVE_ALL);
-			p_ref[type_index].push_back(ref);
-			p_type[type_index].push_back(ref.prim_type());
-			p_index[type_index].push_back(ref.prim_index());
-			p_object[type_index].push_back(ref.prim_object());
-			p_time[type_index].push_back(make_float2(ref.time_from(),
-			                                         ref.time_to()));
-
-			bounds[type_index].grow(ref.bounds());
-			visibility[type_index] |= objects[ref.prim_object()]->visibility_for_tracing();
-			if(ref.prim_type() & PRIMITIVE_ALL_CURVE) {
-				visibility[type_index] |= PATH_RAY_CURVE;
-			}
-			++num_new_prims;
-		}
-		else {
-			object_references.push_back(ref);
-			++ob_num;
-		}
-	}
-
-	/* Create leaf nodes for every existing primitive.
-	 *
-	 * Here we write primitive types, indices and objects to a temporary array.
-	 * This way we keep all the heavy memory allocation code outside of the
-	 * thread lock in the case of spatial split building.
-	 *
-	 * TODO(sergey): With some pointer trickery we can write directly to the
-	 * destination buffers for the non-spatial split BVH.
-	 */
-	BVHNode *leaves[PRIMITIVE_NUM_TOTAL + 1] = {NULL};
-	int num_leaves = 0;
-	size_t start_index = 0;
-	vector<int, LeafStackAllocator> local_prim_type,
-	                                local_prim_index,
-	                                local_prim_object;
-	vector<float2, LeafTimeStackAllocator> local_prim_time;
-	local_prim_type.resize(num_new_prims);
-	local_prim_index.resize(num_new_prims);
-	local_prim_object.resize(num_new_prims);
-	if(need_prim_time) {
-		local_prim_time.resize(num_new_prims);
-	}
-	for(int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
-		int num = (int)p_type[i].size();
-		if(num != 0) {
-			assert(p_type[i].size() == p_index[i].size());
-			assert(p_type[i].size() == p_object[i].size());
-			Transform aligned_space;
-			bool alignment_found = false;
-			for(int j = 0; j < num; ++j) {
-				const int index = start_index + j;
-				local_prim_type[index] = p_type[i][j];
-				local_prim_index[index] = p_index[i][j];
-				local_prim_object[index] = p_object[i][j];
-				if(need_prim_time) {
-					local_prim_time[index] = p_time[i][j];
-				}
-				if(params.use_unaligned_nodes && !alignment_found) {
-					alignment_found =
-						unaligned_heuristic.compute_aligned_space(p_ref[i][j],
-						                                          &aligned_space);
-				}
-			}
-			LeafNode *leaf_node = new LeafNode(bounds[i],
-			                                   visibility[i],
-			                                   start_index,
-			                                   start_index + num);
-			if(true) {
-				float time_from = 1.0f, time_to = 0.0f;
-				for(int j = 0; j < num; ++j) {
-					const BVHReference &ref = p_ref[i][j];
-					time_from = min(time_from, ref.time_from());
-					time_to = max(time_to, ref.time_to());
-				}
-				leaf_node->time_from = time_from;
-				leaf_node->time_to = time_to;
-			}
-			if(alignment_found) {
-				/* Need to recalculate leaf bounds with new alignment. */
-				leaf_node->bounds = BoundBox::empty;
-				for(int j = 0; j < num; ++j) {
-					const BVHReference &ref = p_ref[i][j];
-					BoundBox ref_bounds =
-					        unaligned_heuristic.compute_aligned_prim_boundbox(
-					                ref,
-					                aligned_space);
-					leaf_node->bounds.grow(ref_bounds);
-				}
-				/* Set alignment space. */
-				leaf_node->set_aligned_space(aligned_space);
-			}
-			leaves[num_leaves++] = leaf_node;
-			start_index += num;
-		}
-	}
-	/* Get size of new data to be copied to the packed arrays. */
-	const int num_new_leaf_data = start_index;
-	const size_t new_leaf_data_size = sizeof(int) * num_new_leaf_data;
-	/* Copy actual data to the packed array. */
-	if(params.use_spatial_split) {
-		spatial_spin_lock.lock();
-		/* We use first free index in the packed arrays and mode pointer to the
-		 * end of the current range.
-		 *
-		 * This doesn't give deterministic packed arrays, but it shouldn't really
-		 * matter because order of children in BVH is deterministic.
-		 */
-		start_index = spatial_free_index;
-		spatial_free_index += range.size();
-		/* Extend an array when needed. */
-		const size_t range_end = start_index + range.size();
-		if(prim_type.size() < range_end) {
-			/* Avoid extra re-allocations by pre-allocating bigger array in an
-			 * advance.
-			 */
-			if(range_end >= prim_type.capacity()) {
-				float progress = (float)progress_count/(float)progress_total;
-				float factor = (1.0f - progress);
-				const size_t reserve = (size_t)(range_end + (float)range_end*factor);
-				prim_type.reserve(reserve);
-				prim_index.reserve(reserve);
-				prim_object.reserve(reserve);
-				if(need_prim_time) {
-					prim_time.reserve(reserve);
-				}
-			}
-
-			prim_type.resize(range_end);
-			prim_index.resize(range_end);
-			prim_object.resize(range_end);
-			if(need_prim_time) {
-				prim_time.resize(range_end);
-			}
-		}
-		/* Perform actual data copy. */
-		if(new_leaf_data_size > 0) {
-			memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
-			memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
-			memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
-			if(need_prim_time) {
-				memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
-			}
-		}
-		spatial_spin_lock.unlock();
-	}
-	else {
-		/* For the regular BVH builder we simply copy new data starting at the
-		 * range start. This is totally thread-safe, all threads are living
-		 * inside of their own range.
-		 */
-		start_index = range.start();
-		if(new_leaf_data_size > 0) {
-			memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
-			memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
-			memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
-			if(need_prim_time) {
-				memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2)*num_new_leaf_data);
-			}
-		}
-	}
-
-	/* So far leaves were created with the zero-based index in an arrays,
-	 * here we modify the indices to correspond to actual packed array start
-	 * index.
-	 */
-	for(int i = 0; i < num_leaves; ++i) {
-		LeafNode *leaf = (LeafNode *)leaves[i];
-		leaf->lo += start_index;
-		leaf->hi += start_index;
-	}
-
-	/* Create leaf node for object. */
-	if(num_leaves == 0 || ob_num) {
-		/* Only create object leaf nodes if there are objects or no other
-		 * nodes created.
-		 */
-		const BVHReference *ref = (ob_num)? &object_references[0]: NULL;
-		leaves[num_leaves] = create_object_leaf_nodes(ref,
-		                                              start_index + num_new_leaf_data,
-		                                              ob_num);
-		++num_leaves;
-	}
-
-	/* TODO(sergey): Need to take care of alignment when number of leaves
-	 * is more than 1.
-	 */
-	if(num_leaves == 1) {
-		/* Simplest case: single leaf, just return it.
-		 * In all the rest cases we'll be creating intermediate inner node with
-		 * an appropriate bounding box.
-		 */
-		return leaves[0];
-	}
-	else if(num_leaves == 2) {
-		return new InnerNode(range.bounds(), leaves[0], leaves[1]);
-	}
-	else if(num_leaves == 3) {
-		BoundBox inner_bounds = merge(leaves[1]->bounds, leaves[2]->bounds);
-		BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
-		return new InnerNode(range.bounds(), leaves[0], inner);
-	} else {
-		/* Should be doing more branches if more primitive types added. */
-		assert(num_leaves <= 5);
-		BoundBox inner_bounds_a = merge(leaves[0]->bounds, leaves[1]->bounds);
-		BoundBox inner_bounds_b = merge(leaves[2]->bounds, leaves[3]->bounds);
-		BVHNode *inner_a = new InnerNode(inner_bounds_a, leaves[0], leaves[1]);
-		BVHNode *inner_b = new InnerNode(inner_bounds_b, leaves[2], leaves[3]);
-		BoundBox inner_bounds_c = merge(inner_a->bounds, inner_b->bounds);
-		BVHNode *inner_c = new InnerNode(inner_bounds_c, inner_a, inner_b);
-		if(num_leaves == 5) {
-			return new InnerNode(range.bounds(), inner_c, leaves[4]);
-		}
-		return inner_c;
-	}
+  /* This is a bit overallocating here (considering leaf size into account),
+   * but chunk-based re-allocation in vector makes it difficult to use small
+   * size of stack storage here. Some tweaks are possible tho.
+   *
+   * NOTES:
+   *  - If the size is too big, we'll have inefficient stack usage,
+   *    and lots of cache misses.
+   *  - If the size is too small, then we can run out of memory
+   *    allowed to be used by vector.
+   *    In practice it wouldn't mean crash, just allocator will fallback
+   *    to heap which is slower.
+   *  - Optimistic re-allocation in STL could jump us out of stack usage
+   *    because re-allocation happens in chunks and size of those chunks we
+   *    can not control.
+   */
+  typedef StackAllocator<256, int> LeafStackAllocator;
+  typedef StackAllocator<256, float2> LeafTimeStackAllocator;
+  typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
+
+  vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
+  vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
+  vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
+  vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM_TOTAL];
+  vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];
+
+  /* TODO(sergey): In theory we should be able to store references. */
+  vector<BVHReference, LeafReferenceStackAllocator> object_references;
+
+  uint visibility[PRIMITIVE_NUM_TOTAL] = {0};
+  /* NOTE: Keep initializtion in sync with actual number of primitives. */
+  BoundBox bounds[PRIMITIVE_NUM_TOTAL] = {
+      BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};
+  int ob_num = 0;
+  int num_new_prims = 0;
+  /* Fill in per-type type/index array. */
+  for (int i = 0; i < range.size(); i++) {
+    const BVHReference &ref = references[range.start() + i];
+    if (ref.prim_index() != -1) {
+      int type_index = bitscan(ref.prim_type() & PRIMITIVE_ALL);
+      p_ref[type_index].push_back(ref);
+      p_type[type_index].push_back(ref.prim_type());
+      p_index[type_index].push_back(ref.prim_index());
+      p_object[type_index].push_back(ref.prim_object());
+      p_time[type_index].push_back(make_float2(ref.time_from(), ref.time_to()));
+
+      bounds[type_index].grow(ref.bounds());
+      visibility[type_index] |= objects[ref.prim_object()]->visibility_for_tracing();
+      if (ref.prim_type() & PRIMITIVE_ALL_CURVE) {
+        visibility[type_index] |= PATH_RAY_CURVE;
+      }
+      ++num_new_prims;
+    }
+    else {
+      object_references.push_back(ref);
+      ++ob_num;
+    }
+  }
+
+  /* Create leaf nodes for every existing primitive.
+   *
+   * Here we write primitive types, indices and objects to a temporary array.
+   * This way we keep all the heavy memory allocation code outside of the
+   * thread lock in the case of spatial split building.
+   *
+   * TODO(sergey): With some pointer trickery we can write directly to the
+   * destination buffers for the non-spatial split BVH.
+   */
+  BVHNode *leaves[PRIMITIVE_NUM_TOTAL + 1] = {NULL};
+  int num_leaves = 0;
+  size_t start_index = 0;
+  vector<int, LeafStackAllocator> local_prim_type, local_prim_index, local_prim_object;
+  vector<float2, LeafTimeStackAllocator> local_prim_time;
+  local_prim_type.resize(num_new_prims);
+  local_prim_index.resize(num_new_prims);
+  local_prim_object.resize(num_new_prims);
+  if (need_prim_time) {
+    local_prim_time.resize(num_new_prims);
+  }
+  for (int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
+    int num = (int)p_type[i].size();
+    if (num != 0) {
+      assert(p_type[i].size() == p_index[i].size());
+      assert(p_type[i].size() == p_object[i].size());
+      Transform aligned_space;
+      bool alignment_found = false;
+      for (int j = 0; j < num; ++j) {
+        const int index = start_index + j;
+        local_prim_type[index] = p_type[i][j];
+        local_prim_index[index] = p_index[i][j];
+        local_prim_object[index] = p_object[i][j];
+        if (need_prim_time) {
+          local_prim_time[index] = p_time[i][j];
+        }
+        if (params.use_unaligned_nodes && !alignment_found) {
+          alignment_found = unaligned_heuristic.compute_aligned_space(p_ref[i][j], &aligned_space);
+        }
+      }
+      LeafNode *leaf_node = new LeafNode(bounds[i], visibility[i], start_index, start_index + num);
+      if (true) {
+        float time_from = 1.0f, time_to = 0.0f;
+        for (int j = 0; j < num; ++j) {
+          const BVHReference &ref = p_ref[i][j];
+          time_from = min(time_from, ref.time_from());
+          time_to = max(time_to, ref.time_to());
+        }
+        leaf_node->time_from = time_from;
+        leaf_node->time_to = time_to;
+      }
+      if (alignment_found) {
+        /* Need to recalculate leaf bounds with new alignment. */
+        leaf_node->bounds = BoundBox::empty;
+        for (int j = 0; j < num; ++j) {
+          const BVHReference &ref = p_ref[i][j];
+          BoundBox ref_bounds = unaligned_heuristic.compute_aligned_prim_boundbox(ref,
+                                                                                  aligned_space);
+          leaf_node->bounds.grow(ref_bounds);
+        }
+        /* Set alignment space. */
+        leaf_node->set_aligned_space(aligned_space);
+      }
+      leaves[num_leaves++] = leaf_node;
+      start_index += num;
+    }
+  }
+  /* Get size of new data to be copied to the packed arrays. */
+  const int num_new_leaf_data = start_index;
+  const size_t new_leaf_data_size = sizeof(int) * num_new_leaf_data;
+  /* Copy actual data to the packed array. */
+  if (params.use_spatial_split) {
+    spatial_spin_lock.lock();
+    /* We use first free index in the packed arrays and mode pointer to the
+     * end of the current range.
+     *
+     * This doesn't give deterministic packed arrays, but it shouldn't really
+     * matter because order of children in BVH is deterministic.
+     */
+    start_index = spatial_free_index;
+    spatial_free_index += range.size();
+    /* Extend an array when needed. */
+    const size_t range_end = start_index + range.size();
+    if (prim_type.size() < range_end) {
+      /* Avoid extra re-allocations by pre-allocating bigger array in an
+       * advance.
+       */
+      if (range_end >= prim_type.capacity()) {
+        float progress = (float)progress_count / (float)progress_total;
+        float factor = (1.0f - progress);
+        const size_t reserve = (size_t)(range_end + (float)range_end * factor);
+        prim_type.reserve(reserve);
+        prim_index.reserve(reserve);
+        prim_object.reserve(reserve);
+        if (need_prim_time) {
+          prim_time.reserve(reserve);
+        }
+      }
+
+      prim_type.resize(range_end);
+      prim_index.resize(range_end);
+      prim_object.resize(range_end);
+      if (need_prim_time) {
+        prim_time.resize(range_end);
+      }
+    }
+    /* Perform actual data copy. */
+    if (new_leaf_data_size > 0) {
+      memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
+      memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
+      memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
+      if (need_prim_time) {
+        memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2) * num_new_leaf_data);
+      }
+    }
+    spatial_spin_lock.unlock();
+  }
+  else {
+    /* For the regular BVH builder we simply copy new data starting at the
+     * range start. This is totally thread-safe, all threads are living
+     * inside of their own range.
+     */
+    start_index = range.start();
+    if (new_leaf_data_size > 0) {
+      memcpy(&prim_type[start_index], &local_prim_type[0], new_leaf_data_size);
+      memcpy(&prim_index[start_index], &local_prim_index[0], new_leaf_data_size);
+      memcpy(&prim_object[start_index], &local_prim_object[0], new_leaf_data_size);
+      if (need_prim_time) {
+        memcpy(&prim_time[start_index], &local_prim_time[0], sizeof(float2) * num_new_leaf_data);
+      }
+    }
+  }
+
+  /* So far leaves were created with the zero-based index in an arrays,
+   * here we modify the indices to correspond to actual packed array start
+   * index.
+   */
+  for (int i = 0; i < num_leaves; ++i) {
+    LeafNode *leaf = (LeafNode *)leaves[i];
+    leaf->lo += start_index;
+    leaf->hi += start_index;
+  }
+
+  /* Create leaf node for object. */
+  if (num_leaves == 0 || ob_num) {
+    /* Only create object leaf nodes if there are objects or no other
+     * nodes created.
+     */
+    const BVHReference *ref = (ob_num) ? &object_references[0] : NULL;
+    leaves[num_leaves] = create_object_leaf_nodes(ref, start_index + num_new_leaf_data, ob_num);
+    ++num_leaves;
+  }
+
+  /* TODO(sergey): Need to take care of alignment when number of leaves
+   * is more than 1.
+   */
+  if (num_leaves == 1) {
+    /* Simplest case: single leaf, just return it.
+     * In all the rest cases we'll be creating intermediate inner node with
+     * an appropriate bounding box.
+     */
+    return leaves[0];
+  }
+  else if (num_leaves == 2) {
+    return new InnerNode(range.bounds(), leaves[0], leaves[1]);
+  }
+  else if (num_leaves == 3) {
+    BoundBox inner_bounds = merge(leaves[1]->bounds, leaves[2]->bounds);
+    BVHNode *inner = new InnerNode(inner_bounds, leaves[1], leaves[2]);
+    return new InnerNode(range.bounds(), leaves[0], inner);
+  }
+  else {
+    /* Should be doing more branches if more primitive types added. */
+    assert(num_leaves <= 5);
+    BoundBox inner_bounds_a = merge(leaves[0]->bounds, leaves[1]->bounds);
+    BoundBox inner_bounds_b = merge(leaves[2]->bounds, leaves[3]->bounds);
+    BVHNode *inner_a = new InnerNode(inner_bounds_a, leaves[0], leaves[1]);
+    BVHNode *inner_b = new InnerNode(inner_bounds_b, leaves[2], leaves[3]);
+    BoundBox inner_bounds_c = merge(inner_a->bounds, inner_b->bounds);
+    BVHNode *inner_c = new InnerNode(inner_bounds_c, inner_a, inner_b);
+    if (num_leaves == 5) {
+      return new InnerNode(range.bounds(), inner_c, leaves[4]);
+    }
+    return inner_c;
+  }
 
 #undef MAX_ITEMS_PER_LEAF
 }
@@ -1154,81 +1081,81 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
 
 void BVHBuild::rotate(BVHNode *node, int max_depth, int iterations)
 {
-	/* in tested scenes, this resulted in slightly slower raytracing, so disabled
-	 * it for now. could be implementation bug, or depend on the scene */
-	if(node)
-		for(int i = 0; i < iterations; i++)
-			rotate(node, max_depth);
+  /* in tested scenes, this resulted in slightly slower raytracing, so disabled
+   * it for now. could be implementation bug, or depend on the scene */
+  if (node)
+    for (int i = 0; i < iterations; i++)
+      rotate(node, max_depth);
 }
 
 void BVHBuild::rotate(BVHNode *node, int max_depth)
 {
-	/* nothing to rotate if we reached a leaf node. */
-	if(node->is_leaf() || max_depth < 0)
-		return;
-
-	InnerNode *parent = (InnerNode*)node;
-
-	/* rotate all children first */
-	for(size_t c = 0; c < 2; c++)
-		rotate(parent->children[c], max_depth-1);
-
-	/* compute current area of all children */
-	BoundBox bounds0 = parent->children[0]->bounds;
-	BoundBox bounds1 = parent->children[1]->bounds;
-
-	float area0 = bounds0.half_area();
-	float area1 = bounds1.half_area();
-	float4 child_area = make_float4(area0, area1, 0.0f, 0.0f);
-
-	/* find best rotation. we pick a target child of a first child, and swap
-	 * this with an other child. we perform the best such swap. */
-	float best_cost = FLT_MAX;
-	int best_child = -1, best_target = -1, best_other = -1;
-
-	for(size_t c = 0; c < 2; c++) {
-		/* ignore leaf nodes as we cannot descent into */
-		if(parent->children[c]->is_leaf())
-			continue;
-
-		InnerNode *child = (InnerNode*)parent->children[c];
-		BoundBox& other = (c == 0)? bounds1: bounds0;
-
-		/* transpose child bounds */
-		BoundBox target0 = child->children[0]->bounds;
-		BoundBox target1 = child->children[1]->bounds;
-
-		/* compute cost for both possible swaps */
-		float cost0 = merge(other, target1).half_area() - child_area[c];
-		float cost1 = merge(target0, other).half_area() - child_area[c];
-
-		if(min(cost0,cost1) < best_cost) {
-			best_child = (int)c;
-			best_other = (int)(1-c);
-
-			if(cost0 < cost1) {
-				best_cost = cost0;
-				best_target = 0;
-			}
-			else {
-				best_cost = cost0;
-				best_target = 1;
-			}
-		}
-	}
-
-	/* if we did not find a swap that improves the SAH then do nothing */
-	if(best_cost >= 0)
-		return;
-
-	assert(best_child == 0 || best_child == 1);
-	assert(best_target != -1);
-
-	/* perform the best found tree rotation */
-	InnerNode *child = (InnerNode*)parent->children[best_child];
-
-	swap(parent->children[best_other], child->children[best_target]);
-	child->bounds = merge(child->children[0]->bounds, child->children[1]->bounds);
+  /* nothing to rotate if we reached a leaf node. */
+  if (node->is_leaf() || max_depth < 0)
+    return;
+
+  InnerNode *parent = (InnerNode *)node;
+
+  /* rotate all children first */
+  for (size_t c = 0; c < 2; c++)
+    rotate(parent->children[c], max_depth - 1);
+
+  /* compute current area of all children */
+  BoundBox bounds0 = parent->children[0]->bounds;
+  BoundBox bounds1 = parent->children[1]->bounds;
+
+  float area0 = bounds0.half_area();
+  float area1 = bounds1.half_area();
+  float4 child_area = make_float4(area0, area1, 0.0f, 0.0f);
+
+  /* find best rotation. we pick a target child of a first child, and swap
+   * this with an other child. we perform the best such swap. */
+  float best_cost = FLT_MAX;
+  int best_child = -1, best_target = -1, best_other = -1;
+
+  for (size_t c = 0; c < 2; c++) {
+    /* ignore leaf nodes as we cannot descent into */
+    if (parent->children[c]->is_leaf())
+      continue;
+
+    InnerNode *child = (InnerNode *)parent->children[c];
+    BoundBox &other = (c == 0) ? bounds1 : bounds0;
+
+    /* transpose child bounds */
+    BoundBox target0 = child->children[0]->bounds;
+    BoundBox target1 = child->children[1]->bounds;
+
+    /* compute cost for both possible swaps */
+    float cost0 = merge(other, target1).half_area() - child_area[c];
+    float cost1 = merge(target0, other).half_area() - child_area[c];
+
+    if (min(cost0, cost1) < best_cost) {
+      best_child = (int)c;
+      best_other = (int)(1 - c);
+
+      if (cost0 < cost1) {
+        best_cost = cost0;
+        best_target = 0;
+      }
+      else {
+        best_cost = cost0;
+        best_target = 1;
+      }
+    }
+  }
+
+  /* if we did not find a swap that improves the SAH then do nothing */
+  if (best_cost >= 0)
+    return;
+
+  assert(best_child == 0 || best_child == 1);
+  assert(best_target != -1);
+
+  /* perform the best found tree rotation */
+  InnerNode *child = (InnerNode *)parent->children[best_child];
+
+  swap(parent->children[best_other], child->children[best_target]);
+  child->bounds = merge(child->children[0]->bounds, child->children[1]->bounds);
 }
 
 CCL_NAMESPACE_END