Merge branch 'master' into blender2.8

author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2018-03-10 08:55:39 +0300
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2018-03-10 08:55:39 +0300
commit: d27158aae9bc48b2a07760a2dbe8e642fcecbe57 (patch)
tree: 7ecfe898e53f07e589ee4be0ea5b297153d7dfcf /intern/cycles
parent: f3161bd2abe4bcc41f0e9169275be315ecc6b054 (diff)
parent: 8a76f8dac3475b1d24956e0d384d65295f15c76a (diff)
56 files changed, 1324 insertions, 1067 deletions
diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp
index 21ae07e23b8..a46955322e3 100644
--- a/intern/cycles/app/cycles_xml.cpp
+++ b/intern/cycles/app/cycles_xml.cpp
@@ -40,6 +40,7 @@
 
 #include "util/util_foreach.h"
 #include "util/util_path.h"
+#include "util/util_projection.h"
 #include "util/util_transform.h"
 #include "util/util_xml.h"
 
@@ -546,8 +547,10 @@ static void xml_read_transform(xml_node node, Transform& tfm)
 {
 	if(node.attribute("matrix")) {
 		vector<float> matrix;
-		if(xml_read_float_array(matrix, node, "matrix") && matrix.size() == 16)
-			tfm = tfm * transform_transpose((*(Transform*)&matrix[0]));
+		if(xml_read_float_array(matrix, node, "matrix") && matrix.size() == 16) {
+			ProjectionTransform projection = *(ProjectionTransform*)&matrix[0];
+			tfm = tfm * projection_to_transform(projection_transpose(projection));
+		}
 	}
 
 	if(node.attribute("translate")) {
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index fb7530f8663..6774fdaec64 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -1082,7 +1082,7 @@ class CyclesObjectSettings(bpy.types.PropertyGroup):
 
         cls.motion_steps = IntProperty(
                 name="Motion Steps",
-                description="Control accuracy of deformation motion blur, more steps gives more memory usage (actual number of steps is 2^(steps - 1))",
+                description="Control accuracy of motion blur, more steps gives more memory usage (actual number of steps is 2^(steps - 1))",
                 min=1, soft_max=8,
                 default=1,
                 )
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 0166b494db7..6e4b0373a7a 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -783,7 +783,7 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
     def poll(cls, context):
         ob = context.object
         if CyclesButtonsPanel.poll(context) and ob:
-            if ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META'}:
+            if ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META', 'CAMERA'}:
                 return True
             if ob.dupli_type == 'GROUP' and ob.dupli_group:
                 return True
@@ -815,11 +815,9 @@ class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
         layout.active = (rd.use_motion_blur and cob.use_motion_blur)
 
         row = layout.row()
-        row.prop(cob, "use_deform_motion", text="Deformation")
-
-        sub = row.row()
-        sub.active = cob.use_deform_motion
-        sub.prop(cob, "motion_steps", text="Steps")
+        if ob.type != 'CAMERA':
+            row.prop(cob, "use_deform_motion", text="Deformation")
+        row.prop(cob, "motion_steps", text="Steps")
 
 
 class CYCLES_OBJECT_PT_cycles_settings(CyclesButtonsPanel, Panel):
diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp
index 62e950e3bef..f00ade320e7 100644
--- a/intern/cycles/blender/blender_camera.cpp
+++ b/intern/cycles/blender/blender_camera.cpp
@@ -83,6 +83,8 @@ struct BlenderCamera {
 	Transform matrix;
 
 	float offscreen_dicing_scale;
+
+	int motion_steps;
 };
 
 static void blender_camera_init(BlenderCamera *bcam,
@@ -226,6 +228,8 @@ static void blender_camera_from_object(BlenderCamera *bcam,
 			bcam->sensor_fit = BlenderCamera::HORIZONTAL;
 		else
 			bcam->sensor_fit = BlenderCamera::VERTICAL;
+
+		bcam->motion_steps = object_motion_steps(b_ob, b_ob);
 	}
 	else {
 		/* from lamp not implemented yet */
@@ -246,8 +250,7 @@ static Transform blender_camera_matrix(const Transform& tfm,
 			result = tfm *
 				make_transform(1.0f, 0.0f, 0.0f, 0.0f,
 				               0.0f, 0.0f, 1.0f, 0.0f,
-				               0.0f, 1.0f, 0.0f, 0.0f,
-				               0.0f, 0.0f, 0.0f, 1.0f);
+				               0.0f, 1.0f, 0.0f, 0.0f);
 		}
 		else {
 			/* Make it so environment camera needs to be pointed in the direction
@@ -257,8 +260,7 @@ static Transform blender_camera_matrix(const Transform& tfm,
 			result = tfm *
 				make_transform( 0.0f, -1.0f, 0.0f, 0.0f,
 				                0.0f,  0.0f, 1.0f, 0.0f,
-				               -1.0f,  0.0f, 0.0f, 0.0f,
-				                0.0f,  0.0f, 0.0f, 1.0f);
+				               -1.0f,  0.0f, 0.0f, 0.0f);
 		}
 	}
 	else {
@@ -455,9 +457,7 @@ static void blender_camera_sync(Camera *cam,
 	cam->matrix = blender_camera_matrix(bcam->matrix,
 	                                    bcam->type,
 	                                    bcam->panorama_type);
-	cam->motion.pre = cam->matrix;
-	cam->motion.post = cam->matrix;
-	cam->use_motion = false;
+	cam->motion.resize(bcam->motion_steps, cam->matrix);
 	cam->use_perspective_motion = false;
 	cam->shuttertime = bcam->shuttertime;
 	cam->fov_pre = cam->fov;
@@ -566,20 +566,15 @@ void BlenderSync::sync_camera_motion(BL::RenderSettings& b_render,
 	Transform tfm = get_transform(b_ob_matrix);
 	tfm = blender_camera_matrix(tfm, cam->type, cam->panorama_type);
 
-	if(tfm != cam->matrix) {
-		VLOG(1) << "Camera " << b_ob.name() << " motion detected.";
-		if(motion_time == 0.0f) {
-			/* When motion blur is not centered in frame, cam->matrix gets reset. */
-			cam->matrix = tfm;
-		}
-		else if(motion_time == -1.0f) {
-			cam->motion.pre = tfm;
-			cam->use_motion = true;
-		}
-		else if(motion_time == 1.0f) {
-			cam->motion.post = tfm;
-			cam->use_motion = true;
-		}
+	if(motion_time == 0.0f) {
+		/* When motion blur is not centered in frame, cam->matrix gets reset. */
+		cam->matrix = tfm;
+	}
+
+	/* Set transform in motion array. */
+	int motion_step = cam->motion_step(motion_time);
+	if(motion_step >= 0) {
+		cam->motion[motion_step] = tfm;
 	}
 
 	if(cam->type == CAMERA_PERSPECTIVE) {
diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp
index 6017ea502ed..984442fb08c 100644
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -633,10 +633,10 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
 	}
 }
 
-static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int time_index)
+static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int motion_step)
 {
 	VLOG(1) << "Exporting curve motion segments for mesh " << mesh->name
-	        << ", time index " << time_index;
+	        << ", motion step " << motion_step;
 
 	/* find attribute */
 	Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
@@ -651,7 +651,7 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
 
 	/* export motion vectors for curve keys */
 	size_t numkeys = mesh->curve_keys.size();
-	float4 *mP = attr_mP->data_float4() + time_index*numkeys;
+	float4 *mP = attr_mP->data_float4() + motion_step*numkeys;
 	bool have_motion = false;
 	int i = 0;
 
@@ -702,12 +702,12 @@ static void ExportCurveSegmentsMotion(Mesh *mesh, ParticleCurveData *CData, int
 			}
 			mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
 		}
-		else if(time_index > 0) {
-			VLOG(1) << "Filling in new motion vertex position for time_index "
-			        << time_index;
+		else if(motion_step > 0) {
+			VLOG(1) << "Filling in new motion vertex position for motion_step "
+			        << motion_step;
 			/* motion, fill up previous steps that we might have skipped because
 			 * they had no motion, but we need them anyway now */
-			for(int step = 0; step < time_index; step++) {
+			for(int step = 0; step < motion_step; step++) {
 				float4 *mP = attr_mP->data_float4() + step*numkeys;
 
 				for(int key = 0; key < numkeys; key++) {
@@ -889,7 +889,7 @@ void BlenderSync::sync_curves(BL::Depsgraph& b_depsgraph,
                               BL::Mesh& b_mesh,
                               BL::Object& b_ob,
                               bool motion,
-                              int time_index)
+                              int motion_step)
 {
 	if(!motion) {
 		/* Clear stored curve data */
@@ -954,7 +954,7 @@ void BlenderSync::sync_curves(BL::Depsgraph& b_depsgraph,
 	}
 	else {
 		if(motion)
-			ExportCurveSegmentsMotion(mesh, &CData, time_index);
+			ExportCurveSegmentsMotion(mesh, &CData, motion_step);
 		else
 			ExportCurveSegments(scene, mesh, &CData);
 	}
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index 02ede74224a..afcfe3d434e 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -1252,36 +1252,10 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph& b_depsgraph,
 	if(mesh_synced.find(mesh) == mesh_synced.end())
 		return;
 
-	/* for motion pass always compute, for motion blur it can be disabled */
-	int time_index = 0;
-
-	if(scene->need_motion() == Scene::MOTION_BLUR) {
-		if(!mesh->use_motion_blur)
-			return;
-
-		/* see if this mesh needs motion data at this time */
-		vector<float> object_times = object->motion_times();
-		bool found = false;
-
-		foreach(float object_time, object_times) {
-			if(motion_time == object_time) {
-				found = true;
-				break;
-			}
-			else
-				time_index++;
-		}
-
-		if(!found)
-			return;
-	}
-	else {
-		if(motion_time == -1.0f)
-			time_index = 0;
-		else if(motion_time == 1.0f)
-			time_index = 1;
-		else
-			return;
+	/* Find time matching motion step required by mesh. */
+	int motion_step = mesh->motion_step(motion_time);
+	if(motion_step < 0) {
+		return;
 	}
 
 	/* skip empty meshes */
@@ -1324,9 +1298,9 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph& b_depsgraph,
 				float3 *P = &mesh->verts[0];
 				float3 *N = (attr_N)? attr_N->data_float3(): NULL;
 
-				memcpy(attr_mP->data_float3() + time_index*numverts, P, sizeof(float3)*numverts);
+				memcpy(attr_mP->data_float3() + motion_step*numverts, P, sizeof(float3)*numverts);
 				if(attr_mN)
-					memcpy(attr_mN->data_float3() + time_index*numverts, N, sizeof(float3)*numverts);
+					memcpy(attr_mN->data_float3() + motion_step*numverts, N, sizeof(float3)*numverts);
 			}
 		}
 
@@ -1336,7 +1310,7 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph& b_depsgraph,
 
 			if(attr_mP) {
 				float3 *keys = &mesh->curve_keys[0];
-				memcpy(attr_mP->data_float3() + time_index*numkeys, keys, sizeof(float3)*numkeys);
+				memcpy(attr_mP->data_float3() + motion_step*numkeys, keys, sizeof(float3)*numkeys);
 			}
 		}
 
@@ -1359,8 +1333,8 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph& b_depsgraph,
 			new_attribute = true;
 		}
 		/* Load vertex data from mesh. */
-		float3 *mP = attr_mP->data_float3() + time_index*numverts;
-		float3 *mN = (attr_mN)? attr_mN->data_float3() + time_index*numverts: NULL;
+		float3 *mP = attr_mP->data_float3() + motion_step*numverts;
+		float3 *mN = (attr_mN)? attr_mN->data_float3() + motion_step*numverts: NULL;
 		/* NOTE: We don't copy more that existing amount of vertices to prevent
 		 * possible memory corruption.
 		 */
@@ -1389,13 +1363,13 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph& b_depsgraph,
 				if(attr_mN)
 					mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_NORMAL);
 			}
-			else if(time_index > 0) {
+			else if(motion_step > 0) {
 				VLOG(1) << "Filling deformation motion for object " << b_ob.name();
 				/* motion, fill up previous steps that we might have skipped because
 				 * they had no motion, but we need them anyway now */
 				float3 *P = &mesh->verts[0];
 				float3 *N = (attr_N)? attr_N->data_float3(): NULL;
-				for(int step = 0; step < time_index; step++) {
+				for(int step = 0; step < motion_step; step++) {
 					memcpy(attr_mP->data_float3() + step*numverts, P, sizeof(float3)*numverts);
 					if(attr_mN)
 						memcpy(attr_mN->data_float3() + step*numverts, N, sizeof(float3)*numverts);
@@ -1405,7 +1379,7 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph& b_depsgraph,
 		else {
 			if(b_mesh.vertices.length() != numverts) {
 				VLOG(1) << "Topology differs, discarding motion blur for object "
-				        << b_ob.name() << " at time " << time_index;
+				        << b_ob.name() << " at time " << motion_step;
 				memcpy(mP, &mesh->verts[0], sizeof(float3)*numverts);
 				if(mN != NULL) {
 					memcpy(mN, attr_N->data_float3(), sizeof(float3)*numverts);
@@ -1416,7 +1390,7 @@ void BlenderSync::sync_mesh_motion(BL::Depsgraph& b_depsgraph,
 
 	/* hair motion */
 	if(numkeys)
-		sync_curves(b_depsgraph, mesh, b_mesh, b_ob, true, time_index);
+		sync_curves(b_depsgraph, mesh, b_mesh, b_ob, true, motion_step);
 
 	/* free derived mesh */
 	b_data.meshes.remove(b_mesh, false, true, false);
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp
index 03c5ecddc44..d949eaf3009 100644
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -347,22 +347,11 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
 	if(motion) {
 		object = object_map.find(key);
 
-		if(object && (scene->need_motion() == Scene::MOTION_PASS ||
-		              object_use_motion(b_parent, b_ob)))
-		{
-			/* object transformation */
-			if(tfm != object->tfm) {
-				VLOG(1) << "Object " << b_ob.name() << " motion detected.";
-				if(motion_time == -1.0f || motion_time == 1.0f) {
-					object->use_motion = true;
-				}
-			}
-
-			if(motion_time == -1.0f) {
-				object->motion.pre = tfm;
-			}
-			else if(motion_time == 1.0f) {
-				object->motion.post = tfm;
+		if(object && object->use_motion()) {
+			/* Set transform at matching motion time step. */
+			int time_index = object->motion_step(motion_time);
+			if(time_index >= 0) {
+				object->motion[time_index] = tfm;
 			}
 
 			/* mesh deformation */
@@ -409,25 +398,34 @@ Object *BlenderSync::sync_object(BL::Depsgraph& b_depsgraph,
 		object->name = b_ob.name().c_str();
 		object->pass_id = b_ob.pass_index();
 		object->tfm = tfm;
-		object->motion.pre = transform_empty();
-		object->motion.post = transform_empty();
-		object->use_motion = false;
+		object->motion.clear();
 
 		/* motion blur */
-		if(scene->need_motion() == Scene::MOTION_BLUR && object->mesh) {
+		Scene::MotionType need_motion = scene->need_motion();
+		if(need_motion != Scene::MOTION_NONE && object->mesh) {
 			Mesh *mesh = object->mesh;
-
 			mesh->use_motion_blur = false;
+			mesh->motion_steps = 0;
+
+			uint motion_steps;
 
-			if(object_use_motion(b_parent, b_ob)) {
+			if(scene->need_motion() == Scene::MOTION_BLUR) {
+				motion_steps = object_motion_steps(b_parent, b_ob);
 				if(object_use_deform_motion(b_parent, b_ob)) {
-					mesh->motion_steps = object_motion_steps(b_ob);
+					mesh->motion_steps = motion_steps;
 					mesh->use_motion_blur = true;
 				}
+			}
+			else {
+				motion_steps = 3;
+				mesh->motion_steps = motion_steps;
+			}
+
+			object->motion.resize(motion_steps, transform_empty());
+			object->motion[motion_steps/2] = tfm;
 
-				vector<float> times = object->motion_times();
-				foreach(float time, times)
-					motion_times.insert(time);
+			for(size_t step = 0; step < motion_steps; step++) {
+				motion_times.insert(object->motion_time(step));
 			}
 		}
 
@@ -646,6 +644,11 @@ void BlenderSync::sync_motion(BL::RenderSettings& b_render,
 
 	/* note iteration over motion_times set happens in sorted order */
 	foreach(float relative_time, motion_times) {
+		/* center time is already handled. */
+		if(relative_time == 0.0f) {
+			continue;
+		}
+
 		VLOG(1) << "Synchronizing motion for the relative time "
 		        << relative_time << ".";
 
diff --git a/intern/cycles/blender/blender_object_cull.cpp b/intern/cycles/blender/blender_object_cull.cpp
index 1d747de647a..bdf7dc469b2 100644
--- a/intern/cycles/blender/blender_object_cull.cpp
+++ b/intern/cycles/blender/blender_object_cull.cpp
@@ -96,7 +96,7 @@ bool BlenderObjectCulling::test(Scene *scene, BL::Object& b_ob, Transform& tfm)
 bool BlenderObjectCulling::test_camera(Scene *scene, float3 bb[8])
 {
 	Camera *cam = scene->camera;
-	Transform& worldtondc = cam->worldtondc;
+	const ProjectionTransform& worldtondc = cam->worldtondc;
 	float3 bb_min = make_float3(FLT_MAX, FLT_MAX, FLT_MAX),
 	       bb_max = make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX);
 	bool all_behind = true;
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index 9e8d494f83b..2ea86ba1133 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -125,7 +125,7 @@ private:
 	                 BL::Mesh& b_mesh,
 	                 BL::Object& b_ob,
 	                 bool motion,
-	                 int time_index = 0);
+	                 int motion_step = 0);
 	Object *sync_object(BL::Depsgraph& b_depsgraph,
 	                    BL::Depsgraph::duplis_iterator& b_dupli_iter,
 	                    uint layer_flag,
diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h
index a813a09f38a..5b67ff85a9b 100644
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -248,14 +248,15 @@ static inline float *image_get_float_pixels_for_frame(BL::Image& image,
 
 static inline Transform get_transform(const BL::Array<float, 16>& array)
 {
-	Transform tfm;
+	ProjectionTransform projection;
 
-	/* we assume both types to be just 16 floats, and transpose because blender
-	 * use column major matrix order while we use row major */
-	memcpy(&tfm, &array, sizeof(float)*16);
-	tfm = transform_transpose(tfm);
+	/* We assume both types to be just 16 floats, and transpose because Blender
+	 * uses column major matrix order while we use row major. */
+	memcpy(&projection, &array, sizeof(float)*16);
+	projection = projection_transpose(projection);
 
-	return tfm;
+	/* Drop last row, matrix is assumed to be affine transform. */
+	return projection_to_transform(projection);
 }
 
 static inline float2 get_float2(const BL::Array<float, 2>& array)
@@ -484,33 +485,34 @@ static inline void mesh_texture_space(BL::Mesh& b_mesh,
 	loc = loc*size - make_float3(0.5f, 0.5f, 0.5f);
 }
 
-/* object used for motion blur */
-static inline bool object_use_motion(BL::Object& b_parent, BL::Object& b_ob)
+/* Object motion steps, returns 0 if no motion blur needed. */
+static inline uint object_motion_steps(BL::Object& b_parent, BL::Object& b_ob)
 {
+	/* Get motion enabled and steps from object itself. */
 	PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
 	bool use_motion = get_boolean(cobject, "use_motion_blur");
-	/* If motion blur is enabled for the object we also check
-	 * whether it's enabled for the parent object as well.
-	 *
-	 * This way we can control motion blur from the dupligroup
-	 * duplicator much easier.
-	 */
-	if(use_motion && b_parent.ptr.data != b_ob.ptr.data) {
+	if(!use_motion) {
+		return 0;
+	}
+
+	uint steps = max(1, get_int(cobject, "motion_steps"));
+
+	/* Also check parent object, so motion blur and steps can be
+	 * controlled by dupligroup duplicator for linked groups. */
+	if(b_parent.ptr.data != b_ob.ptr.data) {
 		PointerRNA parent_cobject = RNA_pointer_get(&b_parent.ptr, "cycles");
 		use_motion &= get_boolean(parent_cobject, "use_motion_blur");
-	}
-	return use_motion;
-}
 
-/* object motion steps */
-static inline uint object_motion_steps(BL::Object& b_ob)
-{
-	PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles");
-	uint steps = get_int(cobject, "motion_steps");
+		if(!use_motion) {
+			return 0;
+		}
+
+		steps = max(steps, get_int(parent_cobject, "motion_steps"));
+	}
 
-	/* use uneven number of steps so we get one keyframe at the current frame,
-	 * and ue 2^(steps - 1) so objects with more/fewer steps still have samples
-	 * at the same times, to avoid sampling at many different times */
+	/* Use uneven number of steps so we get one keyframe at the current frame,
+	 * and use 2^(steps - 1) so objects with more/fewer steps still have samples
+	 * at the same times, to avoid sampling at many different times. */
 	return (2 << (steps - 1)) + 1;
 }
 
diff --git a/intern/cycles/graph/node_type.cpp b/intern/cycles/graph/node_type.cpp
index a3a8fa5f382..37aae211e93 100644
--- a/intern/cycles/graph/node_type.cpp
+++ b/intern/cycles/graph/node_type.cpp
@@ -77,7 +77,7 @@ size_t SocketType::max_size()
 
 void *SocketType::zero_default_value()
 {
-	static Transform zero_transform = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
+	static Transform zero_transform = {{0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}};
 	return &zero_transform;
 }
 
diff --git a/intern/cycles/graph/node_xml.cpp b/intern/cycles/graph/node_xml.cpp
index d26b3b2c2c8..f4599e22d40 100644
--- a/intern/cycles/graph/node_xml.cpp
+++ b/intern/cycles/graph/node_xml.cpp
@@ -196,7 +196,7 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
 			case SocketType::TRANSFORM:
 			{
 				array<Transform> value;
-				xml_read_float_array<16>(value, attr);
+				xml_read_float_array<12>(value, attr);
 				if(value.size() == 1) {
 					node->set(socket, value[0]);
 				}
@@ -205,7 +205,7 @@ void xml_read_node(XMLReader& reader, Node *node, xml_node xml_node)
 			case SocketType::TRANSFORM_ARRAY:
 			{
 				array<Transform> value;
-				xml_read_float_array<16>(value, attr);
+				xml_read_float_array<12>(value, attr);
 				node->set(socket, value);
 				break;
 			}
@@ -400,12 +400,10 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
 			{
 				Transform tfm = node->get_transform(socket);
 				std::stringstream ss;
-				for(int i = 0; i < 4; i++) {
-					ss << string_printf("%g %g %g %g", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
-					if(i != 3) {
-						ss << " ";
-					}
+				for(int i = 0; i < 3; i++) {
+					ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
 				}
+				ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
 				attr = ss.str().c_str();
 				break;
 			}
@@ -416,11 +414,12 @@ xml_node xml_write_node(Node *node, xml_node xml_root)
 				for(size_t j = 0; j < value.size(); j++) {
 					const Transform& tfm = value[j];
 
-					for(int i = 0; i < 4; i++) {
-						ss << string_printf("%g %g %g %g", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
-						if(j != value.size() - 1 || i != 3) {
-							ss << " ";
-						}
+					for(int i = 0; i < 3; i++) {
+						ss << string_printf("%g %g %g %g ", (double)tfm[i][0], (double)tfm[i][1], (double)tfm[i][2], (double)tfm[i][3]);
+					}
+					ss << string_printf("%g %g %g %g", 0.0, 0.0, 0.0, 1.0);
+					if(j != value.size() - 1) {
+						ss << " ";
 					}
 				}
 				attr = ss.str().c_str();
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 50ea03a1f8f..9b7f4e00084 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -254,6 +254,7 @@ set(SRC_UTIL_HEADERS
 	../util/util_math_int3.h
 	../util/util_math_int4.h
 	../util/util_math_matrix.h
+	../util/util_projection.h
 	../util/util_rect.h
 	../util/util_static_assert.h
 	../util/util_transform.h
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index 6c33dad5426..060b3934a41 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -25,7 +25,6 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k
 	space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1);
 	space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2);
 	space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3);
-	space.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
 	return space;
 }
 
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index efd6798ca51..cfc567ff9ca 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -276,7 +276,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 								shader = __float_as_int(str.z);
 							}
 #endif
-							int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
+							int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
 
 							/* if no transparent shadows, all light is blocked */
 							if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
index 522213f30ca..46fd178aed6 100644
--- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h
@@ -358,7 +358,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 								shader = __float_as_int(str.z);
 							}
 #endif
-							int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
+							int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
 
 							/* if no transparent shadows, all light is blocked */
 							if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
index c72595eed10..42c053704d5 100644
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -53,9 +53,7 @@ ccl_device_inline AttributeDescriptor attribute_not_found()
 
 ccl_device_inline uint object_attribute_map_offset(KernelGlobals *kg, int object)
 {
-	int offset = object*OBJECT_SIZE + 15;
-	float4 f = kernel_tex_fetch(__objects, offset);
-	return __float_as_uint(f.y);
+	return kernel_tex_fetch(__objects, object).attribute_map_offset;
 }
 
 ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id)
@@ -105,7 +103,6 @@ ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderD
 	tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0);
 	tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1);
 	tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2);
-	tfm.w = kernel_tex_fetch(__attributes_float3, desc.offset + 3);
 
 	return tfm;
 }
diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h
index faf3e3cdf2b..46c3f408f0b 100644
--- a/intern/cycles/kernel/geom/geom_curve_intersect.h
+++ b/intern/cycles/kernel/geom/geom_curve_intersect.h
@@ -170,8 +170,7 @@ ccl_device_forceinline bool cardinal_curve_intersect(
 		htfm = make_transform(
 			dir.z / d, 0, -dir.x /d, 0,
 			-dir.x * dir.y /d, d, -dir.y * dir.z /d, 0,
-			dir.x, dir.y, dir.z, 0,
-			0, 0, 0, 1);
+			dir.x, dir.y, dir.z, 0);
 
 		float4 v00 = kernel_tex_fetch(__curves, prim);
 
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index 32aa2007f5d..800649abf38 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -28,62 +28,44 @@ CCL_NAMESPACE_BEGIN
 
 enum ObjectTransform {
 	OBJECT_TRANSFORM = 0,
-	OBJECT_INVERSE_TRANSFORM = 4,
-	OBJECT_TRANSFORM_MOTION_PRE = 0,
-	OBJECT_TRANSFORM_MOTION_MID = 4,
-	OBJECT_TRANSFORM_MOTION_POST = 8,
-	OBJECT_PROPERTIES = 12,
-	OBJECT_DUPLI = 13
+	OBJECT_INVERSE_TRANSFORM = 1,
 };
 
 enum ObjectVectorTransform {
-	OBJECT_VECTOR_MOTION_PRE = 0,
-	OBJECT_VECTOR_MOTION_POST = 3
+	OBJECT_PASS_MOTION_PRE = 0,
+	OBJECT_PASS_MOTION_POST = 1
 };
 
 /* Object to world space transformation */
 
 ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type)
 {
-	int offset = object*OBJECT_SIZE + (int)type;
-
-	Transform tfm;
-	tfm.x = kernel_tex_fetch(__objects, offset + 0);
-	tfm.y = kernel_tex_fetch(__objects, offset + 1);
-	tfm.z = kernel_tex_fetch(__objects, offset + 2);
-	tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
-
-	return tfm;
+	if(type == OBJECT_INVERSE_TRANSFORM) {
+		return kernel_tex_fetch(__objects, object).itfm;
+	}
+	else {
+		return kernel_tex_fetch(__objects, object).tfm;
+	}
 }
 
 /* Lamp to world space transformation */
 
 ccl_device_inline Transform lamp_fetch_transform(KernelGlobals *kg, int lamp, bool inverse)
 {
-	int offset = lamp*LIGHT_SIZE + (inverse? 8 : 5);
-
-	Transform tfm;
-	tfm.x = kernel_tex_fetch(__light_data, offset + 0);
-	tfm.y = kernel_tex_fetch(__light_data, offset + 1);
-	tfm.z = kernel_tex_fetch(__light_data, offset + 2);
-	tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
-
-	return tfm;
+	if(inverse) {
+		return kernel_tex_fetch(__lights, lamp).itfm;
+	}
+	else {
+		return kernel_tex_fetch(__lights, lamp).tfm;
+	}
 }
 
 /* Object to world space transformation for motion vectors */
 
-ccl_device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type)
+ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type)
 {
-	int offset = object*OBJECT_VECTOR_SIZE + (int)type;
-
-	Transform tfm;
-	tfm.x = kernel_tex_fetch(__objects_vector, offset + 0);
-	tfm.y = kernel_tex_fetch(__objects_vector, offset + 1);
-	tfm.z = kernel_tex_fetch(__objects_vector, offset + 2);
-	tfm.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
-
-	return tfm;
+	int offset = object*OBJECT_MOTION_PASS_SIZE + (int)type;
+	return kernel_tex_fetch(__object_motion_pass, offset);
 }
 
 /* Motion blurred object transformations */
@@ -91,27 +73,12 @@ ccl_device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int
 #ifdef __OBJECT_MOTION__
 ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time)
 {
-	MotionTransform motion;
-
-	int offset = object*OBJECT_SIZE + (int)OBJECT_TRANSFORM_MOTION_PRE;
-
-	motion.pre.x = kernel_tex_fetch(__objects, offset + 0);
-	motion.pre.y = kernel_tex_fetch(__objects, offset + 1);
-	motion.pre.z = kernel_tex_fetch(__objects, offset + 2);
-	motion.pre.w = kernel_tex_fetch(__objects, offset + 3);
-
-	motion.mid.x = kernel_tex_fetch(__objects, offset + 4);
-	motion.mid.y = kernel_tex_fetch(__objects, offset + 5);
-	motion.mid.z = kernel_tex_fetch(__objects, offset + 6);
-	motion.mid.w = kernel_tex_fetch(__objects, offset + 7);
-
-	motion.post.x = kernel_tex_fetch(__objects, offset + 8);
-	motion.post.y = kernel_tex_fetch(__objects, offset + 9);
-	motion.post.z = kernel_tex_fetch(__objects, offset + 10);
-	motion.post.w = kernel_tex_fetch(__objects, offset + 11);
+	const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset;
+	const ccl_global DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset);
+	const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1;
 
 	Transform tfm;
-	transform_motion_interpolate(&tfm, &motion, time);
+	transform_motion_array_interpolate(&tfm, motion, num_steps, time);
 
 	return tfm;
 }
@@ -237,9 +204,7 @@ ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd
 
 ccl_device_inline float object_surface_area(KernelGlobals *kg, int object)
 {
-	int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
-	float4 f = kernel_tex_fetch(__objects, offset);
-	return f.x;
+	return kernel_tex_fetch(__objects, object).surface_area;
 }
 
 /* Pass ID number of object */
@@ -249,9 +214,7 @@ ccl_device_inline float object_pass_id(KernelGlobals *kg, int object)
 	if(object == OBJECT_NONE)
 		return 0.0f;
 
-	int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
-	float4 f = kernel_tex_fetch(__objects, offset);
-	return f.y;
+	return kernel_tex_fetch(__objects, object).pass_id;
 }
 
 /* Per lamp random number for shader variation */
@@ -261,8 +224,7 @@ ccl_device_inline float lamp_random_number(KernelGlobals *kg, int lamp)
 	if(lamp == LAMP_NONE)
 		return 0.0f;
 
-	float4 f = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 4);
-	return f.y;
+	return kernel_tex_fetch(__lights, lamp).random;
 }
 
 /* Per object random number for shader variation */
@@ -272,9 +234,7 @@ ccl_device_inline float object_random_number(KernelGlobals *kg, int object)
 	if(object == OBJECT_NONE)
 		return 0.0f;
 
-	int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
-	float4 f = kernel_tex_fetch(__objects, offset);
-	return f.z;
+	return kernel_tex_fetch(__objects, object).random_number;
 }
 
 /* Particle ID from which this object was generated */
@@ -284,9 +244,7 @@ ccl_device_inline int object_particle_id(KernelGlobals *kg, int object)
 	if(object == OBJECT_NONE)
 		return 0;
 
-	int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
-	float4 f = kernel_tex_fetch(__objects, offset);
-	return __float_as_uint(f.w);
+	return kernel_tex_fetch(__objects, object).particle_index;
 }
 
 /* Generated texture coordinate on surface from where object was instanced */
@@ -296,9 +254,10 @@ ccl_device_inline float3 object_dupli_generated(KernelGlobals *kg, int object)
 	if(object == OBJECT_NONE)
 		return make_float3(0.0f, 0.0f, 0.0f);
 
-	int offset = object*OBJECT_SIZE + OBJECT_DUPLI;
-	float4 f = kernel_tex_fetch(__objects, offset);
-	return make_float3(f.x, f.y, f.z);
+	const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+	return make_float3(kobject->dupli_generated[0],
+	                   kobject->dupli_generated[1],
+	                   kobject->dupli_generated[2]);
 }
 
 /* UV texture coordinate on surface from where object was instanced */
@@ -308,27 +267,24 @@ ccl_device_inline float3 object_dupli_uv(KernelGlobals *kg, int object)
 	if(object == OBJECT_NONE)
 		return make_float3(0.0f, 0.0f, 0.0f);
 
-	int offset = object*OBJECT_SIZE + OBJECT_DUPLI;
-	float4 f = kernel_tex_fetch(__objects, offset + 1);
-	return make_float3(f.x, f.y, 0.0f);
+	const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+	return make_float3(kobject->dupli_uv[0],
+	                   kobject->dupli_uv[1],
+	                   0.0f);
 }
 
 /* Information about mesh for motion blurred triangles and curves */
 
 ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys)
 {
-	int offset = object*OBJECT_SIZE + OBJECT_DUPLI;
-
 	if(numkeys) {
-		float4 f = kernel_tex_fetch(__objects, offset);
-		*numkeys = __float_as_int(f.w);
+		*numkeys = kernel_tex_fetch(__objects, object).numkeys;
 	}
 
-	float4 f = kernel_tex_fetch(__objects, offset + 1);
 	if(numsteps)
-		*numsteps = __float_as_int(f.z);
+		*numsteps = kernel_tex_fetch(__objects, object).numsteps;
 	if(numverts)
-		*numverts = __float_as_int(f.w);
+		*numverts = kernel_tex_fetch(__objects, object).numverts;
 }
 
 /* Offset to an objects patch map */
@@ -338,76 +294,56 @@ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object)
 	if(object == OBJECT_NONE)
 		return 0;
 
-	int offset = object*OBJECT_SIZE + 15;
-	float4 f = kernel_tex_fetch(__objects, offset);
-	return __float_as_uint(f.x);
+	return kernel_tex_fetch(__objects, object).patch_map_offset;
 }
 
 /* Pass ID for shader */
 
 ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
 {
-	return kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE + 1);
+	return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id;
 }
 
 /* Particle data from which object was instanced */
 
 ccl_device_inline uint particle_index(KernelGlobals *kg, int particle)
 {
-	int offset = particle*PARTICLE_SIZE;
-	float4 f = kernel_tex_fetch(__particles, offset + 0);
-	return __float_as_uint(f.x);
+	return kernel_tex_fetch(__particles, particle).index;
 }
 
 ccl_device float particle_age(KernelGlobals *kg, int particle)
 {
-	int offset = particle*PARTICLE_SIZE;
-	float4 f = kernel_tex_fetch(__particles, offset + 0);
-	return f.y;
+	return kernel_tex_fetch(__particles, particle).age;
 }
 
 ccl_device float particle_lifetime(KernelGlobals *kg, int particle)
 {
-	int offset = particle*PARTICLE_SIZE;
-	float4 f = kernel_tex_fetch(__particles, offset + 0);
-	return f.z;
+	return kernel_tex_fetch(__particles, particle).lifetime;
 }
 
 ccl_device float particle_size(KernelGlobals *kg, int particle)
 {
-	int offset = particle*PARTICLE_SIZE;
-	float4 f = kernel_tex_fetch(__particles, offset + 0);
-	return f.w;
+	return kernel_tex_fetch(__particles, particle).size;
 }
 
 ccl_device float4 particle_rotation(KernelGlobals *kg, int particle)
 {
-	int offset = particle*PARTICLE_SIZE;
-	float4 f = kernel_tex_fetch(__particles, offset + 1);
-	return f;
+	return kernel_tex_fetch(__particles, particle).rotation;
 }
 
 ccl_device float3 particle_location(KernelGlobals *kg, int particle)
 {
-	int offset = particle*PARTICLE_SIZE;
-	float4 f = kernel_tex_fetch(__particles, offset + 2);
-	return make_float3(f.x, f.y, f.z);
+	return float4_to_float3(kernel_tex_fetch(__particles, particle).location);
 }
 
 ccl_device float3 particle_velocity(KernelGlobals *kg, int particle)
 {
-	int offset = particle*PARTICLE_SIZE;
-	float4 f2 = kernel_tex_fetch(__particles, offset + 2);
-	float4 f3 = kernel_tex_fetch(__particles, offset + 3);
-	return make_float3(f2.w, f3.x, f3.y);
+	return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity);
 }
 
 ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
 {
-	int offset = particle*PARTICLE_SIZE;
-	float4 f3 = kernel_tex_fetch(__particles, offset + 3);
-	float4 f4 = kernel_tex_fetch(__particles, offset + 4);
-	return make_float3(f3.z, f3.w, f4.x);
+	return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity);
 }
 
 /* Object intersection in BVH */
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index 60a1e483b84..c159be92885 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -193,10 +193,10 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *
 	 * transformation was set match the world/object space of motion_pre/post */
 	Transform tfm;
 	
-	tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_PRE);
+	tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_PRE);
 	motion_pre = transform_point(&tfm, motion_pre);
 
-	tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_POST);
+	tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_POST);
 	motion_post = transform_point(&tfm, motion_post);
 
 	float3 motion_center;
@@ -204,14 +204,14 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *
 	/* camera motion, for perspective/orthographic motion.pre/post will be a
 	 * world-to-raster matrix, for panorama it's world-to-camera */
 	if(kernel_data.cam.type != CAMERA_PANORAMA) {
-		tfm = kernel_data.cam.worldtoraster;
-		motion_center = transform_perspective(&tfm, center);
+		ProjectionTransform projection = kernel_data.cam.worldtoraster;
+		motion_center = transform_perspective(&projection, center);
 
-		tfm = kernel_data.cam.motion.pre;
-		motion_pre = transform_perspective(&tfm, motion_pre);
+		projection = kernel_data.cam.perspective_pre;
+		motion_pre = transform_perspective(&projection, motion_pre);
 
-		tfm = kernel_data.cam.motion.post;
-		motion_post = transform_perspective(&tfm, motion_post);
+		projection = kernel_data.cam.perspective_post;
+		motion_post = transform_perspective(&projection, motion_post);
 	}
 	else {
 		tfm = kernel_data.cam.worldtocamera;
@@ -220,13 +220,13 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *
 		motion_center.x *= kernel_data.cam.width;
 		motion_center.y *= kernel_data.cam.height;
 
-		tfm = kernel_data.cam.motion.pre;
+		tfm = kernel_data.cam.motion_pass_pre;
 		motion_pre = normalize(transform_point(&tfm, motion_pre));
 		motion_pre = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_pre));
 		motion_pre.x *= kernel_data.cam.width;
 		motion_pre.y *= kernel_data.cam.height;
 
-		tfm = kernel_data.cam.motion.post;
+		tfm = kernel_data.cam.motion_pass_post;
 		motion_post = normalize(transform_point(&tfm, motion_post));
 		motion_post = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_post));
 		motion_post.x *= kernel_data.cam.width;
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 286d898992e..a4e47384b25 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -68,7 +68,7 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *s
 	if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
 	if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
 
-	if(r.w != 0.0f && r.w != 1.0f) {
+	if(r.w > 1e-8f && r.w != 1.0f) {
 		/* For RGBA colors, unpremultiply after interpolation. */
 		return float4_to_float3(r) / r.w;
 	}
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 96cdb04d955..b73ad47dad3 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -42,7 +42,7 @@ ccl_device float2 camera_sample_aperture(ccl_constant KernelCamera *cam, float u
 ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
 {
 	/* create ray form raster position */
-	Transform rastertocamera = kernel_data.cam.rastertocamera;
+	ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
 	float3 raster = make_float3(raster_x, raster_y, 0.0f);
 	float3 Pcamera = transform_perspective(&rastertocamera, raster);
 
@@ -54,13 +54,13 @@ ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, flo
 		 * interpolated field of view.
 		 */
 		if(ray->time < 0.5f) {
-			Transform rastertocamera_pre = kernel_data.cam.perspective_motion.pre;
+			ProjectionTransform rastertocamera_pre = kernel_data.cam.perspective_pre;
 			float3 Pcamera_pre =
 			        transform_perspective(&rastertocamera_pre, raster);
 			Pcamera = interp(Pcamera_pre, Pcamera, ray->time * 2.0f);
 		}
 		else {
-			Transform rastertocamera_post = kernel_data.cam.perspective_motion.post;
+			ProjectionTransform rastertocamera_post = kernel_data.cam.perspective_post;
 			float3 Pcamera_post =
 			        transform_perspective(&rastertocamera_post, raster);
 			Pcamera = interp(Pcamera, Pcamera_post, (ray->time - 0.5f) * 2.0f);
@@ -91,17 +91,12 @@ ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, flo
 	Transform cameratoworld = kernel_data.cam.cameratoworld;
 
 #ifdef __CAMERA_MOTION__
-	if(kernel_data.cam.have_motion) {
-#  ifdef __KERNEL_OPENCL__
-		const MotionTransform tfm = kernel_data.cam.motion;
-		transform_motion_interpolate(&cameratoworld,
-									 &tfm,
-		                             ray->time);
-#  else
-		transform_motion_interpolate(&cameratoworld,
-		                             &kernel_data.cam.motion,
-		                             ray->time);
-#  endif
+	if(kernel_data.cam.num_motion_steps) {
+		transform_motion_array_interpolate(
+			&cameratoworld,
+			kernel_tex_array(__camera_motion),
+			kernel_data.cam.num_motion_steps,
+			ray->time);
 	}
 #endif
 
@@ -175,7 +170,7 @@ ccl_device void camera_sample_perspective(KernelGlobals *kg, float raster_x, flo
 ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
 {
 	/* create ray form raster position */
-	Transform rastertocamera = kernel_data.cam.rastertocamera;
+	ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera;
 	float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
 
 	float3 P;
@@ -203,17 +198,12 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl
 	Transform cameratoworld = kernel_data.cam.cameratoworld;
 
 #ifdef __CAMERA_MOTION__
-	if(kernel_data.cam.have_motion) {
-#  ifdef __KERNEL_OPENCL__
-		const MotionTransform tfm = kernel_data.cam.motion;
-		transform_motion_interpolate(&cameratoworld,
-		                             &tfm,
-		                             ray->time);
-#  else
-		transform_motion_interpolate(&cameratoworld,
-		                             &kernel_data.cam.motion,
-		                             ray->time);
-#  endif
+	if(kernel_data.cam.num_motion_steps) {
+		transform_motion_array_interpolate(
+			&cameratoworld,
+			kernel_tex_array(__camera_motion),
+			kernel_data.cam.num_motion_steps,
+			ray->time);
 	}
 #endif
 
@@ -239,11 +229,12 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl
 /* Panorama Camera */
 
 ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
+                                              const ccl_global DecomposedTransform *cam_motion,
                                               float raster_x, float raster_y,
                                               float lens_u, float lens_v,
                                               ccl_addr_space Ray *ray)
 {
-	Transform rastertocamera = cam->rastertocamera;
+	ProjectionTransform rastertocamera = cam->rastertocamera;
 	float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
 
 	/* create ray form raster position */
@@ -281,17 +272,12 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
 	Transform cameratoworld = cam->cameratoworld;
 
 #ifdef __CAMERA_MOTION__
-	if(cam->have_motion) {
-#  ifdef __KERNEL_OPENCL__
-		const MotionTransform tfm = cam->motion;
-		transform_motion_interpolate(&cameratoworld,
-		                             &tfm,
-		                             ray->time);
-#  else
-		transform_motion_interpolate(&cameratoworld,
-		                             &cam->motion,
-		                             ray->time);
-#  endif
+	if(cam->num_motion_steps) {
+		transform_motion_array_interpolate(
+			&cameratoworld,
+			cam_motion,
+			cam->num_motion_steps,
+			ray->time);
 	}
 #endif
 
@@ -410,12 +396,16 @@ ccl_device_inline void camera_sample(KernelGlobals *kg,
 #endif
 
 	/* sample */
-	if(kernel_data.cam.type == CAMERA_PERSPECTIVE)
+	if(kernel_data.cam.type == CAMERA_PERSPECTIVE) {
 		camera_sample_perspective(kg, raster_x, raster_y, lens_u, lens_v, ray);
-	else if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+	}
+	else if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
 		camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray);
-	else
-		camera_sample_panorama(&kernel_data.cam, raster_x, raster_y, lens_u, lens_v, ray);
+	}
+	else {
+		const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion);
+		camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray);
+	}
 }
 
 /* Utilities */
@@ -460,7 +450,7 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd,
 		if(sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE)
 			P += camera_position(kg);
 
-		Transform tfm = kernel_data.cam.worldtondc;
+		ProjectionTransform tfm = kernel_data.cam.worldtondc;
 		return transform_perspective(&tfm, P);
 	}
 	else {
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 61cd90e9d2a..d26b668cb11 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -118,6 +118,7 @@ template<typename T> struct texture  {
 #define kernel_tex_fetch_ssef(tex, index) (kg->tex.fetch_ssef(index))
 #define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
 #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
+#define kernel_tex_array(tex) (kg->tex.data)
 
 #define kernel_data (kg->__data)
 
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 9bd7a572f5f..ac63bcf7ac9 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -137,6 +137,7 @@ ccl_device_inline uint ccl_num_groups(uint d)
 
 /* Use arrays for regular data. */
 #define kernel_tex_fetch(t, index) t[(index)]
+#define kernel_tex_array(t) (t)
 
 #define kernel_data __data
 
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index b02e3bc576d..671c47e2225 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -144,7 +144,8 @@
 
 /* data lookup defines */
 #define kernel_data (*kg->data)
-#define kernel_tex_fetch(tex, index) ((const ccl_global tex##_t*)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data))[(index)]
+#define kernel_tex_array(tex) ((const ccl_global tex##_t*)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data))
+#define kernel_tex_fetch(tex, index) kernel_tex_array(tex)[(index)]
 
 /* define NULL */
 #define NULL 0
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 5875249b404..a5556c3be8f 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -29,7 +29,7 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
 	/* setup shading at emitter */
 	float3 eval;
 
-	int shader_flag = kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE);
+	int shader_flag = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).flags;
 
 #ifdef __BACKGROUND_MIS__
 	if(ls->type == LIGHT_BACKGROUND) {
@@ -51,9 +51,9 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
 #endif
 	if(shader_flag & SD_HAS_CONSTANT_EMISSION)
 	{
-		eval.x = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 2));
-		eval.y = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 3));
-		eval.z = __int_as_float(kernel_tex_fetch(__shader_flag, (ls->shader & SHADER_MASK)*SHADER_SIZE + 4));
+		eval.x = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[0];
+		eval.y = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[1];
+		eval.z = kernel_tex_fetch(__shaders, (ls->shader & SHADER_MASK)).constant_emission[2];
 		if((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) {
 			ls->Ng = -ls->Ng;
 		}
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index dfa3150dc92..efab69ee37d 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -255,11 +255,11 @@ ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals
                                                                    float3 *lightpos,
                                                                    float3 *dir)
 {
-	float4 data0 = kernel_tex_fetch(__light_data, (index + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 0);
-	float4 data3 = kernel_tex_fetch(__light_data, (index + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 3);
+	int portal = kernel_data.integrator.portal_offset + index;
+	const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
 
-	*lightpos = make_float3(data0.y, data0.z, data0.w);
-	*dir = make_float3(data3.y, data3.z, data3.w);
+	*lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+	*dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
 
 	/* Check whether portal is on the right side. */
 	if(dot(*dir, P - *lightpos) > 1e-4f)
@@ -291,11 +291,10 @@ ccl_device_inline float background_portal_pdf(KernelGlobals *kg,
 		}
 		num_possible++;
 
-		float4 data1 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 1);
-		float4 data2 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 2);
-
-		float3 axisu = make_float3(data1.y, data1.z, data1.w);
-		float3 axisv = make_float3(data2.y, data2.z, data2.w);
+		int portal = kernel_data.integrator.portal_offset + p;
+		const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
+		float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+		float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
 
 		if(!ray_quad_intersect(P, direction, 1e-4f, FLT_MAX, lightpos, axisu, axisv, dir, NULL, NULL, NULL, NULL))
 			continue;
@@ -346,10 +345,10 @@ ccl_device float3 background_portal_sample(KernelGlobals *kg,
 
 		if(portal == 0) {
 			/* p is the portal to be sampled. */
-			float4 data1 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 1);
-			float4 data2 = kernel_tex_fetch(__light_data, (p + kernel_data.integrator.portal_offset)*LIGHT_SIZE + 2);
-			float3 axisu = make_float3(data1.y, data1.z, data1.w);
-			float3 axisv = make_float3(data2.y, data2.z, data2.w);
+			int light_index = kernel_data.integrator.portal_offset + p; /* index into __lights; named so it does not shadow the `portal` tested above */
+			const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, light_index);
+			float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
+			float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
 
 			*pdf = area_light_sample(P, &lightpos,
 			                         axisu, axisv,
@@ -479,14 +478,10 @@ ccl_device float3 sphere_light_sample(float3 P, float3 center, float radius, flo
 	return disk_light_sample(normalize(P - center), randu, randv)*radius;
 }
 
-ccl_device float spot_light_attenuation(float4 data1, float4 data2, LightSample *ls)
+ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, LightSample *ls)
 {
-	float3 dir = make_float3(data2.y, data2.z, data2.w);
 	float3 I = ls->Ng;
 
-	float spot_angle = data1.w;
-	float spot_smooth = data2.x;
-
 	float attenuation = dot(dir, I);
 
 	if(attenuation <= spot_angle) {
@@ -518,12 +513,10 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
                                          float3 P,
                                          LightSample *ls)
 {
-	float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0);
-	float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1);
-
-	LightType type = (LightType)__float_as_int(data0.x);
+	const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+	LightType type = (LightType)klight->type;
 	ls->type = type;
-	ls->shader = __float_as_int(data1.x);
+	ls->shader = klight->shader_id;
 	ls->object = PRIM_NONE;
 	ls->prim = PRIM_NONE;
 	ls->lamp = lamp;
@@ -532,10 +525,10 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
 
 	if(type == LIGHT_DISTANT) {
 		/* distant light */
-		float3 lightD = make_float3(data0.y, data0.z, data0.w);
+		float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
 		float3 D = lightD;
-		float radius = data1.y;
-		float invarea = data1.w;
+		float radius = klight->distant.radius;
+		float invarea = klight->distant.invarea;
 
 		if(radius > 0.0f)
 			D = distant_light_sample(D, radius, randu, randv);
@@ -562,10 +555,10 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
 	}
 #endif
 	else {
-		ls->P = make_float3(data0.y, data0.z, data0.w);
+		ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
 
 		if(type == LIGHT_POINT || type == LIGHT_SPOT) {
-			float radius = data1.y;
+			float radius = klight->spot.radius;
 
 			if(radius > 0.0f)
 				/* sphere light */
@@ -574,14 +567,19 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
 			ls->D = normalize_len(ls->P - P, &ls->t);
 			ls->Ng = -ls->D;
 
-			float invarea = data1.z;
+			float invarea = klight->spot.invarea;
 			ls->eval_fac = (0.25f*M_1_PI_F)*invarea;
 			ls->pdf = invarea;
 
 			if(type == LIGHT_SPOT) {
 				/* spot light attenuation */
-				float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2);
-				ls->eval_fac *= spot_light_attenuation(data1, data2, ls);
+				float3 dir = make_float3(klight->spot.dir[0],
+				                         klight->spot.dir[1],
+				                         klight->spot.dir[2]);
+				ls->eval_fac *= spot_light_attenuation(dir,
+				                                       klight->spot.spot_angle,
+				                                       klight->spot.spot_smooth,
+				                                       ls);
 				if(ls->eval_fac == 0.0f) {
 					return false;
 				}
@@ -594,12 +592,15 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
 		}
 		else {
 			/* area light */
-			float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2);
-			float4 data3 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 3);
-
-			float3 axisu = make_float3(data1.y, data1.z, data1.w);
-			float3 axisv = make_float3(data2.y, data2.z, data2.w);
-			float3 D = make_float3(data3.y, data3.z, data3.w);
+			float3 axisu = make_float3(klight->area.axisu[0],
+			                           klight->area.axisu[1],
+			                           klight->area.axisu[2]);
+			float3 axisv = make_float3(klight->area.axisv[0],
+			                           klight->area.axisv[1],
+			                           klight->area.axisv[2]);
+			float3 D = make_float3(klight->area.dir[0],
+			                       klight->area.dir[1],
+			                       klight->area.dir[2]);
 
 			if(dot(ls->P - P, D) > 0.0f) {
 				return false;
@@ -618,7 +619,7 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
 			ls->Ng = D;
 			ls->D = normalize_len(ls->P - P, &ls->t);
 
-			float invarea = data2.x;
+			float invarea = klight->area.invarea;
 			ls->eval_fac = 0.25f*invarea;
 		}
 	}
@@ -630,12 +631,10 @@ ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
 
 ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
 {
-	float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0);
-	float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1);
-
-	LightType type = (LightType)__float_as_int(data0.x);
+	const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
+	LightType type = (LightType)klight->type;
 	ls->type = type;
-	ls->shader = __float_as_int(data1.x);
+	ls->shader = klight->shader_id;
 	ls->object = PRIM_NONE;
 	ls->prim = PRIM_NONE;
 	ls->lamp = lamp;
@@ -648,7 +647,7 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
 
 	if(type == LIGHT_DISTANT) {
 		/* distant light */
-		float radius = data1.y;
+		float radius = klight->distant.radius;
 
 		if(radius == 0.0f)
 			return false;
@@ -670,9 +669,9 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
 		 *             P
 		 */
 
-		float3 lightD = make_float3(data0.y, data0.z, data0.w);
+		float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
 		float costheta = dot(-lightD, D);
-		float cosangle = data1.z;
+		float cosangle = klight->distant.cosangle;
 
 		if(costheta < cosangle)
 			return false;
@@ -683,13 +682,14 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
 		ls->t = FLT_MAX;
 
 		/* compute pdf */
-		float invarea = data1.w;
+		float invarea = klight->distant.invarea;
 		ls->pdf = invarea/(costheta*costheta*costheta);
 		ls->eval_fac = ls->pdf;
 	}
 	else if(type == LIGHT_POINT || type == LIGHT_SPOT) {
-		float3 lightP = make_float3(data0.y, data0.z, data0.w);
-		float radius = data1.y;
+		float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
+
+		float radius = klight->spot.radius;
 
 		/* sphere light */
 		if(radius == 0.0f)
@@ -704,14 +704,19 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
 		ls->Ng = -D;
 		ls->D = D;
 
-		float invarea = data1.z;
+		float invarea = klight->spot.invarea;
 		ls->eval_fac = (0.25f*M_1_PI_F)*invarea;
 		ls->pdf = invarea;
 
 		if(type == LIGHT_SPOT) {
 			/* spot light attenuation */
-			float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2);
-			ls->eval_fac *= spot_light_attenuation(data1, data2, ls);
+			float3 dir = make_float3(klight->spot.dir[0],
+			                         klight->spot.dir[1],
+			                         klight->spot.dir[2]);
+			ls->eval_fac *= spot_light_attenuation(dir,
+			                                       klight->spot.spot_angle,
+			                                       klight->spot.spot_smooth,
+			                                       ls);
 
 			if(ls->eval_fac == 0.0f)
 				return false;
@@ -726,22 +731,25 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
 	}
 	else if(type == LIGHT_AREA) {
 		/* area light */
-		float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2);
-		float4 data3 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 3);
-
-		float invarea = data2.x;
+		float invarea = klight->area.invarea;
 		if(invarea == 0.0f)
 			return false;
 
-		float3 axisu = make_float3(data1.y, data1.z, data1.w);
-		float3 axisv = make_float3(data2.y, data2.z, data2.w);
-		float3 Ng = make_float3(data3.y, data3.z, data3.w);
+		float3 axisu = make_float3(klight->area.axisu[0],
+		                           klight->area.axisu[1],
+		                           klight->area.axisu[2]);
+		float3 axisv = make_float3(klight->area.axisv[0],
+		                           klight->area.axisv[1],
+		                           klight->area.axisv[2]);
+		float3 Ng = make_float3(klight->area.dir[0],
+		                        klight->area.dir[1],
+		                        klight->area.dir[2]);
 
 		/* one sided */
 		if(dot(D, Ng) >= 0.0f)
 			return false;
 
-		float3 light_P = make_float3(data0.y, data0.z, data0.w);
+		float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
 
 		if(!ray_quad_intersect(P, D, 0.0f, t, light_P,
 		                       axisu, axisv, Ng,
@@ -784,7 +792,8 @@ ccl_device_inline bool triangle_world_space_vertices(KernelGlobals *kg, int obje
 #ifdef __INSTANCING__
 	if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
 #  ifdef __OBJECT_MOTION__
-		Transform tfm = object_fetch_transform_motion_test(kg, object, time, NULL);
+		float object_time = (time >= 0.0f) ? time : 0.5f;
+		Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL);
 #  else
 		Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
 #  endif
@@ -1040,7 +1049,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
 		int half_len = len >> 1;
 		int middle = first + half_len;
 
-		if(r < kernel_tex_fetch(__light_distribution, middle).x) {
+		if(r < kernel_tex_fetch(__light_distribution, middle).totarea) {
 			len = half_len;
 		}
 		else {
@@ -1055,8 +1064,8 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
 
 	/* Rescale to reuse random number. this helps the 2D samples within
 	 * each area light be stratified as well. */
-	float distr_min = kernel_tex_fetch(__light_distribution, index).x;
-	float distr_max = kernel_tex_fetch(__light_distribution, index+1).x;
+	float distr_min = kernel_tex_fetch(__light_distribution, index).totarea;
+	float distr_max = kernel_tex_fetch(__light_distribution, index+1).totarea;
 	*randu = (r - distr_min)/(distr_max - distr_min);
 
 	return index;
@@ -1066,8 +1075,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
 
 ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce)
 {
-	float4 data4 = kernel_tex_fetch(__light_data, index*LIGHT_SIZE + 4);
-	return (bounce > __float_as_int(data4.x));
+	return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
 }
 
 ccl_device_noinline bool light_sample(KernelGlobals *kg,
@@ -1082,12 +1090,12 @@ ccl_device_noinline bool light_sample(KernelGlobals *kg,
 	int index = light_distribution_sample(kg, &randu);
 
 	/* fetch light data */
-	float4 l = kernel_tex_fetch(__light_distribution, index);
-	int prim = __float_as_int(l.y);
+	const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution, index);
+	int prim = kdistribution->prim;
 
 	if(prim >= 0) {
-		int object = __float_as_int(l.w);
-		int shader_flag = __float_as_int(l.z);
+		int object = kdistribution->mesh_light.object_id;
+		int shader_flag = kdistribution->mesh_light.shader_flag;
 
 		triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
 		ls->shader |= shader_flag;
@@ -1106,8 +1114,7 @@ ccl_device_noinline bool light_sample(KernelGlobals *kg,
 
 ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
 {
-	float4 data3 = kernel_tex_fetch(__light_data, index*LIGHT_SIZE + 3);
-	return __float_as_int(data3.x);
+	return kernel_tex_fetch(__lights, index).samples;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_math.h b/intern/cycles/kernel/kernel_math.h
index bd0e23b7705..96391db7649 100644
--- a/intern/cycles/kernel/kernel_math.h
+++ b/intern/cycles/kernel/kernel_math.h
@@ -21,6 +21,7 @@
 #include "util/util_math.h"
 #include "util/util_math_fast.h"
 #include "util/util_math_intersect.h"
+#include "util/util_projection.h"
 #include "util/util_texture.h"
 #include "util/util_transform.h"
 
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index b1f66852b7f..fc8d06fc33d 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -114,7 +114,7 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
 
 	sd->I = -ray->D;
 
-	sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
+	sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
 
 #ifdef __INSTANCING__
 	if(isect->object != OBJECT_NONE) {
@@ -199,7 +199,7 @@ void shader_setup_from_subsurface(
 		motion_triangle_shader_setup(kg, sd, isect, ray, true);
 	}
 
-	sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
+	sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
 
 #  ifdef __INSTANCING__
 	if(isect->object != OBJECT_NONE) {
@@ -276,7 +276,7 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
 	sd->time = time;
 	sd->ray_length = t;
 
-	sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
+	sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
 	sd->object_flag = 0;
 	if(sd->object != OBJECT_NONE) {
 		sd->object_flag |= kernel_tex_fetch(__object_flag,
@@ -386,7 +386,7 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
 	sd->Ng = -ray->D;
 	sd->I = -ray->D;
 	sd->shader = kernel_data.background.surface_shader;
-	sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
+	sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
 	sd->object_flag = 0;
 	sd->time = ray->time;
 	sd->ray_length = 0.0f;
@@ -1181,7 +1181,7 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
 		sd->shader = stack[i].shader;
 
 		sd->flag &= ~SD_SHADER_FLAGS;
-		sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*SHADER_SIZE);
+		sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
 		sd->object_flag &= ~SD_OBJECT_FLAGS;
 
 		if(sd->object != OBJECT_NONE) {
@@ -1254,7 +1254,7 @@ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect
 		shader = __float_as_int(str.z);
 	}
 #endif
-	int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*SHADER_SIZE);
+	int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
 
 	return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
 }
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index 74b659557e5..9047b93a0b2 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -31,8 +31,13 @@ KERNEL_TEX(uint, __object_node)
 KERNEL_TEX(float2, __prim_time)
 
 /* objects */
-KERNEL_TEX(float4, __objects)
-KERNEL_TEX(float4, __objects_vector)
+KERNEL_TEX(KernelObject, __objects)
+KERNEL_TEX(Transform, __object_motion_pass)
+KERNEL_TEX(DecomposedTransform, __object_motion)
+KERNEL_TEX(uint, __object_flag)
+
+/* cameras */
+KERNEL_TEX(DecomposedTransform, __camera_motion)
 
 /* triangles */
 KERNEL_TEX(uint, __tri_shader)
@@ -55,18 +60,17 @@ KERNEL_TEX(float4, __attributes_float3)
 KERNEL_TEX(uchar4, __attributes_uchar4)
 
 /* lights */
-KERNEL_TEX(float4, __light_distribution)
-KERNEL_TEX(float4, __light_data)
+KERNEL_TEX(KernelLightDistribution, __light_distribution)
+KERNEL_TEX(KernelLight, __lights)
 KERNEL_TEX(float2, __light_background_marginal_cdf)
 KERNEL_TEX(float2, __light_background_conditional_cdf)
 
 /* particles */
-KERNEL_TEX(float4, __particles)
+KERNEL_TEX(KernelParticle, __particles)
 
 /* shaders */
 KERNEL_TEX(uint4, __svm_nodes)
-KERNEL_TEX(uint, __shader_flag)
-KERNEL_TEX(uint, __object_flag)
+KERNEL_TEX(KernelShader, __shaders)
 
 /* lookup tables */
 KERNEL_TEX(float, __lookup_table)
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 2a437cdbdc6..977ceac12ea 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -35,14 +35,10 @@
 CCL_NAMESPACE_BEGIN
 
 /* Constants */
-#define OBJECT_SIZE 		16
-#define OBJECT_VECTOR_SIZE	6
-#define LIGHT_SIZE		11
-#define FILTER_TABLE_SIZE	1024
-#define RAMP_TABLE_SIZE		256
-#define SHUTTER_TABLE_SIZE		256
-#define PARTICLE_SIZE 		5
-#define SHADER_SIZE		5
+#define OBJECT_MOTION_PASS_SIZE 2
+#define FILTER_TABLE_SIZE       1024
+#define RAMP_TABLE_SIZE         256
+#define SHUTTER_TABLE_SIZE      256
 
 #define BSSRDF_MIN_RADIUS			1e-8f
 #define BSSRDF_MAX_HITS				4
@@ -925,7 +921,7 @@ enum ShaderDataFlag {
 	SD_HAS_BUMP               = (1 << 25),
 	/* Has true displacement. */
 	SD_HAS_DISPLACEMENT       = (1 << 26),
-	/* Has constant emission (value stored in __shader_flag) */
+	/* Has constant emission (value stored in __shaders) */
 	SD_HAS_CONSTANT_EMISSION  = (1 << 27),
 	/* Needs to access attributes */
 	SD_NEED_ATTRIBUTES        = (1 << 28),
@@ -1163,7 +1159,7 @@ typedef struct KernelCamera {
 
 	/* matrices */
 	Transform cameratoworld;
-	Transform rastertocamera;
+	ProjectionTransform rastertocamera;
 
 	/* differentials */
 	float4 dx;
@@ -1177,7 +1173,7 @@ typedef struct KernelCamera {
 
 	/* motion blur */
 	float shuttertime;
-	int have_motion, have_perspective_motion;
+	int num_motion_steps, have_perspective_motion;
 
 	/* clipping */
 	float nearclip;
@@ -1197,22 +1193,22 @@ typedef struct KernelCamera {
 	int is_inside_volume;
 
 	/* more matrices */
-	Transform screentoworld;
-	Transform rastertoworld;
-	/* work around cuda sm 2.0 crash, this seems to
-	 * cross some limit in combination with motion 
-	 * Transform ndctoworld; */
-	Transform worldtoscreen;
-	Transform worldtoraster;
-	Transform worldtondc;
+	ProjectionTransform screentoworld;
+	ProjectionTransform rastertoworld;
+	ProjectionTransform ndctoworld;
+	ProjectionTransform worldtoscreen;
+	ProjectionTransform worldtoraster;
+	ProjectionTransform worldtondc;
 	Transform worldtocamera;
 
-	MotionTransform motion;
+	/* Stores changes in the projection matrix. Used for camera zoom motion
+	 * blur and motion pass output for perspective camera. */
+	ProjectionTransform perspective_pre;
+	ProjectionTransform perspective_post;
 
-	/* Denotes changes in the projective matrix, namely in rastertocamera.
-	 * Used for camera zoom motion blur,
-	 */
-	PerspectiveMotionTransform perspective_motion;
+	/* Transforms for motion pass. */
+	Transform motion_pass_pre;
+	Transform motion_pass_post;
 
 	int shutter_table_offset;
 
@@ -1434,6 +1430,110 @@ typedef struct KernelData {
 } KernelData;
 static_assert_align(KernelData, 16);
 
+/* Kernel data structures. */
+
+typedef struct KernelObject {  /* Element type of the __objects kernel texture. */
+	Transform tfm;  /* presumably object-to-world — TODO confirm against object_fetch_transform() */
+	Transform itfm;  /* presumably the inverse of tfm — TODO confirm */
+
+	float surface_area;
+	float pass_id;
+	float random_number;
+	int particle_index;
+
+	float dupli_generated[3];
+	float dupli_uv[2];
+
+	int numkeys;
+	int numsteps;
+	int numverts;
+
+	uint patch_map_offset;
+	uint attribute_map_offset;
+	uint motion_offset;
+	uint pad;  /* Not a data field; presumably pads the size for the alignment assert below — confirm */
+} KernelObject;
+static_assert_align(KernelObject, 16);
+
+typedef struct KernelSpotLight {
+	float radius;
+	float invarea;
+	float spot_angle;
+	float spot_smooth;
+	float dir[3];
+} KernelSpotLight;
+
+/* PointLight is SpotLight with only radius and invarea being used. */
+
+typedef struct KernelAreaLight {
+	float axisu[3];
+	float invarea;
+	float axisv[3];
+	float dir[3];
+} KernelAreaLight;
+
+typedef struct KernelDistantLight {
+	float radius;
+	float cosangle;
+	float invarea;
+} KernelDistantLight;
+
+typedef struct KernelLight {  /* Element type of the __lights kernel texture. */
+	int type;
+	float co[3];  /* presumably the light position (klight->co is read into light_P) — confirm */
+	int shader_id;
+	int samples;  /* Returned by light_select_num_samples(). */
+	float max_bounces;  /* Compared against the bounce count in light_select_reached_max_bounces(). */
+	float random;
+	Transform tfm;
+	Transform itfm;
+	union {  /* presumably the active member is selected by `type` — confirm */
+		KernelSpotLight spot;
+		KernelAreaLight area;
+		KernelDistantLight distant;
+	};
+} KernelLight;
+static_assert_align(KernelLight, 16);
+
+typedef struct KernelLightDistribution {  /* Element type of the __light_distribution kernel texture. */
+	float totarea;  /* Binary-searched and used to rescale randu in light_distribution_sample(). */
+	int prim;  /* light_sample() reads mesh_light when prim >= 0. */
+	union {
+		struct {
+			int shader_flag;  /* OR-ed into ls->shader by light_sample(). */
+			int object_id;  /* Passed as the object to triangle_light_sample(). */
+		} mesh_light;
+		struct {
+			float pad;
+			float size;
+		} lamp;
+	};
+} KernelLightDistribution;
+static_assert_align(KernelLightDistribution, 16);
+
+typedef struct KernelParticle {
+	int index;
+	float age;
+	float lifetime;
+	float size;
+	float4 rotation;
+	/* Only xyz are used of the following. float4 instead of float3 are used
+	 * to ensure consistent padding/alignment across devices. */
+	float4 location;
+	float4 velocity;
+	float4 angular_velocity;
+} KernelParticle;
+static_assert_align(KernelParticle, 16);
+
+typedef struct KernelShader {  /* Element type of the __shaders kernel texture. */
+	float constant_emission[3];  /* Value referred to by the SD_HAS_CONSTANT_EMISSION flag comment. */
+	float pad1;
+	int flags;  /* SD_* bits; copied into sd->flag by the shader_setup_* functions. */
+	int pass_id;
+	int pad2, pad3;
+} KernelShader;
+static_assert_align(KernelShader, 16);
+
 /* Declarations required for split kernel */
 
 /* Macro for queues */
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 058e7dccafd..88360e5f1ae 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -104,7 +104,7 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel)
 ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
 {
 	for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
-		int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
+		int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
 
 		if(shader_flag & SD_HETEROGENEOUS_VOLUME) {
 			return true;
@@ -134,7 +134,7 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac
 	int method = -1;
 
 	for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
-		int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
+		int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags;
 
 		if(shader_flag & SD_VOLUME_MIS) {
 			return SD_VOLUME_MIS;
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index ae4c521659c..0c5e5e30e47 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -62,11 +62,17 @@ CCL_NAMESPACE_BEGIN
 
 /* RenderServices implementation */
 
-#define COPY_MATRIX44(m1, m2)  { \
-	CHECK_TYPE(m1, OSL::Matrix44*); \
-	CHECK_TYPE(m2, Transform*); \
-	memcpy(m1, m2, sizeof(*m2)); \
-} (void)0
+static void copy_matrix(OSL::Matrix44& m, const Transform& tfm)
+{
+	ProjectionTransform t = projection_transpose(ProjectionTransform(tfm));
+	memcpy(&m, &t, sizeof(m));
+}
+
+static void copy_matrix(OSL::Matrix44& m, const ProjectionTransform& tfm)
+{
+	ProjectionTransform t = projection_transpose(tfm);
+	memcpy(&m, &t, sizeof(m));
+}
 
 /* static ustrings */
 ustring OSLRenderServices::u_distance("distance");
@@ -167,14 +173,12 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result
 #else
 			Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
 #endif
-			tfm = transform_transpose(tfm);
-			COPY_MATRIX44(&result, &tfm);
+			copy_matrix(result, tfm);
 
 			return true;
 		}
 		else if(sd->type == PRIMITIVE_LAMP) {
-			Transform tfm = transform_transpose(sd->ob_tfm);
-			COPY_MATRIX44(&result, &tfm);
+			copy_matrix(result, sd->ob_tfm);
 
 			return true;
 		}
@@ -203,14 +207,12 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44
 #else
 			Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 #endif
-			itfm = transform_transpose(itfm);
-			COPY_MATRIX44(&result, &itfm);
+			copy_matrix(result, itfm);
 
 			return true;
 		}
 		else if(sd->type == PRIMITIVE_LAMP) {
-			Transform tfm = transform_transpose(sd->ob_itfm);
-			COPY_MATRIX44(&result, &tfm);
+			copy_matrix(result, sd->ob_itfm);
 
 			return true;
 		}
@@ -224,23 +226,19 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result
 	KernelGlobals *kg = kernel_globals;
 
 	if(from == u_ndc) {
-		Transform tfm = transform_transpose(transform_quick_inverse(kernel_data.cam.worldtondc));
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.ndctoworld);
 		return true;
 	}
 	else if(from == u_raster) {
-		Transform tfm = transform_transpose(kernel_data.cam.rastertoworld);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.rastertoworld);
 		return true;
 	}
 	else if(from == u_screen) {
-		Transform tfm = transform_transpose(kernel_data.cam.screentoworld);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.screentoworld);
 		return true;
 	}
 	else if(from == u_camera) {
-		Transform tfm = transform_transpose(kernel_data.cam.cameratoworld);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.cameratoworld);
 		return true;
 	}
 	else if(from == u_world) {
@@ -256,23 +254,19 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44
 	KernelGlobals *kg = kernel_globals;
 
 	if(to == u_ndc) {
-		Transform tfm = transform_transpose(kernel_data.cam.worldtondc);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.worldtondc);
 		return true;
 	}
 	else if(to == u_raster) {
-		Transform tfm = transform_transpose(kernel_data.cam.worldtoraster);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.worldtoraster);
 		return true;
 	}
 	else if(to == u_screen) {
-		Transform tfm = transform_transpose(kernel_data.cam.worldtoscreen);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.worldtoscreen);
 		return true;
 	}
 	else if(to == u_camera) {
-		Transform tfm = transform_transpose(kernel_data.cam.worldtocamera);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.worldtocamera);
 		return true;
 	}
 	else if(to == u_world) {
@@ -298,14 +292,12 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result
 			KernelGlobals *kg = sd->osl_globals;
 			Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
 #endif
-			tfm = transform_transpose(tfm);
-			COPY_MATRIX44(&result, &tfm);
+			copy_matrix(result, tfm);
 
 			return true;
 		}
 		else if(sd->type == PRIMITIVE_LAMP) {
-			Transform tfm = transform_transpose(sd->ob_tfm);
-			COPY_MATRIX44(&result, &tfm);
+			copy_matrix(result, sd->ob_tfm);
 
 			return true;
 		}
@@ -329,14 +321,12 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44
 			KernelGlobals *kg = sd->osl_globals;
 			Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 #endif
-			tfm = transform_transpose(tfm);
-			COPY_MATRIX44(&result, &tfm);
+			copy_matrix(result, tfm);
 
 			return true;
 		}
 		else if(sd->type == PRIMITIVE_LAMP) {
-			Transform tfm = transform_transpose(sd->ob_itfm);
-			COPY_MATRIX44(&result, &tfm);
+			copy_matrix(result, sd->ob_itfm);
 
 			return true;
 		}
@@ -350,23 +340,19 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result
 	KernelGlobals *kg = kernel_globals;
 
 	if(from == u_ndc) {
-		Transform tfm = transform_transpose(transform_quick_inverse(kernel_data.cam.worldtondc));
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.ndctoworld);
 		return true;
 	}
 	else if(from == u_raster) {
-		Transform tfm = transform_transpose(kernel_data.cam.rastertoworld);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.rastertoworld);
 		return true;
 	}
 	else if(from == u_screen) {
-		Transform tfm = transform_transpose(kernel_data.cam.screentoworld);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.screentoworld);
 		return true;
 	}
 	else if(from == u_camera) {
-		Transform tfm = transform_transpose(kernel_data.cam.cameratoworld);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.cameratoworld);
 		return true;
 	}
 
@@ -378,23 +364,19 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44
 	KernelGlobals *kg = kernel_globals;
 	
 	if(to == u_ndc) {
-		Transform tfm = transform_transpose(kernel_data.cam.worldtondc);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.worldtondc);
 		return true;
 	}
 	else if(to == u_raster) {
-		Transform tfm = transform_transpose(kernel_data.cam.worldtoraster);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.worldtoraster);
 		return true;
 	}
 	else if(to == u_screen) {
-		Transform tfm = transform_transpose(kernel_data.cam.worldtoscreen);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.worldtoscreen);
 		return true;
 	}
 	else if(to == u_camera) {
-		Transform tfm = transform_transpose(kernel_data.cam.worldtocamera);
-		COPY_MATRIX44(&result, &tfm);
+		copy_matrix(result, kernel_data.cam.worldtocamera);
 		return true;
 	}
 	
@@ -570,8 +552,7 @@ static bool set_attribute_float3_3(float3 P[3], TypeDesc type, bool derivatives,
 static bool set_attribute_matrix(const Transform& tfm, TypeDesc type, void *val)
 {
 	if(type == TypeDesc::TypeMatrix) {
-		Transform transpose = transform_transpose(tfm);
-		memcpy(val, &transpose, sizeof(Transform));
+		copy_matrix(*(OSL::Matrix44*)val, tfm);
 		return true;
 	}
 
diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h
index 0a890545af4..42a7ae9946f 100644
--- a/intern/cycles/kernel/svm/svm_mapping.h
+++ b/intern/cycles/kernel/svm/svm_mapping.h
@@ -26,7 +26,6 @@ ccl_device void svm_node_mapping(KernelGlobals *kg, ShaderData *sd, float *stack
 	tfm.x = read_node_float(kg, offset);
 	tfm.y = read_node_float(kg, offset);
 	tfm.z = read_node_float(kg, offset);
-	tfm.w = read_node_float(kg, offset);
 
 	float3 r = transform_point(&tfm, v);
 	stack_store_float3(stack, out_offset, r);
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index c94327401f5..6ff39e5f587 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -42,7 +42,6 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg,
 				tfm.x = read_node_float(kg, offset);
 				tfm.y = read_node_float(kg, offset);
 				tfm.z = read_node_float(kg, offset);
-				tfm.w = read_node_float(kg, offset);
 				data = transform_point(&tfm, data);
 			}
 			break;
@@ -123,7 +122,6 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
 				tfm.x = read_node_float(kg, offset);
 				tfm.y = read_node_float(kg, offset);
 				tfm.z = read_node_float(kg, offset);
-				tfm.w = read_node_float(kg, offset);
 				data = transform_point(&tfm, data);
 			}
 			break;
@@ -207,7 +205,6 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
 				tfm.x = read_node_float(kg, offset);
 				tfm.y = read_node_float(kg, offset);
 				tfm.z = read_node_float(kg, offset);
-				tfm.w = read_node_float(kg, offset);
 				data = transform_point(&tfm, data);
 			}
 			break;
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index d967516a5c9..43b433683e0 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -39,7 +39,6 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
 		tfm.x = read_node_float(kg, offset);
 		tfm.y = read_node_float(kg, offset);
 		tfm.z = read_node_float(kg, offset);
-		tfm.w = read_node_float(kg, offset);
 		co = transform_point(&tfm, co);
 	}
 
diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp
index a2fda12ec85..38936ffc094 100644
--- a/intern/cycles/render/camera.cpp
+++ b/intern/cycles/render/camera.cpp
@@ -82,6 +82,7 @@ NODE_DEFINE(Camera)
 	SOCKET_FLOAT(bladesrotation, "Blades Rotation", 0.0f);
 
 	SOCKET_TRANSFORM(matrix, "Matrix", transform_identity());
+	SOCKET_TRANSFORM_ARRAY(motion, "Motion", array<Transform>());
 
 	SOCKET_FLOAT(aperture_ratio, "Aperture Ratio", 1.0f);
 
@@ -151,9 +152,6 @@ Camera::Camera()
 	height = 512;
 	resolution = 1;
 
-	motion.pre = transform_identity();
-	motion.post = transform_identity();
-	use_motion = false;
 	use_perspective_motion = false;
 
 	shutter_curve.resize(RAMP_TABLE_SIZE);
@@ -163,12 +161,12 @@ Camera::Camera()
 
 	compute_auto_viewplane();
 
-	screentoworld = transform_identity();
-	rastertoworld = transform_identity();
-	ndctoworld = transform_identity();
-	rastertocamera = transform_identity();
+	screentoworld = projection_identity();
+	rastertoworld = projection_identity();
+	ndctoworld = projection_identity();
+	rastertocamera = projection_identity();
 	cameratoworld = transform_identity();
-	worldtoraster = transform_identity();
+	worldtoraster = projection_identity();
 
 	dx = make_float3(0.0f, 0.0f, 0.0f);
 	dy = make_float3(0.0f, 0.0f, 0.0f);
@@ -241,18 +239,18 @@ void Camera::update(Scene *scene)
 	Transform full_rastertoscreen = transform_inverse(full_screentoraster);
 
 	/* screen to camera */
-	Transform cameratoscreen;
+	ProjectionTransform cameratoscreen;
 	if(type == CAMERA_PERSPECTIVE)
-		cameratoscreen = transform_perspective(fov, nearclip, farclip);
+		cameratoscreen = projection_perspective(fov, nearclip, farclip);
 	else if(type == CAMERA_ORTHOGRAPHIC)
-		cameratoscreen = transform_orthographic(nearclip, farclip);
+		cameratoscreen = projection_orthographic(nearclip, farclip);
 	else
-		cameratoscreen = transform_identity();
+		cameratoscreen = projection_identity();
 	
-	Transform screentocamera = transform_inverse(cameratoscreen);
+	ProjectionTransform screentocamera = projection_inverse(cameratoscreen);
 
 	rastertocamera = screentocamera * rastertoscreen;
-	Transform full_rastertocamera = screentocamera * full_rastertoscreen;
+	ProjectionTransform full_rastertocamera = screentocamera * full_rastertoscreen;
 	cameratoraster = screentoraster * cameratoscreen;
 
 	cameratoworld = matrix;
@@ -270,10 +268,10 @@ void Camera::update(Scene *scene)
 
 	/* differentials */
 	if(type == CAMERA_ORTHOGRAPHIC) {
-		dx = transform_direction(&rastertocamera, make_float3(1, 0, 0));
-		dy = transform_direction(&rastertocamera, make_float3(0, 1, 0));
-		full_dx = transform_direction(&full_rastertocamera, make_float3(1, 0, 0));
-		full_dy = transform_direction(&full_rastertocamera, make_float3(0, 1, 0));
+		dx = transform_perspective_direction(&rastertocamera, make_float3(1, 0, 0));
+		dy = transform_perspective_direction(&rastertocamera, make_float3(0, 1, 0));
+		full_dx = transform_perspective_direction(&full_rastertocamera, make_float3(1, 0, 0));
+		full_dy = transform_perspective_direction(&full_rastertocamera, make_float3(0, 1, 0));
 	}
 	else if(type == CAMERA_PERSPECTIVE) {
 		dx = transform_perspective(&rastertocamera, make_float3(1, 0, 0)) -
@@ -302,23 +300,6 @@ void Camera::update(Scene *scene)
 		frustum_top_normal = normalize(make_float3(0.0f, v.z, -v.y));
 	}
 
-	/* TODO(sergey): Support other types of camera. */
-	if(type == CAMERA_PERSPECTIVE) {
-		/* TODO(sergey): Move to an utility function and de-duplicate with
-		 * calculation above.
-		 */
-		Transform screentocamera_pre =
-		        transform_inverse(transform_perspective(fov_pre,
-		                                                nearclip,
-		                                                farclip));
-		Transform screentocamera_post =
-		        transform_inverse(transform_perspective(fov_post,
-		                                                nearclip,
-		                                                farclip));
-		perspective_motion.pre = screentocamera_pre * rastertoscreen;
-		perspective_motion.post = screentocamera_post * rastertoscreen;
-	}
-
 	/* Compute kernel camera data. */
 	KernelCamera *kcam = &kernel_camera;
 
@@ -331,41 +312,65 @@ void Camera::update(Scene *scene)
 	kcam->worldtoscreen = worldtoscreen;
 	kcam->worldtoraster = worldtoraster;
 	kcam->worldtondc = worldtondc;
+	kcam->ndctoworld = ndctoworld;
 
 	/* camera motion */
-	kcam->have_motion = 0;
+	kcam->num_motion_steps = 0;
 	kcam->have_perspective_motion = 0;
+	kernel_camera_motion.clear();
+
+	/* Test if any of the transforms are actually different. */
+	bool have_motion = false;
+	for(size_t i = 0; i < motion.size(); i++) {
+		have_motion = have_motion || motion[i] != matrix;
+	}
 
 	if(need_motion == Scene::MOTION_PASS) {
 		/* TODO(sergey): Support perspective (zoom, fov) motion. */
 		if(type == CAMERA_PANORAMA) {
-			if(use_motion) {
-				kcam->motion.pre = transform_inverse(motion.pre);
-				kcam->motion.post = transform_inverse(motion.post);
+			if(have_motion) {
+				kcam->motion_pass_pre = transform_inverse(motion[0]);
+				kcam->motion_pass_post = transform_inverse(motion[motion.size()-1]);
 			}
 			else {
-				kcam->motion.pre = kcam->worldtocamera;
-				kcam->motion.post = kcam->worldtocamera;
+				kcam->motion_pass_pre = kcam->worldtocamera;
+				kcam->motion_pass_post = kcam->worldtocamera;
 			}
 		}
 		else {
-			if(use_motion) {
-				kcam->motion.pre = cameratoraster * transform_inverse(motion.pre);
-				kcam->motion.post = cameratoraster * transform_inverse(motion.post);
+			if(have_motion) {
+				kcam->perspective_pre = cameratoraster * transform_inverse(motion[0]);
+				kcam->perspective_post = cameratoraster * transform_inverse(motion[motion.size()-1]);
 			}
 			else {
-				kcam->motion.pre = worldtoraster;
-				kcam->motion.post = worldtoraster;
+				kcam->perspective_pre = worldtoraster;
+				kcam->perspective_post = worldtoraster;
 			}
 		}
 	}
 	else if(need_motion == Scene::MOTION_BLUR) {
-		if(use_motion) {
-			transform_motion_decompose(&kcam->motion, &motion, &matrix);
-			kcam->have_motion = 1;
+		if(have_motion) {
+			kernel_camera_motion.resize(motion.size());
+			transform_motion_decompose(kernel_camera_motion.data(), motion.data(), motion.size());
+			kcam->num_motion_steps = motion.size();
 		}
-		if(use_perspective_motion) {
-			kcam->perspective_motion = perspective_motion;
+
+		/* TODO(sergey): Support other types of camera. */
+		if(use_perspective_motion && type == CAMERA_PERSPECTIVE) {
+			/* TODO(sergey): Move to an utility function and de-duplicate with
+			 * calculation above.
+			 */
+			ProjectionTransform screentocamera_pre =
+					projection_inverse(projection_perspective(fov_pre,
+					                                          nearclip,
+					                                          farclip));
+			ProjectionTransform screentocamera_post =
+					projection_inverse(projection_perspective(fov_post,
+					                                          nearclip,
+					                                          farclip));
+
+			kcam->perspective_pre = screentocamera_pre * rastertoscreen;
+			kcam->perspective_post = screentocamera_post * rastertoscreen;
 			kcam->have_perspective_motion = 1;
 		}
 	}
@@ -470,6 +475,16 @@ void Camera::device_update(Device * /* device */,
 	}
 
 	dscene->data.cam = kernel_camera;
+
+	size_t num_motion_steps = kernel_camera_motion.size();
+	if(num_motion_steps) {
+		DecomposedTransform *camera_motion = dscene->camera_motion.alloc(num_motion_steps);
+		memcpy(camera_motion, kernel_camera_motion.data(), sizeof(*camera_motion) * num_motion_steps);
+		dscene->camera_motion.copy_to_device();
+	}
+	else {
+		dscene->camera_motion.free();
+	}
 }
 
 void Camera::device_update_volume(Device * /*device*/,
@@ -496,10 +511,11 @@ void Camera::device_update_volume(Device * /*device*/,
 }
 
 void Camera::device_free(Device * /*device*/,
-                         DeviceScene * /*dscene*/,
+                         DeviceScene *dscene,
                          Scene *scene)
 {
 	scene->lookup_tables->remove_table(&shutter_table_offset);
+	dscene->camera_motion.free();
 }
 
 bool Camera::modified(const Camera& cam)
@@ -510,7 +526,6 @@ bool Camera::modified(const Camera& cam)
 bool Camera::motion_modified(const Camera& cam)
 {
 	return !((motion == cam.motion) &&
-	         (use_motion == cam.use_motion) &&
 	         (use_perspective_motion == cam.use_perspective_motion));
 }
 
@@ -606,7 +621,7 @@ float Camera::world_to_raster_size(float3 P)
 		res = min(len(full_dx), len(full_dy));
 
 		if(offscreen_dicing_scale > 1.0f) {
-			float3 p = transform_perspective(&worldtocamera, P);
+			float3 p = transform_point(&worldtocamera, P);
 			float3 v = transform_perspective(&rastertocamera, make_float3(width, height, 0.0f));
 
 			/* Create point clamped to frustum */
@@ -707,17 +722,17 @@ float Camera::world_to_raster_size(float3 P)
 		 * may be a better way to do this, but calculating differentials from the
 		 * point directly ahead seems to produce good enough results. */
 #if 0
-		float2 dir = direction_to_panorama(&kernel_camera, normalize(D));
+		float2 dir = direction_to_panorama(&kernel_camera, kernel_camera_motion.data(), normalize(D));
 		float3 raster = transform_perspective(&cameratoraster, make_float3(dir.x, dir.y, 0.0f));
 
 		ray.t = 1.0f;
-		camera_sample_panorama(&kernel_camera, raster.x, raster.y, 0.0f, 0.0f, &ray);
+		camera_sample_panorama(&kernel_camera, kernel_camera_motion.data(), raster.x, raster.y, 0.0f, 0.0f, &ray);
 		if(ray.t == 0.0f) {
 			/* No differentials, just use from directly ahead. */
-			camera_sample_panorama(&kernel_camera, 0.5f*width, 0.5f*height, 0.0f, 0.0f, &ray);
+			camera_sample_panorama(&kernel_camera, kernel_camera_motion.data(), 0.5f*width, 0.5f*height, 0.0f, 0.0f, &ray);
 		}
 #else
-		camera_sample_panorama(&kernel_camera, 0.5f*width, 0.5f*height, 0.0f, 0.0f, &ray);
+		camera_sample_panorama(&kernel_camera, kernel_camera_motion.data(), 0.5f*width, 0.5f*height, 0.0f, 0.0f, &ray);
 #endif
 
 		differential_transfer(&ray.dP, ray.dP, ray.D, ray.dD, ray.D, dist);
@@ -729,4 +744,27 @@ float Camera::world_to_raster_size(float3 P)
 	return res;
 }
 
+bool Camera::use_motion() const
+{
+	return motion.size() > 1;
+}
+
+float Camera::motion_time(int step) const
+{
+	return (use_motion()) ? 2.0f * step / (motion.size() - 1) - 1.0f : 0.0f;
+}
+
+int Camera::motion_step(float time) const
+{
+	if(use_motion()) {
+		for(size_t step = 0; step < motion.size(); step++) {
+			if(time == motion_time((int)step)) {
+				return (int)step;
+			}
+		}
+	}
+	/* No motion, or no step whose motion_time() equals `time` exactly. */
+	return -1;
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h
index 4ec0fe3bc6e..37d05c01bd9 100644
--- a/intern/cycles/render/camera.h
+++ b/intern/cycles/render/camera.h
@@ -22,6 +22,7 @@
 #include "graph/node.h"
 
 #include "util/util_boundbox.h"
+#include "util/util_projection.h"
 #include "util/util_transform.h"
 #include "util/util_types.h"
 
@@ -140,24 +141,23 @@ public:
 	Transform matrix;
 
 	/* motion */
-	MotionTransform motion;
-	bool use_motion, use_perspective_motion;
+	array<Transform> motion;
+	bool use_perspective_motion;
 	float fov_pre, fov_post;
-	PerspectiveMotionTransform perspective_motion;
 
 	/* computed camera parameters */
-	Transform screentoworld;
-	Transform rastertoworld;
-	Transform ndctoworld;
+	ProjectionTransform screentoworld;
+	ProjectionTransform rastertoworld;
+	ProjectionTransform ndctoworld;
 	Transform cameratoworld;
 
-	Transform worldtoraster;
-	Transform worldtoscreen;
-	Transform worldtondc;
+	ProjectionTransform worldtoraster;
+	ProjectionTransform worldtoscreen;
+	ProjectionTransform worldtondc;
 	Transform worldtocamera;
 
-	Transform rastertocamera;
-	Transform cameratoraster;
+	ProjectionTransform rastertocamera;
+	ProjectionTransform cameratoraster;
 
 	float3 dx;
 	float3 dy;
@@ -176,6 +176,7 @@ public:
 
 	/* Kernel camera data, copied here for dicing. */
 	KernelCamera kernel_camera;
+	array<DecomposedTransform> kernel_camera_motion;
 
 	/* functions */
 	Camera();
@@ -199,6 +200,11 @@ public:
 	/* Calculates the width of a pixel at point in world space. */
 	float world_to_raster_size(float3 P);
 
+	/* Motion blur. */
+	float motion_time(int step) const;
+	int motion_step(float time) const;
+	bool use_motion() const;
+
 private:
 	/* Private utility functions. */
 	float3 transform_raster_to_world(float raster_x, float raster_y);
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index b62453cf5fc..8dec7e4ea64 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -288,7 +288,7 @@ void LightManager::device_update_distribution(Device *, DeviceScene *dscene, Sce
 	VLOG(1) << "Total " << num_distribution << " of light distribution primitives.";
 
 	/* emission area */
-	float4 *distribution = dscene->light_distribution.alloc(num_distribution + 1);
+	KernelLightDistribution *distribution = dscene->light_distribution.alloc(num_distribution + 1);
 	float totarea = 0.0f;
 
 	/* triangles */
@@ -334,10 +334,10 @@ void LightManager::device_update_distribution(Device *, DeviceScene *dscene, Sce
 			                         : scene->default_surface;
 
 			if(shader->use_mis && shader->has_surface_emission) {
-				distribution[offset].x = totarea;
-				distribution[offset].y = __int_as_float(i + mesh->tri_offset);
-				distribution[offset].z = __int_as_float(shader_flag);
-				distribution[offset].w = __int_as_float(object_id);
+				distribution[offset].totarea = totarea;
+				distribution[offset].prim = i + mesh->tri_offset;
+				distribution[offset].mesh_light.shader_flag = shader_flag;
+				distribution[offset].mesh_light.object_id = object_id;
 				offset++;
 
 				Mesh::Triangle t = mesh->get_triangle(i);
@@ -372,10 +372,10 @@ void LightManager::device_update_distribution(Device *, DeviceScene *dscene, Sce
 		if(!light->is_enabled)
 			continue;
 
-		distribution[offset].x = totarea;
-		distribution[offset].y = __int_as_float(~light_index);
-		distribution[offset].z = 1.0f;
-		distribution[offset].w = light->size;
+		distribution[offset].totarea = totarea;
+		distribution[offset].prim = ~light_index;
+		distribution[offset].lamp.pad = 1.0f;
+		distribution[offset].lamp.size = light->size;
 		totarea += lightarea;
 
 		if(light->size > 0.0f && light->use_mis)
@@ -390,15 +390,15 @@ void LightManager::device_update_distribution(Device *, DeviceScene *dscene, Sce
 	}
 
 	/* normalize cumulative distribution functions */
-	distribution[num_distribution].x = totarea;
-	distribution[num_distribution].y = 0.0f;
-	distribution[num_distribution].z = 0.0f;
-	distribution[num_distribution].w = 0.0f;
+	distribution[num_distribution].totarea = totarea;
+	distribution[num_distribution].prim = 0.0f;
+	distribution[num_distribution].lamp.pad = 0.0f;
+	distribution[num_distribution].lamp.size = 0.0f;
 
 	if(totarea > 0.0f) {
 		for(size_t i = 0; i < num_distribution; i++)
-			distribution[i].x /= totarea;
-		distribution[num_distribution].x = 1.0f;
+			distribution[i].totarea /= totarea;
+		distribution[num_distribution].totarea = 1.0f;
 	}
 
 	if(progress.get_cancel()) return;
@@ -620,7 +620,7 @@ void LightManager::device_update_points(Device *,
 		}
 	}
 
-	float4 *light_data = dscene->light_data.alloc(num_lights*LIGHT_SIZE);
+	KernelLight *klights = dscene->lights.alloc(num_lights);
 
 	if(num_lights == 0) {
 		VLOG(1) << "No effective light, ignoring points update.";
@@ -637,8 +637,8 @@ void LightManager::device_update_points(Device *,
 		float3 co = light->co;
 		Shader *shader = (light->shader) ? light->shader : scene->default_light;
 		int shader_id = scene->shader_manager->get_shader_id(shader);
-		float samples = __int_as_float(light->samples);
-		float max_bounces = __int_as_float(light->max_bounces);
+		int samples = light->samples;
+		int max_bounces = light->max_bounces;
 		float random = (float)light->random_id * (1.0f/(float)0xFFFFFFFF);
 
 		if(!light->cast_shadow)
@@ -661,6 +661,9 @@ void LightManager::device_update_points(Device *,
 			use_light_visibility = true;
 		}
 
+		klights[light_index].type = light->type;
+		klights[light_index].samples = samples;
+
 		if(light->type == LIGHT_POINT) {
 			shader_id &= ~SHADER_AREA_LIGHT;
 
@@ -670,10 +673,12 @@ void LightManager::device_update_points(Device *,
 			if(light->use_mis && radius > 0.0f)
 				shader_id |= SHADER_USE_MIS;
 
-			light_data[light_index*LIGHT_SIZE + 0] = make_float4(__int_as_float(light->type), co.x, co.y, co.z);
-			light_data[light_index*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), radius, invarea, 0.0f);
-			light_data[light_index*LIGHT_SIZE + 2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-			light_data[light_index*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f);
+			klights[light_index].co[0] = co.x;
+			klights[light_index].co[1] = co.y;
+			klights[light_index].co[2] = co.z;
+
+			klights[light_index].spot.radius = radius;
+			klights[light_index].spot.invarea = invarea;
 		}
 		else if(light->type == LIGHT_DISTANT) {
 			shader_id &= ~SHADER_AREA_LIGHT;
@@ -690,10 +695,13 @@ void LightManager::device_update_points(Device *,
 			if(light->use_mis && area > 0.0f)
 				shader_id |= SHADER_USE_MIS;
 
-			light_data[light_index*LIGHT_SIZE + 0] = make_float4(__int_as_float(light->type), dir.x, dir.y, dir.z);
-			light_data[light_index*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), radius, cosangle, invarea);
-			light_data[light_index*LIGHT_SIZE + 2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-			light_data[light_index*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f);
+			klights[light_index].co[0] = dir.x;
+			klights[light_index].co[1] = dir.y;
+			klights[light_index].co[2] = dir.z;
+
+			klights[light_index].distant.invarea = invarea;
+			klights[light_index].distant.radius = radius;
+			klights[light_index].distant.cosangle = cosangle;
 		}
 		else if(light->type == LIGHT_BACKGROUND) {
 			uint visibility = scene->background->visibility;
@@ -717,11 +725,6 @@ void LightManager::device_update_points(Device *,
 				shader_id |= SHADER_EXCLUDE_SCATTER;
 				use_light_visibility = true;
 			}
-
-			light_data[light_index*LIGHT_SIZE + 0] = make_float4(__int_as_float(light->type), 0.0f, 0.0f, 0.0f);
-			light_data[light_index*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), 0.0f, 0.0f, 0.0f);
-			light_data[light_index*LIGHT_SIZE + 2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-			light_data[light_index*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f);
 		}
 		else if(light->type == LIGHT_AREA) {
 			float3 axisu = light->axisu*(light->sizeu*light->size);
@@ -735,10 +738,20 @@ void LightManager::device_update_points(Device *,
 			if(light->use_mis && area > 0.0f)
 				shader_id |= SHADER_USE_MIS;
 
-			light_data[light_index*LIGHT_SIZE + 0] = make_float4(__int_as_float(light->type), co.x, co.y, co.z);
-			light_data[light_index*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), axisu.x, axisu.y, axisu.z);
-			light_data[light_index*LIGHT_SIZE + 2] = make_float4(invarea, axisv.x, axisv.y, axisv.z);
-			light_data[light_index*LIGHT_SIZE + 3] = make_float4(samples, dir.x, dir.y, dir.z);
+			klights[light_index].co[0] = co.x;
+			klights[light_index].co[1] = co.y;
+			klights[light_index].co[2] = co.z;
+
+			klights[light_index].area.axisu[0] = axisu.x;
+			klights[light_index].area.axisu[1] = axisu.y;
+			klights[light_index].area.axisu[2] = axisu.z;
+			klights[light_index].area.axisv[0] = axisv.x;
+			klights[light_index].area.axisv[1] = axisv.y;
+			klights[light_index].area.axisv[2] = axisv.z;
+			klights[light_index].area.invarea = invarea;
+			klights[light_index].area.dir[0] = dir.x;
+			klights[light_index].area.dir[1] = dir.y;
+			klights[light_index].area.dir[2] = dir.z;
 		}
 		else if(light->type == LIGHT_SPOT) {
 			shader_id &= ~SHADER_AREA_LIGHT;
@@ -754,18 +767,26 @@ void LightManager::device_update_points(Device *,
 			if(light->use_mis && radius > 0.0f)
 				shader_id |= SHADER_USE_MIS;
 
-			light_data[light_index*LIGHT_SIZE + 0] = make_float4(__int_as_float(light->type), co.x, co.y, co.z);
-			light_data[light_index*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), radius, invarea, spot_angle);
-			light_data[light_index*LIGHT_SIZE + 2] = make_float4(spot_smooth, dir.x, dir.y, dir.z);
-			light_data[light_index*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f);
+			klights[light_index].co[0] = co.x;
+			klights[light_index].co[1] = co.y;
+			klights[light_index].co[2] = co.z;
+
+			klights[light_index].spot.radius = radius;
+			klights[light_index].spot.invarea = invarea;
+			klights[light_index].spot.spot_angle = spot_angle;
+			klights[light_index].spot.spot_smooth = spot_smooth;
+			klights[light_index].spot.dir[0] = dir.x;
+			klights[light_index].spot.dir[1] = dir.y;
+			klights[light_index].spot.dir[2] = dir.z;
 		}
 
-		light_data[light_index*LIGHT_SIZE + 4] = make_float4(max_bounces, random, 0.0f, 0.0f);
+		klights[light_index].shader_id = shader_id;
+
+		klights[light_index].max_bounces = max_bounces;
+		klights[light_index].random = random;
 
-		Transform tfm = light->tfm;
-		Transform itfm = transform_inverse(tfm);
-		memcpy(&light_data[light_index*LIGHT_SIZE + 5], &tfm, sizeof(float4)*3);
-		memcpy(&light_data[light_index*LIGHT_SIZE + 8], &itfm, sizeof(float4)*3);
+		klights[light_index].tfm = light->tfm;
+		klights[light_index].itfm = transform_inverse(light->tfm);
 
 		light_index++;
 	}
@@ -782,21 +803,27 @@ void LightManager::device_update_points(Device *,
 		float3 axisu = light->axisu*(light->sizeu*light->size);
 		float3 axisv = light->axisv*(light->sizev*light->size);
 		float area = len(axisu)*len(axisv);
-		float invarea = (area > 0.0f) ? 1.0f / area : 1.0f;
+		float invarea = (area > 0.0f)? 1.0f/area: 1.0f;
 		float3 dir = light->dir;
 
 		dir = safe_normalize(dir);
 
-		light_data[light_index*LIGHT_SIZE + 0] = make_float4(__int_as_float(light->type), co.x, co.y, co.z);
-		light_data[light_index*LIGHT_SIZE + 1] = make_float4(area, axisu.x, axisu.y, axisu.z);
-		light_data[light_index*LIGHT_SIZE + 2] = make_float4(invarea, axisv.x, axisv.y, axisv.z);
-		light_data[light_index*LIGHT_SIZE + 3] = make_float4(-1, dir.x, dir.y, dir.z);
-		light_data[light_index*LIGHT_SIZE + 4] = make_float4(-1, 0.0f, 0.0f, 0.0f);
-
-		Transform tfm = light->tfm;
-		Transform itfm = transform_inverse(tfm);
-		memcpy(&light_data[light_index*LIGHT_SIZE + 5], &tfm, sizeof(float4)*3);
-		memcpy(&light_data[light_index*LIGHT_SIZE + 8], &itfm, sizeof(float4)*3);
+		klights[light_index].co[0] = co.x;
+		klights[light_index].co[1] = co.y;
+		klights[light_index].co[2] = co.z;
+
+		klights[light_index].area.axisu[0] = axisu.x;
+		klights[light_index].area.axisu[1] = axisu.y;
+		klights[light_index].area.axisu[2] = axisu.z;
+		klights[light_index].area.axisv[0] = axisv.x;
+		klights[light_index].area.axisv[1] = axisv.y;
+		klights[light_index].area.axisv[2] = axisv.z;
+		klights[light_index].area.invarea = invarea;
+		klights[light_index].area.dir[0] = dir.x;
+		klights[light_index].area.dir[1] = dir.y;
+		klights[light_index].area.dir[2] = dir.z;
+		klights[light_index].tfm = light->tfm;
+		klights[light_index].itfm = transform_inverse(light->tfm);
 
 		light_index++;
 	}
@@ -806,7 +833,7 @@ void LightManager::device_update_points(Device *,
 	VLOG(1) << "Number of lights without contribution: "
 	        << num_scene_lights - light_index;
 
-	dscene->light_data.copy_to_device();
+	dscene->lights.copy_to_device();
 }
 
 void LightManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
@@ -842,7 +869,7 @@ void LightManager::device_update(Device *device, DeviceScene *dscene, Scene *sce
 void LightManager::device_free(Device *, DeviceScene *dscene)
 {
 	dscene->light_distribution.free();
-	dscene->light_data.free();
+	dscene->lights.free();
 	dscene->light_background_marginal_cdf.free();
 	dscene->light_background_conditional_cdf.free();
 }
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index 47d24970949..7cfbb7b7c7d 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -877,15 +877,8 @@ void Mesh::add_undisplaced()
 	}
 }
 
-void Mesh::pack_normals(Scene *scene, uint *tri_shader, float4 *vnormal)
+void Mesh::pack_shaders(Scene *scene, uint *tri_shader)
 {
-	Attribute *attr_vN = attributes.find(ATTR_STD_VERTEX_NORMAL);
-	if(attr_vN == NULL) {
-		/* Happens on objects with just hair. */
-		return;
-	}
-
-	float3 *vN = attr_vN->data_float3();
 	uint shader_id = 0;
 	uint last_shader = -1;
 	bool last_smooth = false;
@@ -893,10 +886,6 @@ void Mesh::pack_normals(Scene *scene, uint *tri_shader, float4 *vnormal)
 	size_t triangles_size = num_triangles();
 	int *shader_ptr = shader.data();
 
-	bool do_transform = transform_applied;
-	Transform ntfm = transform_normal;
-
-	/* save shader */
 	for(size_t i = 0; i < triangles_size; i++) {
 		if(shader_ptr[i] != last_shader || last_smooth != smooth[i]) {
 			last_shader = shader_ptr[i];
@@ -908,7 +897,20 @@ void Mesh::pack_normals(Scene *scene, uint *tri_shader, float4 *vnormal)
 
 		tri_shader[i] = shader_id;
 	}
+}
+
+void Mesh::pack_normals(float4 *vnormal)
+{
+	Attribute *attr_vN = attributes.find(ATTR_STD_VERTEX_NORMAL);
+	if(attr_vN == NULL) {
+		/* Happens on objects with just hair. */
+		return;
+	}
 
+	bool do_transform = transform_applied;
+	Transform ntfm = transform_normal;
+
+	float3 *vN = attr_vN->data_float3();
 	size_t verts_size = verts.size();
 
 	for(size_t i = 0; i < verts_size; i++) {
@@ -1117,6 +1119,32 @@ bool Mesh::has_true_displacement() const
 	return false;
 }
 
+float Mesh::motion_time(int step) const
+{
+	return (motion_steps > 1) ? 2.0f * step / (motion_steps - 1) - 1.0f : 0.0f;
+}
+
+int Mesh::motion_step(float time) const
+{
+	if(motion_steps > 1) {
+		int attr_step = 0;
+
+		for(int step = 0; step < motion_steps; step++) {
+			float step_time = motion_time(step);
+			if(step_time == time) {
+				return attr_step;
+			}
+
+			/* Center step is stored in a separate attribute. */
+			if(step != motion_steps / 2) {
+				attr_step++;
+			}
+		}
+	}
+
+	return -1;
+}
+
 bool Mesh::need_build_bvh() const
 {
 	return !transform_applied || has_surface_bssrdf;
@@ -1445,11 +1473,11 @@ static void update_attribute_element_offset(Mesh *mesh,
 			Transform *tfm = mattr->data_transform();
 			offset = attr_float3_offset;
 
-			assert(attr_float3.size() >= offset + size * 4);
-			for(size_t k = 0; k < size*4; k++) {
+			assert(attr_float3.size() >= offset + size * 3);
+			for(size_t k = 0; k < size*3; k++) {
 				attr_float3[offset+k] = (&tfm->x)[k];
 			}
-			attr_float3_offset += size * 4;
+			attr_float3_offset += size * 3;
 		}
 		else {
 			float4 *data = mattr->data_float4();
@@ -1747,9 +1775,9 @@ void MeshManager::device_update_mesh(Device *,
 		float2 *tri_patch_uv = dscene->tri_patch_uv.alloc(vert_size);
 
 		foreach(Mesh *mesh, scene->meshes) {
-			mesh->pack_normals(scene,
-			                   &tri_shader[mesh->tri_offset],
-			                   &vnormal[mesh->vert_offset]);
+			mesh->pack_shaders(scene,
+			                   &tri_shader[mesh->tri_offset]);
+			mesh->pack_normals(&vnormal[mesh->vert_offset]);
 			mesh->pack_verts(tri_prim_index,
 			                 &tri_vindex[mesh->tri_offset],
 			                 &tri_patch[mesh->tri_offset],
@@ -2031,7 +2059,9 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
 
 	VLOG(1) << "Total " << scene->meshes.size() << " meshes.";
 
-	/* Update normals. */
+	bool true_displacement_used = false;
+	size_t total_tess_needed = 0;
+
 	foreach(Mesh *mesh, scene->meshes) {
 		foreach(Shader *shader, mesh->used_shaders) {
 			if(shader->need_update_mesh)
@@ -2039,6 +2069,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
 		}
 
 		if(mesh->need_update) {
+			/* Update normals. */
 			mesh->add_face_normals();
 			mesh->add_vertex_normals();
 
@@ -2046,57 +2077,53 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
 				mesh->add_undisplaced();
 			}
 
+			/* Test if we need tessellation. */
+			if(mesh->subdivision_type != Mesh::SUBDIVISION_NONE &&
+			   mesh->num_subd_verts == 0 &&
+			   mesh->subd_params)
+			{
+				total_tess_needed++;
+			}
+
+			/* Test if we need displacement. */
+			if(mesh->has_true_displacement()) {
+				true_displacement_used = true;
+			}
+
 			if(progress.get_cancel()) return;
 		}
 	}
 
 	/* Tessellate meshes that are using subdivision */
-	size_t total_tess_needed = 0;
-	foreach(Mesh *mesh, scene->meshes) {
-		if(mesh->need_update &&
-		   mesh->subdivision_type != Mesh::SUBDIVISION_NONE &&
-		   mesh->num_subd_verts == 0 &&
-		   mesh->subd_params)
-		{
-			total_tess_needed++;
-		}
-	}
+	if(total_tess_needed) {
+		size_t i = 0;
+		foreach(Mesh *mesh, scene->meshes) {
+			if(mesh->need_update &&
+			   mesh->subdivision_type != Mesh::SUBDIVISION_NONE &&
+			   mesh->num_subd_verts == 0 &&
+			   mesh->subd_params)
+			{
+				string msg = "Tessellating ";
+				if(mesh->name == "")
+					msg += string_printf("%u/%u", (uint)(i+1), (uint)total_tess_needed);
+				else
+					msg += string_printf("%s %u/%u", mesh->name.c_str(), (uint)(i+1), (uint)total_tess_needed);
 
-	size_t i = 0;
-	foreach(Mesh *mesh, scene->meshes) {
-		if(mesh->need_update &&
-		   mesh->subdivision_type != Mesh::SUBDIVISION_NONE &&
-		   mesh->num_subd_verts == 0 &&
-		   mesh->subd_params)
-		{
-			string msg = "Tessellating ";
-			if(mesh->name == "")
-				msg += string_printf("%u/%u", (uint)(i+1), (uint)total_tess_needed);
-			else
-				msg += string_printf("%s %u/%u", mesh->name.c_str(), (uint)(i+1), (uint)total_tess_needed);
+				progress.set_status("Updating Mesh", msg);
 
-			progress.set_status("Updating Mesh", msg);
+				DiagSplit dsplit(*mesh->subd_params);
+				mesh->tessellate(&dsplit);
 
-			DiagSplit dsplit(*mesh->subd_params);
-			mesh->tessellate(&dsplit);
+				i++;
 
-			i++;
+				if(progress.get_cancel()) return;
+			}
 
-			if(progress.get_cancel()) return;
 		}
 	}
 
 	/* Update images needed for true displacement. */
-	bool true_displacement_used = false;
 	bool old_need_object_flags_update = false;
-	foreach(Mesh *mesh, scene->meshes) {
-		if(mesh->need_update &&
-		   mesh->has_true_displacement())
-		{
-			true_displacement_used = true;
-			break;
-		}
-	}
 	if(true_displacement_used) {
 		VLOG(1) << "Updating images used for true displacement.";
 		device_update_displacement_images(device, scene, progress);
@@ -2122,11 +2149,17 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
 
 	/* Update displacement. */
 	bool displacement_done = false;
+	size_t num_bvh = 0;
+
 	foreach(Mesh *mesh, scene->meshes) {
-		if(mesh->need_update &&
-		   displace(device, dscene, scene, mesh, progress))
-		{
-			displacement_done = true;
+		if(mesh->need_update) {
+			if(displace(device, dscene, scene, mesh, progress)) {
+				displacement_done = true;
+			}
+
+			if(mesh->need_build_bvh()) {
+				num_bvh++;
+			}
 		}
 	}
 
@@ -2141,17 +2174,9 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
 		if(progress.get_cancel()) return;
 	}
 
-	/* Update bvh. */
-	size_t num_bvh = 0;
-	foreach(Mesh *mesh, scene->meshes) {
-		if(mesh->need_update && mesh->need_build_bvh()) {
-			num_bvh++;
-		}
-	}
-
 	TaskPool pool;
 
-	i = 0;
+	size_t i = 0;
 	foreach(Mesh *mesh, scene->meshes) {
 		if(mesh->need_update) {
 			pool.push(function_bind(&Mesh::compute_bvh,
diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h
index c0d1513bee0..e370f8a2021 100644
--- a/intern/cycles/render/mesh.h
+++ b/intern/cycles/render/mesh.h
@@ -279,7 +279,8 @@ public:
 	void add_vertex_normals();
 	void add_undisplaced();
 
-	void pack_normals(Scene *scene, uint *shader, float4 *vnormal);
+	void pack_shaders(Scene *scene, uint *shader);
+	void pack_normals(float4 *vnormal);
 	void pack_verts(const vector<uint>& tri_prim_index,
 	                uint4 *tri_vindex,
 	                uint *tri_patch,
@@ -304,6 +305,11 @@ public:
 	bool has_motion_blur() const;
 	bool has_true_displacement() const;
 
+	/* Convert between normalized -1..1 motion time and index
+	 * in the VERTEX_MOTION attribute. */
+	float motion_time(int step) const;
+	int motion_step(float time) const;
+
 	/* Check whether the mesh should have own BVH built separately. Briefly,
 	 * own BVH is needed for mesh, if:
 	 *
diff --git a/intern/cycles/render/mesh_volume.cpp b/intern/cycles/render/mesh_volume.cpp
index f2347c79610..3571beb40d6 100644
--- a/intern/cycles/render/mesh_volume.cpp
+++ b/intern/cycles/render/mesh_volume.cpp
@@ -152,22 +152,22 @@ public:
 	void add_node_with_padding(int x, int y, int z);
 
 	void create_mesh(vector<float3> &vertices,
-					 vector<int> &indices,
-					 vector<float3> &face_normals);
+	                 vector<int> &indices,
+	                 vector<float3> &face_normals);
 
 private:
 	void generate_vertices_and_quads(vector<int3> &vertices_is,
-									 vector<QuadData> &quads);
+	                                 vector<QuadData> &quads);
 
 	void deduplicate_vertices(vector<int3> &vertices,
-							  vector<QuadData> &quads);
+	                          vector<QuadData> &quads);
 
 	void convert_object_space(const vector<int3> &vertices,
-							  vector<float3> &out_vertices);
+	                          vector<float3> &out_vertices);
 
 	void convert_quads_to_tris(const vector<QuadData> &quads,
-							   vector<int> &tris,
-							   vector<float3> &face_normals);
+	                           vector<int> &tris,
+	                           vector<float3> &face_normals);
 };
 
 VolumeMeshBuilder::VolumeMeshBuilder(VolumeParams *volume_params)
@@ -224,8 +224,8 @@ void VolumeMeshBuilder::add_node_with_padding(int x, int y, int z)
 }
 
 void VolumeMeshBuilder::create_mesh(vector<float3> &vertices,
-									vector<int> &indices,
-									vector<float3> &face_normals)
+                                    vector<int> &indices,
+                                    vector<float3> &face_normals)
 {
 	/* We create vertices in index space (is), and only convert them to object
 	 * space when done. */
@@ -260,8 +260,8 @@ void VolumeMeshBuilder::generate_vertices_and_quads(
 
 				/* Compute min and max coords of the node in index space. */
 				int3 min = make_int3((x - pad_offset.x)*CUBE_SIZE,
-									 (y - pad_offset.y)*CUBE_SIZE,
-									 (z - pad_offset.z)*CUBE_SIZE);
+				                     (y - pad_offset.y)*CUBE_SIZE,
+				                     (z - pad_offset.z)*CUBE_SIZE);
 
 				/* Maximum is just CUBE_SIZE voxels away from minimum on each axis. */
 				int3 max = make_int3(min.x + CUBE_SIZE, min.y + CUBE_SIZE, min.z + CUBE_SIZE);
@@ -316,7 +316,7 @@ void VolumeMeshBuilder::generate_vertices_and_quads(
 }
 
 void VolumeMeshBuilder::deduplicate_vertices(vector<int3> &vertices,
-											 vector<QuadData> &quads)
+                                             vector<QuadData> &quads)
 {
 	vector<int3> sorted_vertices = vertices;
 	std::sort(sorted_vertices.begin(), sorted_vertices.end());
@@ -355,7 +355,7 @@ void VolumeMeshBuilder::deduplicate_vertices(vector<int3> &vertices,
 }
 
 void VolumeMeshBuilder::convert_object_space(const vector<int3> &vertices,
-											 vector<float3> &out_vertices)
+	                                         vector<float3> &out_vertices)
 {
 	out_vertices.reserve(vertices.size());
 
@@ -369,8 +369,8 @@ void VolumeMeshBuilder::convert_object_space(const vector<int3> &vertices,
 }
 
 void VolumeMeshBuilder::convert_quads_to_tris(const vector<QuadData> &quads,
-											  vector<int> &tris,
-											  vector<float3> &face_normals)
+                                              vector<int> &tris,
+                                              vector<float3> &face_normals)
 {
 	int index_offset = 0;
 	tris.resize(quads.size()*6);
@@ -399,8 +399,8 @@ struct VoxelAttributeGrid {
 };
 
 void MeshManager::create_volume_mesh(Scene *scene,
-									 Mesh *mesh,
-									 Progress& progress)
+                                     Mesh *mesh,
+                                     Progress& progress)
 {
 	string msg = string_printf("Computing Volume Mesh %s", mesh->name.c_str());
 	progress.set_status("Updating Mesh", msg);
@@ -470,8 +470,8 @@ void MeshManager::create_volume_mesh(Scene *scene,
 	const int3 resolution = volume_params.resolution;
 	float3 start_point = make_float3(0.0f, 0.0f, 0.0f);
 	float3 cell_size = make_float3(1.0f/resolution.x,
-								   1.0f/resolution.y,
-								   1.0f/resolution.z);
+	                               1.0f/resolution.y,
+	                               1.0f/resolution.z);
 
 	if(attr) {
 		const Transform *tfm = attr->data_transform();
@@ -575,12 +575,12 @@ void MeshManager::create_volume_mesh(Scene *scene,
 
 	/* Print stats. */
 	VLOG(1) << "Memory usage volume mesh: "
-			<< ((vertices.size() + face_normals.size())*sizeof(float3) + indices.size()*sizeof(int))/(1024.0*1024.0)
-			<< "Mb.";
+	        << ((vertices.size() + face_normals.size())*sizeof(float3) + indices.size()*sizeof(int))/(1024.0*1024.0)
+	        << "Mb.";
 
 	VLOG(1) << "Memory usage volume grid: "
-			<< (resolution.x*resolution.y*resolution.z*sizeof(float))/(1024.0*1024.0)
-			<< "Mb.";
+	        << (resolution.x*resolution.y*resolution.z*sizeof(float))/(1024.0*1024.0)
+	        << "Mb.";
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 41c9730e6fb..f117962a2ea 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -117,8 +117,7 @@ Transform TextureMapping::compute_transform()
 		case NORMAL:
 			/* no translation for normals, and inverse transpose */
 			mat = rmat*smat;
-			mat = transform_inverse(mat);
-			mat = transform_transpose(mat);
+			mat = transform_transposed_inverse(mat);
 			break;
 	}
 
@@ -153,7 +152,6 @@ void TextureMapping::compile(SVMCompiler& compiler, int offset_in, int offset_ou
 	compiler.add_node(tfm.x);
 	compiler.add_node(tfm.y);
 	compiler.add_node(tfm.z);
-	compiler.add_node(tfm.w);
 
 	if(use_minmax) {
 		compiler.add_node(NODE_MIN_MAX, offset_out, offset_out);
@@ -193,9 +191,7 @@ void TextureMapping::compile_end(SVMCompiler& compiler, ShaderInput *vector_in,
 void TextureMapping::compile(OSLCompiler &compiler)
 {
 	if(!skip()) {
-		Transform tfm = transform_transpose(compute_transform());
-
-		compiler.parameter("mapping", tfm);
+		compiler.parameter("mapping", compute_transform());
 		compiler.parameter("use_mapping", 1);
 	}
 }
@@ -1434,7 +1430,6 @@ void PointDensityTextureNode::compile(SVMCompiler& compiler)
 				compiler.add_node(tfm.x);
 				compiler.add_node(tfm.y);
 				compiler.add_node(tfm.z);
-				compiler.add_node(tfm.w);
 			}
 		}
 		else {
@@ -1478,7 +1473,7 @@ void PointDensityTextureNode::compile(OSLCompiler& compiler)
 			compiler.parameter("filename", string_printf("@%d", slot).c_str());
 		}
 		if(space == NODE_TEX_VOXEL_SPACE_WORLD) {
-			compiler.parameter("mapping", transform_transpose(tfm));
+			compiler.parameter("mapping", tfm);
 			compiler.parameter("use_mapping", 1);
 		}
 		compiler.parameter(this, "interpolation");
@@ -1558,8 +1553,7 @@ void MappingNode::compile(SVMCompiler& compiler)
 
 void MappingNode::compile(OSLCompiler& compiler)
 {
-	Transform tfm = transform_transpose(tex_mapping.compute_transform());
-	compiler.parameter("Matrix", tfm);
+	compiler.parameter("Matrix", tex_mapping.compute_transform());
 	compiler.parameter_point("mapping_min", tex_mapping.min);
 	compiler.parameter_point("mapping_max", tex_mapping.max);
 	compiler.parameter("use_minmax", tex_mapping.use_minmax);
@@ -3220,7 +3214,6 @@ void TextureCoordinateNode::compile(SVMCompiler& compiler)
 			compiler.add_node(ob_itfm.x);
 			compiler.add_node(ob_itfm.y);
 			compiler.add_node(ob_itfm.z);
-			compiler.add_node(ob_itfm.w);
 		}
 	}
 
@@ -3259,7 +3252,7 @@ void TextureCoordinateNode::compile(OSLCompiler& compiler)
 	if(compiler.output_type() == SHADER_TYPE_VOLUME)
 		compiler.parameter("is_volume", true);
 	compiler.parameter(this, "use_transform");
-	Transform ob_itfm = transform_transpose(transform_inverse(ob_tfm));
+	Transform ob_itfm = transform_transposed_inverse(ob_tfm);
 	compiler.parameter("object_itfm", ob_itfm);
 
 	compiler.parameter(this, "from_dupli");
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index b981d2b8849..138de250c5f 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -33,6 +33,52 @@
 
 CCL_NAMESPACE_BEGIN
 
+/* Global state of object transform update. */
+
+struct UpdateObjectTransformState {
+	/* Global state used by device_update_object_transform().
+	 * Common for both threaded and non-threaded update.
+	 */
+
+	/* Type of the motion required by the scene settings. */
+	Scene::MotionType need_motion;
+
+	/* Mapping from particle system to an index in packed particle array.
+	 * Only used for read.
+	 */
+	map<ParticleSystem*, int> particle_offset;
+
+	/* Mesh area.
+	 * Used to avoid calculation of mesh area multiple times. Used for both
+	 * read and write. Acquire surface_area_lock to keep it all thread safe.
+	 */
+	map<Mesh*, float> surface_area_map;
+
+	/* Motion offsets for each object. */
+	array<uint> motion_offset;
+
+	/* Packed object arrays. Those will be filled in. */
+	uint *object_flag;
+	KernelObject *objects;
+	Transform *object_motion_pass;
+	DecomposedTransform *object_motion;
+
+	/* Flags which will be synchronized to Integrator. */
+	bool have_motion;
+	bool have_curves;
+
+	/* ** Scheduling queue. ** */
+
+	Scene *scene;
+
+	/* Some locks to keep everything thread-safe. */
+	thread_spin_lock queue_lock;
+	thread_spin_lock surface_area_lock;
+
+	/* First unused object index in the queue. */
+	int queue_start_object;
+};
+
 /* Object */
 
 NODE_DEFINE(Object)
@@ -48,6 +94,7 @@ NODE_DEFINE(Object)
 	SOCKET_BOOLEAN(hide_on_missing_motion, "Hide on Missing Motion", false);
 	SOCKET_POINT(dupli_generated, "Dupli Generated", make_float3(0.0f, 0.0f, 0.0f));
 	SOCKET_POINT2(dupli_uv, "Dupli UV", make_float2(0.0f, 0.0f));
+	SOCKET_TRANSFORM_ARRAY(motion, "Motion", array<Transform>());
 
 	SOCKET_BOOLEAN(is_shadow_catcher, "Shadow Catcher", false);
 
@@ -60,45 +107,54 @@ Object::Object()
 	particle_system = NULL;
 	particle_index = 0;
 	bounds = BoundBox::empty;
-	motion.pre = transform_empty();
-	motion.mid = transform_empty();
-	motion.post = transform_empty();
-	use_motion = false;
 }
 
 Object::~Object()
 {
 }
 
-void Object::compute_bounds(bool motion_blur)
+void Object::update_motion()
 {
-	BoundBox mbounds = mesh->bounds;
+	if(!use_motion()) {
+		return;
+	}
 
-	if(motion_blur && use_motion) {
-		MotionTransform mtfm = motion;
+	bool have_motion = false;
 
-		if(hide_on_missing_motion) {
-			/* Hide objects that have no valid previous or next transform, for
-			 * example particle that stop existing. TODO: add support for this
-			 * case in the kernel so we don't get render artifacts. */
-			if(mtfm.pre == transform_empty() ||
-			   mtfm.post == transform_empty()) {
-				bounds = BoundBox::empty;
+	for(size_t i = 0; i < motion.size(); i++) {
+		if(motion[i] == transform_empty()) {
+			if(hide_on_missing_motion) {
+				/* Hide objects that have no valid previous or next
+				 * transform, for example particles that stop existing. It
+				 * would be better to handle this in the kernel and make
+				 * objects invisible outside certain motion steps. */
+				tfm = transform_empty();
+				motion.clear();
 				return;
 			}
+			else {
+				/* Otherwise just copy center motion. */
+				motion[i] = tfm;
+			}
 		}
 
-		/* In case of missing motion information for previous/next frame,
-		 * assume there is no motion. */
-		if(mtfm.pre == transform_empty()) {
-			mtfm.pre = tfm;
-		}
-		if(mtfm.post == transform_empty()) {
-			mtfm.post = tfm;
-		}
+		/* Test if any of the transforms are actually different. */
+		have_motion = have_motion || motion[i] != tfm;
+	}
 
-		MotionTransform decomp;
-		transform_motion_decompose(&decomp, &mtfm, &tfm);
+	/* Clear motion array if there is no actual motion. */
+	if(!have_motion) {
+		motion.clear();
+	}
+}
+
+void Object::compute_bounds(bool motion_blur)
+{
+	BoundBox mbounds = mesh->bounds;
+
+	if(motion_blur && use_motion()) {
+		array<DecomposedTransform> decomp(motion.size());
+		transform_motion_decompose(decomp.data(), motion.data(), motion.size());
 
 		bounds = BoundBox::empty;
 
@@ -108,11 +164,12 @@ void Object::compute_bounds(bool motion_blur)
 		for(float t = 0.0f; t < 1.0f; t += (1.0f/128.0f)) {
 			Transform ttfm;
 
-			transform_motion_interpolate(&ttfm, &decomp, t);
+			transform_motion_array_interpolate(&ttfm, decomp.data(), motion.size(), t);
 			bounds.grow(mbounds.transformed(&ttfm));
 		}
 	}
 	else {
+		/* No motion blur case. */
 		if(mesh->transform_applied) {
 			bounds = mbounds;
 		}
@@ -132,7 +189,7 @@ void Object::apply_transform(bool apply_to_motion)
 		/* store matrix to transform later. when accessing these as attributes we
 		 * do not want the transform to be applied for consistency between static
 		 * and dynamic BVH, so we do it on packing. */
-		mesh->transform_normal = transform_transpose(transform_inverse(tfm));
+		mesh->transform_normal = transform_transposed_inverse(tfm);
 
 		/* apply to mesh vertices */
 		for(size_t i = 0; i < mesh->verts.size(); i++)
@@ -232,27 +289,30 @@ void Object::tag_update(Scene *scene)
 	scene->object_manager->need_update = true;
 }
 
-vector<float> Object::motion_times()
+bool Object::use_motion() const
 {
-	/* compute times at which we sample motion for this object */
-	vector<float> times;
-
-	if(!mesh || mesh->motion_steps == 1)
-		return times;
+	return (motion.size() > 1);
+}
 
-	int motion_steps = mesh->motion_steps;
+float Object::motion_time(int step) const
+{
+	return (use_motion()) ? 2.0f * step / (motion.size() - 1) - 1.0f : 0.0f;
+}
 
-	for(int step = 0; step < motion_steps; step++) {
-		if(step != motion_steps / 2) {
-			float time = 2.0f * step / (motion_steps - 1) - 1.0f;
-			times.push_back(time);
+int Object::motion_step(float time) const
+{
+	if(use_motion()) {
+		for(size_t step = 0; step < motion.size(); step++) {
+			if(time == motion_time(step)) {
+				return step;
+			}
 		}
 	}
 
-	return times;
+	return -1;
 }
 
-bool Object::is_traceable()
+bool Object::is_traceable() const
 {
 	/* Mesh itself can be empty,can skip all such objects. */
 	if(!bounds.valid() || bounds.size() == make_float3(0.0f, 0.0f, 0.0f)) {
@@ -289,8 +349,8 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
                                                    Object *ob,
                                                    int object_index)
 {
-	float4 *objects = state->objects;
-	float4 *objects_vector = state->objects_vector;
+	KernelObject& kobject = state->objects[object_index];
+	Transform *object_motion_pass = state->object_motion_pass;
 
 	Mesh *mesh = ob->mesh;
 	uint flag = 0;
@@ -357,15 +417,13 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
 		}
 	}
 
-	/* Pack in texture. */
-	int offset = object_index*OBJECT_SIZE;
-
-	/* OBJECT_TRANSFORM */
-	memcpy(&objects[offset], &tfm, sizeof(float4)*3);
-	/* OBJECT_INVERSE_TRANSFORM */
-	memcpy(&objects[offset+4], &itfm, sizeof(float4)*3);
-	/* OBJECT_PROPERTIES */
-	objects[offset+12] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index));
+	kobject.tfm = tfm;
+	kobject.itfm = itfm;
+	kobject.surface_area = surface_area;
+	kobject.pass_id = pass_id;
+	kobject.random_number = random_number;
+	kobject.particle_index = particle_index;
+	kobject.motion_offset = 0;
 
 	if(mesh->use_motion_blur) {
 		state->have_motion = true;
@@ -375,50 +433,56 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
 	}
 
 	if(state->need_motion == Scene::MOTION_PASS) {
-		/* Motion transformations, is world/object space depending if mesh
-		 * comes with deformed position in object space, or if we transform
-		 * the shading point in world space.
-		 */
-		MotionTransform mtfm = ob->motion;
+		/* Clear motion array if there is no actual motion. */
+		ob->update_motion();
 
-		/* In case of missing motion information for previous/next frame,
-		 * assume there is no motion. */
-		if(!ob->use_motion || mtfm.pre == transform_empty()) {
-			mtfm.pre = ob->tfm;
+		/* Compute motion transforms. */
+		Transform tfm_pre, tfm_post;
+		if(ob->use_motion()) {
+			tfm_pre = ob->motion[0];
+			tfm_post = ob->motion[ob->motion.size() - 1];
 		}
-		if(!ob->use_motion || mtfm.post == transform_empty()) {
-			mtfm.post = ob->tfm;
+		else {
+			tfm_pre = tfm;
+			tfm_post = tfm;
 		}
 
+		/* Motion transformations, is world/object space depending if mesh
+		 * comes with deformed position in object space, or if we transform
+		 * the shading point in world space. */
 		if(!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) {
-			mtfm.pre = mtfm.pre * itfm;
-			mtfm.post = mtfm.post * itfm;
+			tfm_pre = tfm_pre * itfm;
+			tfm_post = tfm_post * itfm;
 		}
 
-		memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+0], &mtfm.pre, sizeof(float4)*3);
-		memcpy(&objects_vector[object_index*OBJECT_VECTOR_SIZE+3], &mtfm.post, sizeof(float4)*3);
+		int motion_pass_offset = object_index*OBJECT_MOTION_PASS_SIZE;
+		object_motion_pass[motion_pass_offset + 0] = tfm_pre;
+		object_motion_pass[motion_pass_offset + 1] = tfm_post;
 	}
 	else if(state->need_motion == Scene::MOTION_BLUR) {
-		if(ob->use_motion) {
-			/* decompose transformations for interpolation. */
-			MotionTransform decomp;
+		if(ob->use_motion()) {
+			kobject.motion_offset = state->motion_offset[object_index];
 
-			transform_motion_decompose(&decomp, &ob->motion, &ob->tfm);
-			memcpy(&objects[offset], &decomp, sizeof(float4)*12);
+			/* Decompose transforms for interpolation. */
+			DecomposedTransform *decomp = state->object_motion + kobject.motion_offset;
+			transform_motion_decompose(decomp, ob->motion.data(), ob->motion.size());
 			flag |= SD_OBJECT_MOTION;
 			state->have_motion = true;
 		}
 	}
 
 	/* Dupli object coords and motion info. */
+	kobject.dupli_generated[0] = ob->dupli_generated[0];
+	kobject.dupli_generated[1] = ob->dupli_generated[1];
+	kobject.dupli_generated[2] = ob->dupli_generated[2];
+	kobject.numkeys = mesh->curve_keys.size();
+	kobject.dupli_uv[0] = ob->dupli_uv[0];
+	kobject.dupli_uv[1] = ob->dupli_uv[1];
 	int totalsteps = mesh->motion_steps;
-	int numsteps = (totalsteps - 1)/2;
-	int numverts = mesh->verts.size();
-	int numkeys = mesh->curve_keys.size();
-
-	objects[offset+13] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys));
-	objects[offset+14] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts));
-	objects[offset+15] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+	kobject.numsteps = (totalsteps - 1)/2;
+	kobject.numverts = mesh->verts.size();
+	kobject.patch_map_offset = 0; /* Overwritten in device_update_mesh_offsets() for meshes with a patch table. */
+	kobject.attribute_map_offset = 0; /* Overwritten in device_update_mesh_offsets(). */
 
 	/* Object flag. */
 	if(ob->use_holdout) {
@@ -475,7 +539,6 @@ void ObjectManager::device_update_object_transform_task(
 
 void ObjectManager::device_update_transforms(DeviceScene *dscene,
                                              Scene *scene,
-                                             uint *object_flag,
                                              Progress& progress)
 {
 	UpdateObjectTransformState state;
@@ -485,13 +548,29 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene,
 	state.scene = scene;
 	state.queue_start_object = 0;
 
-	state.object_flag = object_flag;
-	state.objects = dscene->objects.alloc(OBJECT_SIZE*scene->objects.size());
+	state.objects = dscene->objects.alloc(scene->objects.size());
+	state.object_flag = dscene->object_flag.alloc(scene->objects.size());
+	state.object_motion = NULL;
+	state.object_motion_pass = NULL;
+
 	if(state.need_motion == Scene::MOTION_PASS) {
-		state.objects_vector = dscene->objects_vector.alloc(OBJECT_VECTOR_SIZE*scene->objects.size());
+		state.object_motion_pass = dscene->object_motion_pass.alloc(OBJECT_MOTION_PASS_SIZE*scene->objects.size());
 	}
-	else {
-		state.objects_vector = NULL;
+	else if(state.need_motion == Scene::MOTION_BLUR) {
+		/* Set object offsets into global object motion array. */
+		uint *motion_offsets = state.motion_offset.resize(scene->objects.size());
+		uint motion_offset = 0;
+
+		foreach(Object *ob, scene->objects) {
+			*motion_offsets = motion_offset;
+			motion_offsets++;
+
+			/* Clear motion array if there is no actual motion. */
+			ob->update_motion();
+			motion_offset += ob->motion.size();
+		}
+
+		state.object_motion = dscene->object_motion.alloc(motion_offset);
 	}
 
 	/* Particle system device offsets
@@ -534,7 +613,10 @@ void ObjectManager::device_update_transforms(DeviceScene *dscene,
 
 	dscene->objects.copy_to_device();
 	if(state.need_motion == Scene::MOTION_PASS) {
-		dscene->objects_vector.copy_to_device();
+		dscene->object_motion_pass.copy_to_device();
+	}
+	else if(state.need_motion == Scene::MOTION_BLUR) {
+		dscene->object_motion.copy_to_device();
 	}
 
 	dscene->data.bvh.have_motion = state.have_motion;
@@ -554,12 +636,9 @@ void ObjectManager::device_update(Device *device, DeviceScene *dscene, Scene *sc
 	if(scene->objects.size() == 0)
 		return;
 
-	/* object info flag */
-	uint *object_flag = dscene->object_flag.alloc(scene->objects.size());
-
 	/* set object transform matrices, before applying static transforms */
 	progress.set_status("Updating Objects", "Copying Transformations to device");
-	device_update_transforms(dscene, scene, object_flag, progress);
+	device_update_transforms(dscene, scene, progress);
 
 	if(progress.get_cancel()) return;
 
@@ -567,7 +646,7 @@ void ObjectManager::device_update(Device *device, DeviceScene *dscene, Scene *sc
 	/* todo: do before to support getting object level coords? */
 	if(scene->params.bvh_type == SceneParams::BVH_STATIC) {
 		progress.set_status("Updating Objects", "Applying Static Transformations");
-		apply_static_transforms(dscene, scene, object_flag, progress);
+		apply_static_transforms(dscene, scene, progress);
 	}
 }
 
@@ -586,9 +665,10 @@ void ObjectManager::device_update_flags(Device *,
 	if(scene->objects.size() == 0)
 		return;
 
-	/* object info flag */
+	/* Object info flag. */
 	uint *object_flag = dscene->object_flag.data();
 
+	/* Object volume intersection. */
 	vector<Object *> volume_objects;
 	bool has_volume_objects = false;
 	foreach(Object *object, scene->objects) {
@@ -642,7 +722,7 @@ void ObjectManager::device_update_flags(Device *,
 		++object_index;
 	}
 
-	/* allocate object flag */
+	/* Copy object flag. */
 	dscene->object_flag.copy_to_device();
 }
 
@@ -652,27 +732,26 @@ void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Sc
 		return;
 	}
 
-	uint4* objects = (uint4*)dscene->objects.data();
+	KernelObject *kobjects = dscene->objects.data();
 
 	bool update = false;
 	int object_index = 0;
 
 	foreach(Object *object, scene->objects) {
 		Mesh* mesh = object->mesh;
-		int offset = object_index*OBJECT_SIZE + 15;
 
 		if(mesh->patch_table) {
 			uint patch_map_offset = 2*(mesh->patch_table_offset + mesh->patch_table->total_size() -
 			                           mesh->patch_table->num_nodes * PATCH_NODE_SIZE) - mesh->patch_offset;
 
-			if(objects[offset].x != patch_map_offset) {
-				objects[offset].x = patch_map_offset;
+			if(kobjects[object_index].patch_map_offset != patch_map_offset) {
+				kobjects[object_index].patch_map_offset = patch_map_offset;
 				update = true;
 			}
 		}
 
-		if(objects[offset].y != mesh->attr_map_offset) {
-			objects[offset].y = mesh->attr_map_offset;
+		if(kobjects[object_index].attribute_map_offset != mesh->attr_map_offset) {
+			kobjects[object_index].attribute_map_offset = mesh->attr_map_offset;
 			update = true;
 		}
 
@@ -687,11 +766,12 @@ void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Sc
 void ObjectManager::device_free(Device *, DeviceScene *dscene)
 {
 	dscene->objects.free();
-	dscene->objects_vector.free();
+	dscene->object_motion_pass.free();
+	dscene->object_motion.free();
 	dscene->object_flag.free();
 }
 
-void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress)
+void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, Progress& progress)
 {
 	/* todo: normals and displacement should be done before applying transform! */
 	/* todo: create objects/meshes in right order! */
@@ -715,6 +795,8 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u
 
 	if(progress.get_cancel()) return;
 
+	uint *object_flag = dscene->object_flag.data();
+
 	/* apply transforms for objects with single user meshes */
 	foreach(Object *object, scene->objects) {
 		/* Annoying feedback loop here: we can't use is_instanced() because
@@ -725,7 +807,7 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u
 		if((mesh_users[object->mesh] == 1 && !object->mesh->has_surface_bssrdf) &&
 		   !object->mesh->has_true_displacement() && object->mesh->subdivision_type == Mesh::SUBDIVISION_NONE)
 		{
-			if(!(motion_blur && object->use_motion)) {
+			if(!(motion_blur && object->use_motion())) {
 				if(!object->mesh->transform_applied) {
 					object->apply_transform(apply_to_motion);
 					object->mesh->transform_applied = true;
diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h
index acdb1b64123..c7212ae25f9 100644
--- a/intern/cycles/render/object.h
+++ b/intern/cycles/render/object.h
@@ -35,6 +35,7 @@ class ParticleSystem;
 class Progress;
 class Scene;
 struct Transform;
+struct UpdateObjectTransformState;
 
 /* Object */
 
@@ -49,8 +50,7 @@ public:
 	int pass_id;
 	vector<ParamValue> attributes;
 	uint visibility;
-	MotionTransform motion;
-	bool use_motion;
+	array<Transform> motion;
 	bool hide_on_missing_motion;
 	bool use_holdout;
 	bool is_shadow_catcher;
@@ -69,12 +69,17 @@ public:
 	void compute_bounds(bool motion_blur);
 	void apply_transform(bool apply_to_motion);
 
-	vector<float> motion_times();
+	/* Convert between normalized -1..1 motion time and index
+	 * in the motion array. */
+	bool use_motion() const;
+	float motion_time(int step) const;
+	int motion_step(float time) const;
+	void update_motion();
 
 	/* Check whether object is traceable and it worth adding it to
 	 * kernel scene.
 	 */
-	bool is_traceable();
+	bool is_traceable() const;
 
 	/* Combine object's visibility with all possible internal run-time
 	 * determined flags which denotes trace-time visibility.
@@ -95,7 +100,6 @@ public:
 	void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
 	void device_update_transforms(DeviceScene *dscene,
 	                              Scene *scene,
-	                              uint *object_flag,
 	                              Progress& progress);
 
 	void device_update_flags(Device *device,
@@ -109,49 +113,9 @@ public:
 
 	void tag_update(Scene *scene);
 
-	void apply_static_transforms(DeviceScene *dscene, Scene *scene, uint *object_flag, Progress& progress);
+	void apply_static_transforms(DeviceScene *dscene, Scene *scene, Progress& progress);
 
 protected:
-	/* Global state of object transform update. */
-	struct UpdateObjectTransformState {
-		/* Global state used by device_update_object_transform().
-		 * Common for both threaded and non-threaded update.
-		 */
-
-		/* Type of the motion required by the scene settings. */
-		Scene::MotionType need_motion;
-
-		/* Mapping from particle system to a index in packed particle array.
-		 * Only used for read.
-		 */
-		map<ParticleSystem*, int> particle_offset;
-
-		/* Mesh area.
-		 * Used to avoid calculation of mesh area multiple times. Used for both
-		 * read and write. Acquire surface_area_lock to keep it all thread safe.
-		 */
-		map<Mesh*, float> surface_area_map;
-
-		/* Packed object arrays. Those will be filled in. */
-		uint *object_flag;
-		float4 *objects;
-		float4 *objects_vector;
-
-		/* Flags which will be synchronized to Integrator. */
-		bool have_motion;
-		bool have_curves;
-
-		/* ** Scheduling queue. ** */
-
-		Scene *scene;
-
-		/* Some locks to keep everything thread-safe. */
-		thread_spin_lock queue_lock;
-		thread_spin_lock surface_area_lock;
-
-		/* First unused object index in the queue. */
-		int queue_start_object;
-	};
 	void device_update_object_transform(UpdateObjectTransformState *state,
 	                                    Object *ob,
 	                                    const int object_index);
diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp
index 9e931280691..f1a22350060 100644
--- a/intern/cycles/render/osl.cpp
+++ b/intern/cycles/render/osl.cpp
@@ -34,6 +34,7 @@
 #include "util/util_md5.h"
 #include "util/util_path.h"
 #include "util/util_progress.h"
+#include "util/util_projection.h"
 
 #endif
 
@@ -832,7 +833,9 @@ void OSLCompiler::parameter(ShaderNode* node, const char *name)
 		case SocketType::TRANSFORM:
 		{
 			Transform value = node->get_transform(socket);
-			ss->Parameter(uname, TypeDesc::TypeMatrix, &value);
+			ProjectionTransform projection(value);
+			projection = projection_transpose(projection);
+			ss->Parameter(uname, TypeDesc::TypeMatrix, &projection);
 			break;
 		}
 		case SocketType::BOOLEAN_ARRAY:
@@ -900,7 +903,11 @@ void OSLCompiler::parameter(ShaderNode* node, const char *name)
 		case SocketType::TRANSFORM_ARRAY:
 		{
 			const array<Transform>& value = node->get_transform_array(socket);
-			ss->Parameter(uname, array_typedesc(TypeDesc::TypeMatrix, value.size()), value.data());
+			array<ProjectionTransform> fvalue(value.size());
+			for(size_t i = 0; i < value.size(); i++) {
+				fvalue[i] = projection_transpose(ProjectionTransform(value[i]));
+			}
+			ss->Parameter(uname, array_typedesc(TypeDesc::TypeMatrix, fvalue.size()), fvalue.data());
 			break;
 		}
 		case SocketType::CLOSURE:
@@ -967,7 +974,9 @@ void OSLCompiler::parameter(const char *name, ustring s)
 void OSLCompiler::parameter(const char *name, const Transform& tfm)
 {
 	OSL::ShadingSystem *ss = (OSL::ShadingSystem*)shadingsys;
-	ss->Parameter(name, TypeDesc::TypeMatrix, (float*)&tfm);
+	ProjectionTransform projection(tfm);
+	projection = projection_transpose(projection);
+	ss->Parameter(name, TypeDesc::TypeMatrix, (float*)&projection);
 }
 
 void OSLCompiler::parameter_array(const char *name, const float f[], int arraylen)
diff --git a/intern/cycles/render/particles.cpp b/intern/cycles/render/particles.cpp
index 3ee620c9d01..e4be3306d7e 100644
--- a/intern/cycles/render/particles.cpp
+++ b/intern/cycles/render/particles.cpp
@@ -62,14 +62,10 @@ void ParticleSystemManager::device_update_particles(Device *, DeviceScene *dscen
 	for(size_t j = 0; j < scene->particle_systems.size(); j++)
 		num_particles += scene->particle_systems[j]->particles.size();
 	
-	float4 *particles = dscene->particles.alloc(PARTICLE_SIZE*num_particles);
+	KernelParticle *kparticles = dscene->particles.alloc(num_particles);
 	
 	/* dummy particle */
-	particles[0] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-	particles[1] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-	particles[2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-	particles[3] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-	particles[4] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+	memset(kparticles, 0, sizeof(KernelParticle));
 	
 	int i = 1;
 	for(size_t j = 0; j < scene->particle_systems.size(); j++) {
@@ -78,13 +74,15 @@ void ParticleSystemManager::device_update_particles(Device *, DeviceScene *dscen
 		for(size_t k = 0; k < psys->particles.size(); k++) {
 			/* pack in texture */
 			Particle& pa = psys->particles[k];
-			int offset = i*PARTICLE_SIZE;
 			
-			particles[offset] = make_float4(__uint_as_float(pa.index), pa.age, pa.lifetime, pa.size);
-			particles[offset+1] = pa.rotation;
-			particles[offset+2] = make_float4(pa.location.x, pa.location.y, pa.location.z, pa.velocity.x);
-			particles[offset+3] = make_float4(pa.velocity.y, pa.velocity.z, pa.angular_velocity.x, pa.angular_velocity.y);
-			particles[offset+4] = make_float4(pa.angular_velocity.z, 0.0f, 0.0f, 0.0f);
+			kparticles[i].index = pa.index;
+			kparticles[i].age = pa.age;
+			kparticles[i].lifetime = pa.lifetime;
+			kparticles[i].size = pa.size;
+			kparticles[i].rotation = pa.rotation;
+			kparticles[i].location = float3_to_float4(pa.location);
+			kparticles[i].velocity = float3_to_float4(pa.velocity);
+			kparticles[i].angular_velocity = float3_to_float4(pa.angular_velocity);
 			
 			i++;
 			
diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp
index 24923b650e3..ba47e3ab6f8 100644
--- a/intern/cycles/render/scene.cpp
+++ b/intern/cycles/render/scene.cpp
@@ -60,19 +60,21 @@ DeviceScene::DeviceScene(Device *device)
   curve_keys(device, "__curve_keys", MEM_TEXTURE),
   patches(device, "__patches", MEM_TEXTURE),
   objects(device, "__objects", MEM_TEXTURE),
-  objects_vector(device, "__objects_vector", MEM_TEXTURE),
+  object_motion_pass(device, "__object_motion_pass", MEM_TEXTURE),
+  object_motion(device, "__object_motion", MEM_TEXTURE),
+  object_flag(device, "__object_flag", MEM_TEXTURE),
+  camera_motion(device, "__camera_motion", MEM_TEXTURE),
   attributes_map(device, "__attributes_map", MEM_TEXTURE),
   attributes_float(device, "__attributes_float", MEM_TEXTURE),
   attributes_float3(device, "__attributes_float3", MEM_TEXTURE),
   attributes_uchar4(device, "__attributes_uchar4", MEM_TEXTURE),
   light_distribution(device, "__light_distribution", MEM_TEXTURE),
-  light_data(device, "__light_data", MEM_TEXTURE),
+  lights(device, "__lights", MEM_TEXTURE),
   light_background_marginal_cdf(device, "__light_background_marginal_cdf", MEM_TEXTURE),
   light_background_conditional_cdf(device, "__light_background_conditional_cdf", MEM_TEXTURE),
   particles(device, "__particles", MEM_TEXTURE),
   svm_nodes(device, "__svm_nodes", MEM_TEXTURE),
-  shader_flag(device, "__shader_flag", MEM_TEXTURE),
-  object_flag(device, "__object_flag", MEM_TEXTURE),
+  shaders(device, "__shaders", MEM_TEXTURE),
   lookup_table(device, "__lookup_table", MEM_TEXTURE),
   sobol_directions(device, "__sobol_directions", MEM_TEXTURE)
 {
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index ea9485ff230..04bd4735a86 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -86,8 +86,13 @@ public:
 	device_vector<uint> patches;
 
 	/* objects */
-	device_vector<float4> objects;
-	device_vector<float4> objects_vector;
+	device_vector<KernelObject> objects;
+	device_vector<Transform> object_motion_pass;
+	device_vector<DecomposedTransform> object_motion;
+	device_vector<uint> object_flag;
+
+	/* cameras */
+	device_vector<DecomposedTransform> camera_motion;
 
 	/* attributes */
 	device_vector<uint4> attributes_map;
@@ -96,18 +101,17 @@ public:
 	device_vector<uchar4> attributes_uchar4;
 
 	/* lights */
-	device_vector<float4> light_distribution;
-	device_vector<float4> light_data;
+	device_vector<KernelLightDistribution> light_distribution;
+	device_vector<KernelLight> lights;
 	device_vector<float2> light_background_marginal_cdf;
 	device_vector<float2> light_background_conditional_cdf;
 
 	/* particles */
-	device_vector<float4> particles;
+	device_vector<KernelParticle> particles;
 
 	/* shaders */
 	device_vector<int4> svm_nodes;
-	device_vector<uint> shader_flag;
-	device_vector<uint> object_flag;
+	device_vector<KernelShader> shaders;
 
 	/* lookup tables */
 	device_vector<float> lookup_table;
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index e8d9558c38d..41156038558 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -656,13 +656,13 @@ DeviceRequestedFeatures Session::get_requested_device_features()
 	 */
 	requested_features.use_hair = false;
 	requested_features.use_object_motion = false;
-	requested_features.use_camera_motion = scene->camera->use_motion;
+	requested_features.use_camera_motion = scene->camera->use_motion();
 	foreach(Object *object, scene->objects) {
 		Mesh *mesh = object->mesh;
 		if(mesh->num_curves()) {
 			requested_features.use_hair = true;
 		}
-		requested_features.use_object_motion |= object->use_motion | mesh->use_motion_blur;
+		requested_features.use_object_motion |= object->use_motion() | mesh->use_motion_blur;
 		requested_features.use_camera_motion |= mesh->use_motion_blur;
 #ifdef WITH_OPENSUBDIV
 		if(mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index 578c61a3e79..ec52c51e337 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -432,14 +432,12 @@ void ShaderManager::device_update_common(Device *device,
                                          Scene *scene,
                                          Progress& /*progress*/)
 {
-	dscene->shader_flag.free();
+	dscene->shaders.free();
 
 	if(scene->shaders.size() == 0)
 		return;
 
-	uint shader_flag_size = scene->shaders.size()*SHADER_SIZE;
-	uint *shader_flag = dscene->shader_flag.alloc(shader_flag_size);
-	uint i = 0;
+	KernelShader *kshader = dscene->shaders.alloc(scene->shaders.size());
 	bool has_volumes = false;
 	bool has_transparent_shadow = false;
 
@@ -487,16 +485,17 @@ void ShaderManager::device_update_common(Device *device,
 			flag |= SD_HAS_CONSTANT_EMISSION;
 
 		/* regular shader */
-		shader_flag[i++] = flag;
-		shader_flag[i++] = shader->pass_id;
-		shader_flag[i++] = __float_as_int(constant_emission.x);
-		shader_flag[i++] = __float_as_int(constant_emission.y);
-		shader_flag[i++] = __float_as_int(constant_emission.z);
+		kshader->flags = flag;
+		kshader->pass_id = shader->pass_id;
+		kshader->constant_emission[0] = constant_emission.x;
+		kshader->constant_emission[1] = constant_emission.y;
+		kshader->constant_emission[2] = constant_emission.z;
+		kshader++;
 
 		has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
 	}
 
-	dscene->shader_flag.copy_to_device();
+	dscene->shaders.copy_to_device();
 
 	/* lookup tables */
 	KernelTables *ktables = &dscene->data.tables;
@@ -525,7 +524,7 @@ void ShaderManager::device_free_common(Device *, DeviceScene *dscene, Scene *sce
 {
 	scene->lookup_tables->remove_table(&beckmann_table_offset);
 
-	dscene->shader_flag.free();
+	dscene->shaders.free();
 }
 
 void ShaderManager::add_default(Scene *scene)
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 66c4f22a7e2..24043e2231b 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -67,6 +67,7 @@ set(SRC_HEADERS
 	util_param.h
 	util_path.h
 	util_progress.h
+	util_projection.h
 	util_queue.h
 	util_rect.h
 	util_set.h
diff --git a/intern/cycles/util/util_projection.h b/intern/cycles/util/util_projection.h
new file mode 100644
index 00000000000..dbcb9877a48
--- /dev/null
+++ b/intern/cycles/util/util_projection.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright 2011-2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_PROJECTION_H__
+#define __UTIL_PROJECTION_H__
+
+#include "util/util_transform.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* 4x4 projection matrix, perspective or orthographic. */
+
+typedef struct ProjectionTransform {
+	float4 x, y, z, w; /* Matrix rows; w is the fourth row. */
+
+#ifndef __KERNEL_GPU__
+	ProjectionTransform() /* Does not explicitly initialize the rows. */
+	{
+	}
+
+	explicit ProjectionTransform(const Transform& tfm) /* Promote a 3-row Transform to 4x4. */
+	: x(tfm.x),
+	  y(tfm.y),
+	  z(tfm.z),
+	  w(make_float4(0.0f, 0.0f, 0.0f, 1.0f)) /* Fixed last row (0, 0, 0, 1). */
+	{
+	}
+#endif
+} ProjectionTransform;
+
+typedef struct PerspectiveMotionTransform {
+	ProjectionTransform pre; /* NOTE(review): presumably the projection before the current time - confirm at use site. */
+	ProjectionTransform post; /* NOTE(review): presumably the projection after the current time - confirm at use site. */
+} PerspectiveMotionTransform;
+
+/* Functions */
+
+ccl_device_inline float3 transform_perspective(const ProjectionTransform *t, const float3 a)
+{
+	float4 b = make_float4(a.x, a.y, a.z, 1.0f); /* Point a in homogeneous form (w = 1). */
+	float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b)); /* Rows x, y, z applied to b. */
+	float w = dot(t->w, b); /* Homogeneous divisor from row w. */
+
+	return (w != 0.0f)? c/w: make_float3(0.0f, 0.0f, 0.0f); /* Divide by w; zero vector when w == 0. */
+}
+
+ccl_device_inline float3 transform_perspective_direction(const ProjectionTransform *t, const float3 a)
+{
+	float3 c = make_float3( /* Uses only the upper-left 3x3 of t: no translation column, no divide by w. */
+		a.x*t->x.x + a.y*t->x.y + a.z*t->x.z,
+		a.x*t->y.x + a.y*t->y.y + a.z*t->y.z,
+		a.x*t->z.x + a.y*t->z.y + a.z*t->z.z);
+
+	return c;
+}
+
+#ifndef __KERNEL_GPU__
+
+ccl_device_inline Transform projection_to_transform(const ProjectionTransform& a)
+{
+	Transform tfm = {a.x, a.y, a.z}; /* Keeps rows x, y, z; row w is dropped. */
+	return tfm;
+}
+
+ccl_device_inline ProjectionTransform projection_transpose(const ProjectionTransform& a)
+{
+	ProjectionTransform t; /* Result: element (row, column) of t is element (column, row) of a. */
+
+	t.x.x = a.x.x; t.x.y = a.y.x; t.x.z = a.z.x; t.x.w = a.w.x;
+	t.y.x = a.x.y; t.y.y = a.y.y; t.y.z = a.z.y; t.y.w = a.w.y;
+	t.z.x = a.x.z; t.z.y = a.y.z; t.z.z = a.z.z; t.z.w = a.w.z;
+	t.w.x = a.x.w; t.w.y = a.y.w; t.w.z = a.z.w; t.w.w = a.w.w;
+
+	return t;
+}
+
+ProjectionTransform projection_inverse(const ProjectionTransform& a);
+
+ccl_device_inline ProjectionTransform make_projection(
+	float a, float b, float c, float d,
+	float e, float f, float g, float h,
+	float i, float j, float k, float l,
+	float m, float n, float o, float p)
+{
+	ProjectionTransform t; /* Arguments are in row-major order: a..d is row x, e..h row y, i..l row z, m..p row w. */
+
+	t.x.x = a; t.x.y = b; t.x.z = c; t.x.w = d;
+	t.y.x = e; t.y.y = f; t.y.z = g; t.y.w = h;
+	t.z.x = i; t.z.y = j; t.z.z = k; t.z.w = l;
+	t.w.x = m; t.w.y = n; t.w.z = o; t.w.w = p;
+
+	return t;
+}
+ccl_device_inline ProjectionTransform projection_identity()
+{
+	return make_projection( /* 4x4 identity matrix. */
+		1.0f, 0.0f, 0.0f, 0.0f,
+		0.0f, 1.0f, 0.0f, 0.0f,
+		0.0f, 0.0f, 1.0f, 0.0f,
+		0.0f, 0.0f, 0.0f, 1.0f);
+}
+
+ccl_device_inline ProjectionTransform operator*(const ProjectionTransform& a, const ProjectionTransform& b)
+{
+	ProjectionTransform c = projection_transpose(b); /* Rows of c are the columns of b. */
+	ProjectionTransform t; /* Matrix product a * b: each element is dot(row of a, column of b). */
+
+	t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w));
+	t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w));
+	t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w));
+	t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w));
+
+	return t;
+}
+
+ccl_device_inline ProjectionTransform operator*(const ProjectionTransform& a, const Transform& b)
+{
+	return a * ProjectionTransform(b); /* b is promoted to 4x4 with last row (0, 0, 0, 1) first. */
+}
+
+ccl_device_inline ProjectionTransform operator*(const Transform& a, const ProjectionTransform& b)
+{
+	return ProjectionTransform(a) * b; /* a is promoted to 4x4 with last row (0, 0, 0, 1) first. */
+}
+
+ccl_device_inline void print_projection(const char *label, const ProjectionTransform& t)
+{
+	print_float4(label, t.x); /* One print_float4() call per row, all with the same label. */
+	print_float4(label, t.y);
+	print_float4(label, t.z);
+	print_float4(label, t.w);
+	printf("\n"); /* Extra newline after the four rows. */
+}
+
+ccl_device_inline ProjectionTransform projection_perspective(float fov, float n, float f)
+{
+	ProjectionTransform persp = make_projection( /* NOTE(review): f == n divides by zero below; callers must avoid it. */
+		1, 0, 0, 0,
+		0, 1, 0, 0,
+		0, 0, f / (f - n), -f*n / (f - n), /* z row: gives f*(z - n)/(f - n), i.e. 0 at z = n and f at z = f. */
+		0, 0, 1, 0); /* w row: w = z, so a later divide by w is a divide by depth. */
+
+	float inv_angle = 1.0f/tanf(0.5f*fov); /* fov goes through tanf(), so it is an angle in radians. */
+
+	Transform scale = transform_scale(inv_angle, inv_angle, 1); /* Same factor for x and y, z left unscaled. */
+
+	return scale * persp;
+}
+
+ccl_device_inline ProjectionTransform projection_orthographic(float znear, float zfar)
+{
+	Transform t = /* Product of a z-only scale and a z-only translate; x and y are untouched. */
+		transform_scale(1.0f, 1.0f, 1.0f / (zfar-znear)) * /* NOTE(review): zfar == znear divides by zero. */
+		transform_translate(0.0f, 0.0f, -znear);
+
+	return ProjectionTransform(t); /* Promoted to 4x4 with last row (0, 0, 0, 1). */
+}
+
+#endif /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_PROJECTION_H__ */
+
diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp
index c1270545339..206c3da23eb 100644
--- a/intern/cycles/util/util_transform.cpp
+++ b/intern/cycles/util/util_transform.cpp
@@ -46,6 +46,7 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "util/util_projection.h"
 #include "util/util_transform.h"
 
 #include "util/util_boundbox.h"
@@ -129,9 +130,9 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
 	return true;
 }
 
-Transform transform_inverse(const Transform& tfm)
+ProjectionTransform projection_inverse(const ProjectionTransform& tfm)
 {
-	Transform tfmR = transform_identity();
+	ProjectionTransform tfmR = projection_identity();
 	float M[4][4], R[4][4];
 
 	memcpy(R, &tfmR, sizeof(R));
@@ -145,7 +146,7 @@ Transform transform_inverse(const Transform& tfm)
 		M[2][2] += 1e-8f;
 
 		if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
-			return transform_identity();
+			return projection_identity();
 		}
 	}
 
@@ -154,6 +155,19 @@ Transform transform_inverse(const Transform& tfm)
 	return tfmR;
 }
 
+Transform transform_inverse(const Transform& tfm)
+{
+	/* Invert through the ProjectionTransform path, then convert back to a Transform. */
+	const ProjectionTransform inverse = projection_inverse(ProjectionTransform(tfm));
+	return projection_to_transform(inverse);
+}
+
+Transform transform_transposed_inverse(const Transform& tfm)
+{
+	/* Invert as a ProjectionTransform first, then transpose that inverse before converting back. */
+	const ProjectionTransform inverse = projection_inverse(ProjectionTransform(tfm));
+	const ProjectionTransform inverse_t = projection_transpose(inverse);
+	return projection_to_transform(inverse_t);
+}
+
 /* Motion Transform */
 
 float4 transform_to_quat(const Transform& tfm)
@@ -202,14 +216,14 @@ float4 transform_to_quat(const Transform& tfm)
 	return qt;
 }
 
-static void transform_decompose(Transform *decomp, const Transform *tfm)
+static void transform_decompose(DecomposedTransform *decomp, const Transform *tfm)
 {
 	/* extract translation */
 	decomp->y = make_float4(tfm->x.w, tfm->y.w, tfm->z.w, 0.0f);
 
 	/* extract rotation */
 	Transform M = *tfm;
-	M.x.w = 0.0f; M.y.w = 0.0f; M.z.w = 0.0f; M.w.w = 1.0f;
+	M.x.w = 0.0f; M.y.w = 0.0f; M.z.w = 0.0f;
 
 	Transform R = M;
 	float norm;
@@ -217,9 +231,9 @@ static void transform_decompose(Transform *decomp, const Transform *tfm)
 
 	do {
 		Transform Rnext;
-		Transform Rit = transform_inverse(transform_transpose(R));
+		Transform Rit = transform_transposed_inverse(R);
 
-		for(int i = 0; i < 4; i++)
+		for(int i = 0; i < 3; i++)
 			for(int j = 0; j < 4; j++)
 				Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]);
 		
@@ -247,18 +261,18 @@ static void transform_decompose(Transform *decomp, const Transform *tfm)
 	decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z);
 }
 
-void transform_motion_decompose(MotionTransform *decomp, const MotionTransform *motion, const Transform *mid)
+void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size)
 {
-	transform_decompose(&decomp->pre, &motion->pre);
-	transform_decompose(&decomp->mid, mid);
-	transform_decompose(&decomp->post, &motion->post);
-
-	/* ensure rotation around shortest angle, negated quaternions are the same
-	 * but this means we don't have to do the check in quat_interpolate */
-	if(dot(decomp->pre.x, decomp->mid.x) < 0.0f)
-		decomp->pre.x = -decomp->pre.x;
-	if(dot(decomp->mid.x, decomp->post.x) < 0.0f)
-		decomp->mid.x = -decomp->mid.x;
+	for(size_t i = 0; i < size; i++) {
+		transform_decompose(decomp + i, motion + i);
+
+		/* Keep each quaternion in the same hemisphere as the previous, already finalized
+		 * one so quat_interpolate takes the shortest arc without its own check. Flip the
+		 * current step: negating step i-1 would undo its alignment with step i-2. */
+		if(i > 0 && dot(decomp[i-1].x, decomp[i].x) < 0.0f) {
+			decomp[i].x = -decomp[i].x;
+		}
+	}
 }
 
 Transform transform_from_viewplane(BoundBox2D& viewplane)
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index ac0804a7227..987f4dac777 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -26,10 +26,10 @@
 
 CCL_NAMESPACE_BEGIN
 
-/* Data Types */
+/* Affine transformation, stored as 4x3 matrix. */
 
 typedef struct Transform {
-	float4 x, y, z, w; /* rows */
+	float4 x, y, z;
 
 #ifndef __KERNEL_GPU__
 	float4 operator[](int i) const { return *(&x + i); }
@@ -37,32 +37,16 @@ typedef struct Transform {
 #endif
 } Transform;
 
-/* transform decomposed in rotation/translation/scale. we use the same data
+/* Transform decomposed in rotation/translation/scale. we use the same data
  * structure as Transform, and tightly pack decomposition into it. first the
  * rotation (4), then translation (3), then 3x3 scale matrix (9). */
 
-typedef struct ccl_may_alias MotionTransform {
-	Transform pre;
-	Transform mid;
-	Transform post;
-} MotionTransform;
-
-typedef struct PerspectiveMotionTransform {
-	Transform pre;
-	Transform post;
-} PerspectiveMotionTransform;
+typedef struct DecomposedTransform {
+	float4 x, y, z, w; /* x: rotation quaternion, y.xyz: translation, y.w + z + w: 3x3 scale. */
+} DecomposedTransform;
 
 /* Functions */
 
-ccl_device_inline float3 transform_perspective(const Transform *t, const float3 a)
-{
-	float4 b = make_float4(a.x, a.y, a.z, 1.0f);
-	float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b));
-	float w = dot(t->w, b);
-
-	return (w != 0.0f)? c/w: make_float3(0.0f, 0.0f, 0.0f);
-}
-
 ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
 {
 	/* TODO(sergey): Disabled for now, causes crashes in certain cases. */
@@ -73,7 +57,7 @@ ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
 	x = _mm_loadu_ps(&t->x.x);
 	y = _mm_loadu_ps(&t->y.x);
 	z = _mm_loadu_ps(&t->z.x);
-	w = _mm_loadu_ps(&t->w.x);
+	w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
 
 	_MM_TRANSPOSE4_PS(x, y, z, w);
 
@@ -129,29 +113,15 @@ ccl_device_inline float3 transform_direction_transposed(const Transform *t, cons
 	return make_float3(dot(x, a), dot(y, a), dot(z, a));
 }
 
-ccl_device_inline Transform transform_transpose(const Transform a)
-{
-	Transform t;
-
-	t.x.x = a.x.x; t.x.y = a.y.x; t.x.z = a.z.x; t.x.w = a.w.x;
-	t.y.x = a.x.y; t.y.y = a.y.y; t.y.z = a.z.y; t.y.w = a.w.y;
-	t.z.x = a.x.z; t.z.y = a.y.z; t.z.z = a.z.z; t.z.w = a.w.z;
-	t.w.x = a.x.w; t.w.y = a.y.w; t.w.z = a.z.w; t.w.w = a.w.w;
-
-	return t;
-}
-
 ccl_device_inline Transform make_transform(float a, float b, float c, float d,
                                            float e, float f, float g, float h,
-                                           float i, float j, float k, float l,
-                                           float m, float n, float o, float p)
+                                           float i, float j, float k, float l)
 {
 	Transform t;
 
 	t.x.x = a; t.x.y = b; t.x.z = c; t.x.w = d;
 	t.y.x = e; t.y.y = f; t.y.z = g; t.y.w = h;
 	t.z.x = i; t.z.y = j; t.z.z = k; t.z.w = l;
-	t.w.x = m; t.w.y = n; t.w.z = o; t.w.w = p;
 
 	return t;
 }
@@ -165,21 +135,22 @@ ccl_device_inline Transform make_transform_frame(float3 N)
 	const float3 dy = normalize(cross(N, dx));
 	return make_transform(dx.x, dx.y, dx.z, 0.0f,
 	                      dy.x, dy.y, dy.z, 0.0f,
-	                      N.x , N.y,  N.z,  0.0f,
-	                      0.0f, 0.0f, 0.0f, 1.0f);
+	                      N.x , N.y,  N.z,  0.0f);
 }
 
 #ifndef __KERNEL_GPU__
 
 ccl_device_inline Transform operator*(const Transform a, const Transform b)
 {
-	Transform c = transform_transpose(b);
-	Transform t;
+	float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f);
+	float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f);
+	float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f);
+	float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f);
 
-	t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w));
-	t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w));
-	t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w));
-	t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w));
+	Transform t;
+	t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w));
+	t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w));
+	t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w));
 
 	return t;
 }
@@ -189,7 +160,6 @@ ccl_device_inline void print_transform(const char *label, const Transform& t)
 	print_float4(label, t.x);
 	print_float4(label, t.y);
 	print_float4(label, t.z);
-	print_float4(label, t.w);
 	printf("\n");
 }
 
@@ -198,8 +168,7 @@ ccl_device_inline Transform transform_translate(float3 t)
 	return make_transform(
 		1, 0, 0, t.x,
 		0, 1, 0, t.y,
-		0, 0, 1, t.z,
-		0, 0, 0, 1);
+		0, 0, 1, t.z);
 }
 
 ccl_device_inline Transform transform_translate(float x, float y, float z)
@@ -212,8 +181,7 @@ ccl_device_inline Transform transform_scale(float3 s)
 	return make_transform(
 		s.x, 0, 0, 0,
 		0, s.y, 0, 0,
-		0, 0, s.z, 0,
-		0, 0, 0, 1);
+		0, 0, s.z, 0);
 }
 
 ccl_device_inline Transform transform_scale(float x, float y, float z)
@@ -221,21 +189,6 @@ ccl_device_inline Transform transform_scale(float x, float y, float z)
 	return transform_scale(make_float3(x, y, z));
 }
 
-ccl_device_inline Transform transform_perspective(float fov, float n, float f)
-{
-	Transform persp = make_transform(
-		1, 0, 0, 0,
-		0, 1, 0, 0,
-		0, 0, f / (f - n), -f*n / (f - n),
-		0, 0, 1, 0);
-
-	float inv_angle = 1.0f/tanf(0.5f*fov);
-
-	Transform scale = transform_scale(inv_angle, inv_angle, 1);
-
-	return scale * persp;
-}
-
 ccl_device_inline Transform transform_rotate(float angle, float3 axis)
 {
 	float s = sinf(angle);
@@ -258,9 +211,7 @@ ccl_device_inline Transform transform_rotate(float angle, float3 axis)
 		axis.z*axis.x*t - s*axis.y,
 		axis.z*axis.y*t + s*axis.x,
 		axis.z*axis.z*t + c,
-		0.0f,
-
-		0.0f, 0.0f, 0.0f, 1.0f);
+		0.0f);
 }
 
 /* Euler is assumed to be in XYZ order. */
@@ -272,12 +223,6 @@ ccl_device_inline Transform transform_euler(float3 euler)
 		transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f));
 }
 
-ccl_device_inline Transform transform_orthographic(float znear, float zfar)
-{
-	return transform_scale(1.0f, 1.0f, 1.0f / (zfar-znear)) *
-		transform_translate(0.0f, 0.0f, -znear);
-}
-
 ccl_device_inline Transform transform_identity()
 {
 	return transform_scale(1.0f, 1.0f, 1.0f);
@@ -306,20 +251,20 @@ ccl_device_inline void transform_set_column(Transform *t, int column, float3 val
 }
 
 Transform transform_inverse(const Transform& a);
+Transform transform_transposed_inverse(const Transform& a);
 
 ccl_device_inline bool transform_uniform_scale(const Transform& tfm, float& scale)
 {
 	/* the epsilon here is quite arbitrary, but this function is only used for
-	 * surface area and bump, where we except it to not be so sensitive */
-	Transform ttfm = transform_transpose(tfm);
+	 * surface area and bump, where we expect it to not be so sensitive */
 	float eps = 1e-6f;
 	
 	float sx = len_squared(float4_to_float3(tfm.x));
 	float sy = len_squared(float4_to_float3(tfm.y));
 	float sz = len_squared(float4_to_float3(tfm.z));
-	float stx = len_squared(float4_to_float3(ttfm.x));
-	float sty = len_squared(float4_to_float3(ttfm.y));
-	float stz = len_squared(float4_to_float3(ttfm.z));
+	float stx = len_squared(transform_get_column(&tfm, 0));
+	float sty = len_squared(transform_get_column(&tfm, 1));
+	float stz = len_squared(transform_get_column(&tfm, 2));
 
 	if(fabsf(sx - sy) < eps && fabsf(sx - sz) < eps &&
 	   fabsf(sx - stx) < eps && fabsf(sx - sty) < eps &&
@@ -357,7 +302,6 @@ ccl_device_inline Transform transform_empty()
 	return make_transform(
 		0, 0, 0, 0,
 		0, 0, 0, 0,
-		0, 0, 0, 0,
 		0, 0, 0, 0);
 }
 
@@ -414,12 +358,11 @@ ccl_device_inline Transform transform_quick_inverse(Transform M)
 	R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T));
 	R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T));
 	R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T));
-	R.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
 
 	return R;
 }
 
-ccl_device_inline void transform_compose(Transform *tfm, const Transform *decomp)
+ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransform *decomp)
 {
 	/* rotation */
 	float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc;
@@ -452,60 +395,30 @@ ccl_device_inline void transform_compose(Transform *tfm, const Transform *decomp
 	tfm->x = make_float4(dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x);
 	tfm->y = make_float4(dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y);
 	tfm->z = make_float4(dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z);
-	tfm->w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
 }
 
-/* Disabled for now, need arc-length parametrization for constant speed motion.
- * #define CURVED_MOTION_INTERPOLATE */
-
-ccl_device void transform_motion_interpolate(Transform *tfm, const MotionTransform *motion, float t)
+/* Interpolate from array of decomposed transforms. */
+ccl_device void transform_motion_array_interpolate(Transform *tfm,
+                                                   const ccl_global DecomposedTransform *motion,
+                                                   uint numsteps,
+                                                   float time)
 {
-	/* possible optimization: is it worth it adding a check to skip scaling?
-	 * it's probably quite uncommon to have scaling objects. or can we skip
-	 * just shearing perhaps? */
-	Transform decomp;
-
-#ifdef CURVED_MOTION_INTERPOLATE
-	/* 3 point bezier curve interpolation for position */
-	float3 Ppre = float4_to_float3(motion->pre.y);
-	float3 Pmid = float4_to_float3(motion->mid.y);
-	float3 Ppost = float4_to_float3(motion->post.y);
-
-	float3 Pcontrol = 2.0f*Pmid - 0.5f*(Ppre + Ppost);
-	float3 P = Ppre*t*t + Pcontrol*2.0f*t*(1.0f - t) + Ppost*(1.0f - t)*(1.0f - t);
-
-	decomp.y.x = P.x;
-	decomp.y.y = P.y;
-	decomp.y.z = P.z;
-#endif
-
-	/* linear interpolation for rotation and scale */
-	if(t < 0.5f) {
-		t *= 2.0f;
-
-		decomp.x = quat_interpolate(motion->pre.x, motion->mid.x, t);
-#ifdef CURVED_MOTION_INTERPOLATE
-		decomp.y.w = (1.0f - t)*motion->pre.y.w + t*motion->mid.y.w;
-#else
-		decomp.y = (1.0f - t)*motion->pre.y + t*motion->mid.y;
-#endif
-		decomp.z = (1.0f - t)*motion->pre.z + t*motion->mid.z;
-		decomp.w = (1.0f - t)*motion->pre.w + t*motion->mid.w;
-	}
-	else {
-		t = (t - 0.5f)*2.0f;
-
-		decomp.x = quat_interpolate(motion->mid.x, motion->post.x, t);
-#ifdef CURVED_MOTION_INTERPOLATE
-		decomp.y.w = (1.0f - t)*motion->mid.y.w + t*motion->post.y.w;
-#else
-		decomp.y = (1.0f - t)*motion->mid.y + t*motion->post.y;
-#endif
-		decomp.z = (1.0f - t)*motion->mid.z + t*motion->post.z;
-		decomp.w = (1.0f - t)*motion->mid.w + t*motion->post.w;
-	}
-
-	/* compose rotation, translation, scale into matrix */
+	/* Figure out which steps we need to interpolate. */
+	int maxstep = numsteps-1;
+	int step = min((int)(time*maxstep), maxstep-1);
+	float t = time*maxstep - step;
+
+	const ccl_global DecomposedTransform *a = motion + step;
+	const ccl_global DecomposedTransform *b = motion + step + 1;
+
+	/* Interpolate rotation, translation and scale. */
+	DecomposedTransform decomp;
+	decomp.x = quat_interpolate(a->x, b->x, t);
+	decomp.y = (1.0f - t)*a->y + t*b->y;
+	decomp.z = (1.0f - t)*a->z + t*b->z;
+	decomp.w = (1.0f - t)*a->w + t*b->w;
+
+	/* Compose rotation, translation, scale into matrix. */
 	transform_compose(tfm, &decomp);
 }
 
@@ -513,13 +426,13 @@ ccl_device void transform_motion_interpolate(Transform *tfm, const MotionTransfo
 
 class BoundBox2D;
 
-ccl_device_inline bool operator==(const MotionTransform& A, const MotionTransform& B)
+ccl_device_inline bool operator==(const DecomposedTransform& A, const DecomposedTransform& B)
 {
-	return (A.pre == B.pre && A.post == B.post);
+	return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0;
 }
 
 float4 transform_to_quat(const Transform& tfm);
-void transform_motion_decompose(MotionTransform *decomp, const MotionTransform *motion, const Transform *mid);
+void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size);
 Transform transform_from_viewplane(BoundBox2D& viewplane);
 
 #endif
diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
index 625c19c7c46..e98e4e34181 100644
--- a/intern/cycles/util/util_vector.h
+++ b/intern/cycles/util/util_vector.h
@@ -215,6 +215,18 @@ public:
 		return data_;
 	}
 
+	T* resize(size_t newsize, const T& value)
+	{
+		/* Only elements past the previous size are assigned value. */
+		const size_t first_new = size();
+		resize(newsize);
+
+		const size_t count = size();
+		for(size_t i = first_new; i < count; i++)
+			data_[i] = value;
+		return data_;
+	}
+
 	void clear()
 	{
 		if(data_ != NULL) {
author	Brecht Van Lommel <brechtvanlommel@gmail.com>	2018-03-10 08:55:39 +0300
committer	Brecht Van Lommel <brechtvanlommel@gmail.com>	2018-03-10 08:55:39 +0300
commit	d27158aae9bc48b2a07760a2dbe8e642fcecbe57 (patch)
tree	7ecfe898e53f07e589ee4be0ea5b297153d7dfcf /intern/cycles
parent	f3161bd2abe4bcc41f0e9169275be315ecc6b054 (diff)
parent	8a76f8dac3475b1d24956e0d384d65295f15c76a (diff)