1 files changed, 50 insertions, 137 deletions
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index ac0804a7227..987f4dac777 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -26,10 +26,10 @@
 
 CCL_NAMESPACE_BEGIN
 
-/* Data Types */
+/* Affine transformation, stored as three float4 rows (x, y, z); the fourth row is implicitly (0, 0, 0, 1). */
 
 typedef struct Transform {
-	float4 x, y, z, w; /* rows */
+	float4 x, y, z;
 
 #ifndef __KERNEL_GPU__
 	float4 operator[](int i) const { return *(&x + i); }
@@ -37,32 +37,16 @@ typedef struct Transform {
 #endif
 } Transform;
 
-/* transform decomposed in rotation/translation/scale. we use the same data
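- * structure as Transform, and tightly pack decomposition into it. first the
- * rotation (4), then translation (3), then 3x3 scale matrix (9). */
+/* Transform decomposed in rotation/translation/scale. The decomposition is
+ * tightly packed into four float4 values: first the rotation (4), then
+ * translation (3), then 3x3 scale matrix (9). */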
 
-typedef struct ccl_may_alias MotionTransform {
-	Transform pre;
-	Transform mid;
-	Transform post;
-} MotionTransform;
-
-typedef struct PerspectiveMotionTransform {
-	Transform pre;
-	Transform post;
-} PerspectiveMotionTransform;
+typedef struct DecomposedTransform {
+	float4 x, y, z, w; /* x: rotation, y.xyz: translation; y.w, z, w presumably hold the 3x3 scale (9 floats) */
+} DecomposedTransform;
 
 /* Functions */
 
-ccl_device_inline float3 transform_perspective(const Transform *t, const float3 a)
-{
-	float4 b = make_float4(a.x, a.y, a.z, 1.0f);
-	float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b));
-	float w = dot(t->w, b);
-
-	return (w != 0.0f)? c/w: make_float3(0.0f, 0.0f, 0.0f);
-}
-
 ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
 {
 	/* TODO(sergey): Disabled for now, causes crashes in certain cases. */
@@ -73,7 +57,7 @@ ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
 	x = _mm_loadu_ps(&t->x.x);
 	y = _mm_loadu_ps(&t->y.x);
 	z = _mm_loadu_ps(&t->z.x);
-	w = _mm_loadu_ps(&t->w.x);
+	w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
 
 	_MM_TRANSPOSE4_PS(x, y, z, w);
 
@@ -129,29 +113,15 @@ ccl_device_inline float3 transform_direction_transposed(const Transform *t, cons
 	return make_float3(dot(x, a), dot(y, a), dot(z, a));
 }
 
-ccl_device_inline Transform transform_transpose(const Transform a)
-{
-	Transform t;
-
-	t.x.x = a.x.x; t.x.y = a.y.x; t.x.z = a.z.x; t.x.w = a.w.x;
-	t.y.x = a.x.y; t.y.y = a.y.y; t.y.z = a.z.y; t.y.w = a.w.y;
-	t.z.x = a.x.z; t.z.y = a.y.z; t.z.z = a.z.z; t.z.w = a.w.z;
-	t.w.x = a.x.w; t.w.y = a.y.w; t.w.z = a.z.w; t.w.w = a.w.w;
-
-	return t;
-}
-
 ccl_device_inline Transform make_transform(float a, float b, float c, float d,
                                            float e, float f, float g, float h,
-                                           float i, float j, float k, float l,
-                                           float m, float n, float o, float p)
+                                           float i, float j, float k, float l) /* a..d -> row x, e..h -> row y, i..l -> row z */
 {
 	Transform t;
 
 	t.x.x = a; t.x.y = b; t.x.z = c; t.x.w = d;
 	t.y.x = e; t.y.y = f; t.y.z = g; t.y.w = h;
 	t.z.x = i; t.z.y = j; t.z.z = k; t.z.w = l;
-	t.w.x = m; t.w.y = n; t.w.z = o; t.w.w = p;
 
 	return t;
 }
@@ -165,21 +135,22 @@ ccl_device_inline Transform make_transform_frame(float3 N)
 	const float3 dy = normalize(cross(N, dx));
 	return make_transform(dx.x, dx.y, dx.z, 0.0f,
 	                      dy.x, dy.y, dy.z, 0.0f,
-	                      N.x , N.y,  N.z,  0.0f,
-	                      0.0f, 0.0f, 0.0f, 1.0f);
+	                      N.x , N.y,  N.z,  0.0f);
 }
 
 #ifndef __KERNEL_GPU__
 
 ccl_device_inline Transform operator*(const Transform a, const Transform b)
 {
-	Transform c = transform_transpose(b);
-	Transform t;
+	float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f); /* columns of b; the last component of each comes from b's implicit fourth row (0, 0, 0, 1) */
+	float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f);
+	float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f);
+	float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f); /* fourth column of b; the 1.0f carries a's own fourth column into the result */
 
-	t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w));
-	t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w));
-	t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w));
-	t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w));
+	Transform t; /* only three rows are computed; no fourth row is stored */
+	t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w)); /* each entry is (row of a) . (column of b) */
+	t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w));
+	t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w));
 
 	return t;
 }
@@ -189,7 +160,6 @@ ccl_device_inline void print_transform(const char *label, const Transform& t)
 	print_float4(label, t.x);
 	print_float4(label, t.y);
 	print_float4(label, t.z);
-	print_float4(label, t.w);
 	printf("\n");
 }
 
@@ -198,8 +168,7 @@ ccl_device_inline Transform transform_translate(float3 t)
 	return make_transform(
 		1, 0, 0, t.x,
 		0, 1, 0, t.y,
-		0, 0, 1, t.z,
-		0, 0, 0, 1);
+		0, 0, 1, t.z);
 }
 
 ccl_device_inline Transform transform_translate(float x, float y, float z)
@@ -212,8 +181,7 @@ ccl_device_inline Transform transform_scale(float3 s)
 	return make_transform(
 		s.x, 0, 0, 0,
 		0, s.y, 0, 0,
-		0, 0, s.z, 0,
-		0, 0, 0, 1);
+		0, 0, s.z, 0);
 }
 
 ccl_device_inline Transform transform_scale(float x, float y, float z)
@@ -221,21 +189,6 @@ ccl_device_inline Transform transform_scale(float x, float y, float z)
 	return transform_scale(make_float3(x, y, z));
 }
 
-ccl_device_inline Transform transform_perspective(float fov, float n, float f)
-{
-	Transform persp = make_transform(
-		1, 0, 0, 0,
-		0, 1, 0, 0,
-		0, 0, f / (f - n), -f*n / (f - n),
-		0, 0, 1, 0);
-
-	float inv_angle = 1.0f/tanf(0.5f*fov);
-
-	Transform scale = transform_scale(inv_angle, inv_angle, 1);
-
-	return scale * persp;
-}
-
 ccl_device_inline Transform transform_rotate(float angle, float3 axis)
 {
 	float s = sinf(angle);
@@ -258,9 +211,7 @@ ccl_device_inline Transform transform_rotate(float angle, float3 axis)
 		axis.z*axis.x*t - s*axis.y,
 		axis.z*axis.y*t + s*axis.x,
 		axis.z*axis.z*t + c,
-		0.0f,
-
-		0.0f, 0.0f, 0.0f, 1.0f);
+		0.0f);
 }
 
 /* Euler is assumed to be in XYZ order. */
@@ -272,12 +223,6 @@ ccl_device_inline Transform transform_euler(float3 euler)
 		transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f));
 }
 
-ccl_device_inline Transform transform_orthographic(float znear, float zfar)
-{
-	return transform_scale(1.0f, 1.0f, 1.0f / (zfar-znear)) *
-		transform_translate(0.0f, 0.0f, -znear);
-}
-
 ccl_device_inline Transform transform_identity()
 {
 	return transform_scale(1.0f, 1.0f, 1.0f);
@@ -306,20 +251,20 @@ ccl_device_inline void transform_set_column(Transform *t, int column, float3 val
 }
 
 Transform transform_inverse(const Transform& a);
+Transform transform_transposed_inverse(const Transform& a);
 
 ccl_device_inline bool transform_uniform_scale(const Transform& tfm, float& scale)
 {
 	/* the epsilon here is quite arbitrary, but this function is only used for
-	 * surface area and bump, where we except it to not be so sensitive */
-	Transform ttfm = transform_transpose(tfm);
+	 * surface area and bump, where we expect it to not be so sensitive */
 	float eps = 1e-6f;
 	
 	float sx = len_squared(float4_to_float3(tfm.x));
 	float sy = len_squared(float4_to_float3(tfm.y));
 	float sz = len_squared(float4_to_float3(tfm.z));
-	float stx = len_squared(float4_to_float3(ttfm.x));
-	float sty = len_squared(float4_to_float3(ttfm.y));
-	float stz = len_squared(float4_to_float3(ttfm.z));
+	float stx = len_squared(transform_get_column(&tfm, 0));
+	float sty = len_squared(transform_get_column(&tfm, 1));
+	float stz = len_squared(transform_get_column(&tfm, 2));
 
 	if(fabsf(sx - sy) < eps && fabsf(sx - sz) < eps &&
 	   fabsf(sx - stx) < eps && fabsf(sx - sty) < eps &&
@@ -357,7 +302,6 @@ ccl_device_inline Transform transform_empty()
 	return make_transform(
 		0, 0, 0, 0,
 		0, 0, 0, 0,
-		0, 0, 0, 0,
 		0, 0, 0, 0);
 }
 
@@ -414,12 +358,11 @@ ccl_device_inline Transform transform_quick_inverse(Transform M)
 	R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T));
 	R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T));
 	R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T));
-	R.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
 
 	return R;
 }
 
-ccl_device_inline void transform_compose(Transform *tfm, const Transform *decomp)
+ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransform *decomp)
 {
 	/* rotation */
 	float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc;
@@ -452,60 +395,30 @@ ccl_device_inline void transform_compose(Transform *tfm, const Transform *decomp
 	tfm->x = make_float4(dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x);
 	tfm->y = make_float4(dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y);
 	tfm->z = make_float4(dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z);
-	tfm->w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
 }
 
-/* Disabled for now, need arc-length parametrization for constant speed motion.
- * #define CURVED_MOTION_INTERPOLATE */
-
-ccl_device void transform_motion_interpolate(Transform *tfm, const MotionTransform *motion, float t)
+/* Interpolate from array of decomposed transforms: time scaled by (numsteps - 1) selects two neighboring entries, which are blended and composed into tfm. */
+ccl_device void transform_motion_array_interpolate(Transform *tfm,
+                                                   const ccl_global DecomposedTransform *motion,
+                                                   uint numsteps,
+                                                   float time)
 {
-	/* possible optimization: is it worth it adding a check to skip scaling?
-	 * it's probably quite uncommon to have scaling objects. or can we skip
-	 * just shearing perhaps? */
-	Transform decomp;
-
-#ifdef CURVED_MOTION_INTERPOLATE
-	/* 3 point bezier curve interpolation for position */
-	float3 Ppre = float4_to_float3(motion->pre.y);
-	float3 Pmid = float4_to_float3(motion->mid.y);
-	float3 Ppost = float4_to_float3(motion->post.y);
-
-	float3 Pcontrol = 2.0f*Pmid - 0.5f*(Ppre + Ppost);
-	float3 P = Ppre*t*t + Pcontrol*2.0f*t*(1.0f - t) + Ppost*(1.0f - t)*(1.0f - t);
-
-	decomp.y.x = P.x;
-	decomp.y.y = P.y;
-	decomp.y.z = P.z;
-#endif
-
-	/* linear interpolation for rotation and scale */
-	if(t < 0.5f) {
-		t *= 2.0f;
-
-		decomp.x = quat_interpolate(motion->pre.x, motion->mid.x, t);
-#ifdef CURVED_MOTION_INTERPOLATE
-		decomp.y.w = (1.0f - t)*motion->pre.y.w + t*motion->mid.y.w;
-#else
-		decomp.y = (1.0f - t)*motion->pre.y + t*motion->mid.y;
-#endif
-		decomp.z = (1.0f - t)*motion->pre.z + t*motion->mid.z;
-		decomp.w = (1.0f - t)*motion->pre.w + t*motion->mid.w;
-	}
-	else {
-		t = (t - 0.5f)*2.0f;
-
-		decomp.x = quat_interpolate(motion->mid.x, motion->post.x, t);
-#ifdef CURVED_MOTION_INTERPOLATE
-		decomp.y.w = (1.0f - t)*motion->mid.y.w + t*motion->post.y.w;
-#else
-		decomp.y = (1.0f - t)*motion->mid.y + t*motion->post.y;
-#endif
-		decomp.z = (1.0f - t)*motion->mid.z + t*motion->post.z;
-		decomp.w = (1.0f - t)*motion->mid.w + t*motion->post.w;
-	}
-
-	/* compose rotation, translation, scale into matrix */
+	/* Figure out which steps we need to interpolate. */
+	int maxstep = numsteps-1; /* index of the last entry in motion */
+	int step = min((int)(time*maxstep), maxstep-1); /* NOTE(review): goes negative when numsteps < 2 or time is sufficiently negative, which would read before motion[0] - confirm callers guarantee numsteps >= 2 and time in [0, 1] */
+	float t = time*maxstep - step; /* blend factor between entries step and step + 1 */
+
+	const ccl_global DecomposedTransform *a = motion + step;
+	const ccl_global DecomposedTransform *b = motion + step + 1;
+
+	/* Interpolate rotation, translation and scale. */
+	DecomposedTransform decomp;
+	decomp.x = quat_interpolate(a->x, b->x, t); /* rotation goes through quat_interpolate; the other rows are blended linearly */
+	decomp.y = (1.0f - t)*a->y + t*b->y;
+	decomp.z = (1.0f - t)*a->z + t*b->z;
+	decomp.w = (1.0f - t)*a->w + t*b->w;
+
+	/* Compose rotation, translation, scale into matrix. */
 	transform_compose(tfm, &decomp);
 }
 
@@ -513,13 +426,13 @@ ccl_device void transform_motion_interpolate(Transform *tfm, const MotionTransfo
 
 class BoundBox2D;
 
-ccl_device_inline bool operator==(const MotionTransform& A, const MotionTransform& B)
+ccl_device_inline bool operator==(const DecomposedTransform& A, const DecomposedTransform& B)
 {
-	return (A.pre == B.pre && A.post == B.post);
+	return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0; /* NOTE(review): bitwise comparison - 0.0f and -0.0f compare unequal and identical NaN bit patterns compare equal, unlike float ==; confirm this is intended */
 }
 
 float4 transform_to_quat(const Transform& tfm);
-void transform_motion_decompose(MotionTransform *decomp, const MotionTransform *motion, const Transform *mid);
+void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size);
 Transform transform_from_viewplane(BoundBox2D& viewplane);
 
 #endif