3 files changed, 262 insertions, 33 deletions
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 53c1302b4a1..f09803d8b09 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -55,6 +55,10 @@ CCL_NAMESPACE_BEGIN
 #ifndef M_2_PI_F
 #define M_2_PI_F	((float)0.636619772367581343075535053490057448)
 #endif
+#ifndef M_SQRT2_F
+#define M_SQRT2_F	((float)1.41421356237309504880)
+#endif
+
 
 /* Scalar */
 
@@ -719,6 +723,45 @@ __device_inline float4 cross(const float4& a, const float4& b)
 #endif
 }
 
+__device_inline bool is_zero(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+	return a == make_float4(0.0f);
+#else
+	return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
+#endif
+}
+
+__device_inline float reduce_add(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+	float4 h = shuffle<1,0,3,2>(a) + a;
+	return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); /* todo: efficiency? */
+#else
+	return ((a.x + a.y) + (a.z + a.w));
+#endif
+}
+
+__device_inline float average(const float4& a)
+{
+	return reduce_add(a) * 0.25f;
+}
+
+__device_inline float dot(const float4& a, const float4& b)
+{
+	return reduce_add(a * b);
+}
+
+__device_inline float len(const float4 a)
+{
+	return sqrtf(dot(a, a));
+}
+
+__device_inline float4 normalize(const float4 a)
+{
+	return a/len(a);
+}
+
 __device_inline float4 min(float4 a, float4 b)
 {
 #ifdef __KERNEL_SSE__
@@ -790,39 +833,6 @@ __device_inline void print_float4(const char *label, const float4& a)
 
 #endif
 
-#ifndef __KERNEL_OPENCL__
-
-__device_inline bool is_zero(const float4& a)
-{
-#ifdef __KERNEL_SSE__
-	return a == make_float4(0.0f);
-#else
-	return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
-#endif
-}
-
-__device_inline float reduce_add(const float4& a)
-{
-#ifdef __KERNEL_SSE__
-	float4 h = shuffle<1,0,3,2>(a) + a;
-	return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); /* todo: efficiency? */
-#else
-	return ((a.x + a.y) + (a.z + a.w));
-#endif
-}
-
-__device_inline float average(const float4& a)
-{
-	return reduce_add(a) * 0.25f;
-}
-
-__device_inline float dot(const float4& a, const float4& b)
-{
-	return reduce_add(a * b);
-}
-
-#endif
-
 /* Int3 */
 
 #ifndef __KERNEL_OPENCL__
diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp
index 0fd26825911..1780994da27 100644
--- a/intern/cycles/util/util_transform.cpp
+++ b/intern/cycles/util/util_transform.cpp
@@ -53,6 +53,8 @@
 
 CCL_NAMESPACE_BEGIN
 
+/* Transform Inverse */
+
 static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
 {
 	/* forward elimination */
@@ -151,5 +153,104 @@ Transform transform_inverse(const Transform& tfm)
 	return tfmR;
 }
 
+/* Motion Transform */
+
+static float4 transform_to_quat(const Transform& tfm)
+{
+	double trace = tfm[0][0] + tfm[1][1] + tfm[2][2];
+	float4 qt;
+
+	if(trace > 0.0f) {
+		double s = sqrt(trace + 1.0);
+
+		qt.w = (float)(s/2.0);
+		s = 0.5/s;
+
+		qt.x = (float)((tfm[2][1] - tfm[1][2]) * s);
+		qt.y = (float)((tfm[0][2] - tfm[2][0]) * s);
+		qt.z = (float)((tfm[1][0] - tfm[0][1]) * s);
+	}
+	else {
+		int i = 0;
+
+		if(tfm[1][1] > tfm[i][i])
+			i = 1;
+		if(tfm[2][2] > tfm[i][i])
+			i = 2;
+
+		int j = (i + 1)%3;
+		int k = (j + 1)%3;
+
+		double s = sqrt((tfm[i][i] - (tfm[j][j] + tfm[k][k])) + 1.0);
+
+		double q[3];
+		q[i] = s * 0.5;
+		if(s != 0.0)
+			s = 0.5/s;
+
+		double w = (tfm[k][j] - tfm[j][k]) * s;
+		q[j] = (tfm[j][i] + tfm[i][j]) * s;
+		q[k] = (tfm[k][i] + tfm[i][k]) * s;
+
+		qt.x = (float)q[0];
+		qt.y = (float)q[1];
+		qt.z = (float)q[2];
+		qt.w = (float)w;
+	}
+
+	return qt;
+}
+
+static void transform_decompose(Transform *decomp, const Transform *tfm)
+{
+	/* extract translation */
+	decomp->y = make_float4(tfm->x.w, tfm->y.w, tfm->z.w, 0.0f);
+
+	/* extract rotation */
+	Transform M = *tfm;
+	M.x.w = 0.0f; M.y.w = 0.0f; M.z.w = 0.0f; M.w.w = 1.0f;
+
+	Transform R = M;
+	float norm;
+	int iteration = 0;
+
+	do {
+		Transform Rnext;
+		Transform Rit = transform_inverse(transform_transpose(R));
+
+		for(int i = 0; i < 4; i++)
+			for(int j = 0; j < 4; j++)
+				Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]);
+		
+		norm = 0.0f;
+		for(int i = 0; i < 3; i++) {
+			norm = max(norm,
+				fabsf(R[i][0] - Rnext[i][0]) +
+				fabsf(R[i][1] - Rnext[i][1]) +
+				fabsf(R[i][2] - Rnext[i][2]));
+		}
+
+		R = Rnext;
+		iteration++;
+	} while(iteration < 100 && norm > 1e-4f);
+
+	if(transform_negative_scale(R))
+		R = R * transform_scale(-1.0f, -1.0f, -1.0f); /* todo: test scale */
+
+	decomp->x = transform_to_quat(R);
+
+	/* extract scale and pack it */
+	Transform scale = transform_inverse(R) * M;
+	decomp->y.w = scale.x.x;
+	decomp->z = make_float4(scale.x.y, scale.x.z, scale.y.x, scale.y.y);
+	decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z);
+}
+
+void transform_motion_decompose(MotionTransform *decomp, const MotionTransform *motion)
+{
+	transform_decompose(&decomp->pre, &motion->pre);
+	transform_decompose(&decomp->post, &motion->post);
+}
+
 CCL_NAMESPACE_END
 
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index aeaef7b0e21..03dfbaa441d 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -28,6 +28,8 @@
 
 CCL_NAMESPACE_BEGIN
 
+/* Data Types */
+
 typedef struct Transform {
 	float4 x, y, z, w; /* rows */
 
@@ -37,6 +39,17 @@ typedef struct Transform {
 #endif
 } Transform;
 
+typedef struct MotionTransform {
+	Transform pre;
+	Transform post;
+} MotionTransform;
+
+/* transform decomposed in rotation/translation/scale. we use the same data
+ * structure as Transform, and tightly pack decomposition into it. first the
+ * rotation (4), then translation (3), then 3x3 scale matrix (9) */
+
+/* Functions */
+
 __device_inline float3 transform_perspective(const Transform *t, const float3 a)
 {
 	float4 b = make_float4(a.x, a.y, a.z, 1.0f);
@@ -62,6 +75,15 @@ __device_inline float3 transform_direction(const Transform *t, const float3 a)
 	return c;
 }
 
+__device_inline float3 transform_direction_transposed(const Transform *t, const float3 a)
+{
+	float3 x = make_float3(t->x.x, t->y.x, t->z.x);
+	float3 y = make_float3(t->x.y, t->y.y, t->z.y);
+	float3 z = make_float3(t->x.z, t->y.z, t->z.z);
+
+	return make_float3(dot(x, a), dot(y, a), dot(z, a));
+}
+
 #ifndef __KERNEL_GPU__
 
 __device_inline void print_transform(const char *label, const Transform& t)
@@ -272,6 +294,102 @@ __device_inline Transform transform_clear_scale(const Transform& tfm)
 
 #endif
 
+/* Motion Transform */
+
+__device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
+{
+	float costheta = dot(q1, q2);
+
+	if(costheta > 0.9995f) {
+		return normalize((1.0f - t)*q1 + t*q2);
+	}
+	else  {
+		float theta = acosf(clamp(costheta, -1.0f, 1.0f));
+		float thetap = theta * t;
+		float4 qperp = normalize(q2 - q1 * costheta);
+		return q1 * cosf(thetap) + qperp * sinf(thetap);
+	}
+}
+
+__device_inline Transform transform_quick_inverse(Transform M)
+{
+	Transform R;
+	float det = M.x.x*(M.z.z*M.y.y - M.z.y*M.y.z) - M.y.x*(M.z.z*M.x.y - M.z.y*M.x.z) + M.z.x*(M.y.z*M.x.y - M.y.y*M.x.z);
+
+	det = (det != 0.0f)? 1.0f/det: 0.0f;
+
+	float3 Rx = det*make_float3(M.z.z*M.y.y - M.z.y*M.y.z, M.z.y*M.x.z - M.z.z*M.x.y, M.y.z*M.x.y - M.y.y*M.x.z);
+	float3 Ry = det*make_float3(M.z.x*M.y.z - M.z.z*M.y.x, M.z.z*M.x.x - M.z.x*M.x.z, M.y.x*M.x.z - M.y.z*M.x.x);
+	float3 Rz = det*make_float3(M.z.y*M.y.x - M.z.x*M.y.y, M.z.x*M.x.y - M.z.y*M.x.x, M.y.y*M.x.x - M.y.x*M.x.y);
+	float3 T = -make_float3(M.x.w, M.y.w, M.z.w);
+
+	R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T));
+	R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T));
+	R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T));
+	R.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
+
+	return R;
+}
+
+__device_inline void transform_compose(Transform *tfm, const Transform *decomp)
+{
+	/* rotation */
+	float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc;
+
+	q0 = M_SQRT2_F * decomp->x.w;
+	q1 = M_SQRT2_F * decomp->x.x;
+	q2 = M_SQRT2_F * decomp->x.y;
+	q3 = M_SQRT2_F * decomp->x.z;
+
+	qda = q0*q1;
+	qdb = q0*q2;
+	qdc = q0*q3;
+	qaa = q1*q1;
+	qab = q1*q2;
+	qac = q1*q3;
+	qbb = q2*q2;
+	qbc = q2*q3;
+	qcc = q3*q3;
+
+	float3 rotation_x = make_float3(1.0f-qbb-qcc, -qdc+qab, qdb+qac);
+	float3 rotation_y = make_float3(qdc+qab, 1.0f-qaa-qcc, -qda+qbc);
+	float3 rotation_z = make_float3(-qdb+qac, qda+qbc, 1.0f-qaa-qbb);
+
+	/* scale */
+	float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y);
+	float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z);
+	float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w);
+
+	/* compose with translation */
+	tfm->x = make_float4(dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x);
+	tfm->y = make_float4(dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y);
+	tfm->z = make_float4(dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z);
+	tfm->w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
+}
+
+__device void transform_motion_interpolate(Transform *tfm, const MotionTransform *motion, float t)
+{
+	Transform decomp;
+
+	decomp.x = quat_interpolate(motion->pre.x, motion->post.x, t);
+	decomp.y = (1.0f - t)*motion->pre.y + t*motion->post.y;
+	decomp.z = (1.0f - t)*motion->pre.z + t*motion->post.z;
+	decomp.w = (1.0f - t)*motion->pre.w + t*motion->post.w;
+
+	transform_compose(tfm, &decomp);
+}
+
+#ifndef __KERNEL_GPU__
+
+__device_inline bool operator==(const MotionTransform& A, const MotionTransform& B)
+{
+	return (A.pre == B.pre && A.post == B.post);
+}
+
+void transform_motion_decompose(MotionTransform *decomp, const MotionTransform *motion);
+
+#endif
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_TRANSFORM_H__ */