Cycles: support for motion vector and UV passes.

Most of the changes are related to adding support for motion data throughout the code. There's some code for actual camera/object motion blur raytracing but it's unfinished (it badly slows down the raytracing kernel even when the option is turned off), so that code it disabled still. Motion vector export from Blender tries to avoid computing derived meshes when the mesh does not have a deforming modifier, and it also won't store motion vectors for every vertex if only the object or camera is moving.
author: Brecht Van Lommel <brechtvanlommel@pandora.be> 2012-04-30 16:49:26 +0400
committer: Brecht Van Lommel <brechtvanlommel@pandora.be> 2012-04-30 16:49:26 +0400
commit: 1d8c79818870b92df46c443d7778438aa67d019c (patch)
tree: ba3fc305671261e40851d8a230a33ebe19396e95 /intern/cycles/util
parent: 796dd8a321108df26757fb9df5c2aa6eb42c9633 (diff)
3 files changed, 262 insertions, 33 deletions
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 53c1302b4a1..f09803d8b09 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -55,6 +55,10 @@ CCL_NAMESPACE_BEGIN
 #ifndef M_2_PI_F
 #define M_2_PI_F	((float)0.636619772367581343075535053490057448)
 #endif
+#ifndef M_SQRT2_F
+#define M_SQRT2_F	((float)1.41421356237309504880)
+#endif
+
 
 /* Scalar */
 
@@ -719,6 +723,45 @@ __device_inline float4 cross(const float4& a, const float4& b)
 #endif
 }
 
+__device_inline bool is_zero(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+	return a == make_float4(0.0f);
+#else
+	return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
+#endif
+}
+
+__device_inline float reduce_add(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+	float4 h = shuffle<1,0,3,2>(a) + a;
+	return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); /* todo: efficiency? */
+#else
+	return ((a.x + a.y) + (a.z + a.w));
+#endif
+}
+
+__device_inline float average(const float4& a)
+{
+	return reduce_add(a) * 0.25f;
+}
+
+__device_inline float dot(const float4& a, const float4& b)
+{
+	return reduce_add(a * b);
+}
+
+__device_inline float len(const float4 a)
+{
+	return sqrtf(dot(a, a));
+}
+
+__device_inline float4 normalize(const float4 a)
+{
+	return a/len(a);
+}
+
 __device_inline float4 min(float4 a, float4 b)
 {
 #ifdef __KERNEL_SSE__
@@ -790,39 +833,6 @@ __device_inline void print_float4(const char *label, const float4& a)
 
 #endif
 
-#ifndef __KERNEL_OPENCL__
-
-__device_inline bool is_zero(const float4& a)
-{
-#ifdef __KERNEL_SSE__
-	return a == make_float4(0.0f);
-#else
-	return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
-#endif
-}
-
-__device_inline float reduce_add(const float4& a)
-{
-#ifdef __KERNEL_SSE__
-	float4 h = shuffle<1,0,3,2>(a) + a;
-	return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); /* todo: efficiency? */
-#else
-	return ((a.x + a.y) + (a.z + a.w));
-#endif
-}
-
-__device_inline float average(const float4& a)
-{
-	return reduce_add(a) * 0.25f;
-}
-
-__device_inline float dot(const float4& a, const float4& b)
-{
-	return reduce_add(a * b);
-}
-
-#endif
-
 /* Int3 */
 
 #ifndef __KERNEL_OPENCL__
diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp
index 0fd26825911..1780994da27 100644
--- a/intern/cycles/util/util_transform.cpp
+++ b/intern/cycles/util/util_transform.cpp
@@ -53,6 +53,8 @@
 
 CCL_NAMESPACE_BEGIN
 
+/* Transform Inverse */
+
 static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
 {
 	/* forward elimination */
@@ -151,5 +153,104 @@ Transform transform_inverse(const Transform& tfm)
 	return tfmR;
 }
 
+/* Motion Transform */
+
+static float4 transform_to_quat(const Transform& tfm)
+{
+	double trace = tfm[0][0] + tfm[1][1] + tfm[2][2];
+	float4 qt;
+
+	if(trace > 0.0f) {
+		double s = sqrt(trace + 1.0);
+
+		qt.w = (float)(s/2.0);
+		s = 0.5/s;
+
+		qt.x = (float)((tfm[2][1] - tfm[1][2]) * s);
+		qt.y = (float)((tfm[0][2] - tfm[2][0]) * s);
+		qt.z = (float)((tfm[1][0] - tfm[0][1]) * s);
+	}
+	else {
+		int i = 0;
+
+		if(tfm[1][1] > tfm[i][i])
+			i = 1;
+		if(tfm[2][2] > tfm[i][i])
+			i = 2;
+
+		int j = (i + 1)%3;
+		int k = (j + 1)%3;
+
+		double s = sqrt((tfm[i][i] - (tfm[j][j] + tfm[k][k])) + 1.0);
+
+		double q[3];
+		q[i] = s * 0.5;
+		if(s != 0.0)
+			s = 0.5/s;
+
+		double w = (tfm[k][j] - tfm[j][k]) * s;
+		q[j] = (tfm[j][i] + tfm[i][j]) * s;
+		q[k] = (tfm[k][i] + tfm[i][k]) * s;
+
+		qt.x = (float)q[0];
+		qt.y = (float)q[1];
+		qt.z = (float)q[2];
+		qt.w = (float)w;
+	}
+
+	return qt;
+}
+
+static void transform_decompose(Transform *decomp, const Transform *tfm)
+{
+	/* extract translation */
+	decomp->y = make_float4(tfm->x.w, tfm->y.w, tfm->z.w, 0.0f);
+
+	/* extract rotation */
+	Transform M = *tfm;
+	M.x.w = 0.0f; M.y.w = 0.0f; M.z.w = 0.0f; M.w.w = 1.0f;
+
+	Transform R = M;
+	float norm;
+	int iteration = 0;
+
+	do {
+		Transform Rnext;
+		Transform Rit = transform_inverse(transform_transpose(R));
+
+		for(int i = 0; i < 4; i++)
+			for(int j = 0; j < 4; j++)
+				Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]);
+		
+		norm = 0.0f;
+		for(int i = 0; i < 3; i++) {
+			norm = max(norm,
+				fabsf(R[i][0] - Rnext[i][0]) +
+				fabsf(R[i][1] - Rnext[i][1]) +
+				fabsf(R[i][2] - Rnext[i][2]));
+		}
+
+		R = Rnext;
+		iteration++;
+	} while(iteration < 100 && norm > 1e-4f);
+
+	if(transform_negative_scale(R))
+		R = R * transform_scale(-1.0f, -1.0f, -1.0f); /* todo: test scale */
+
+	decomp->x = transform_to_quat(R);
+
+	/* extract scale and pack it */
+	Transform scale = transform_inverse(R) * M;
+	decomp->y.w = scale.x.x;
+	decomp->z = make_float4(scale.x.y, scale.x.z, scale.y.x, scale.y.y);
+	decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z);
+}
+
+void transform_motion_decompose(MotionTransform *decomp, const MotionTransform *motion)
+{
+	transform_decompose(&decomp->pre, &motion->pre);
+	transform_decompose(&decomp->post, &motion->post);
+}
+
 CCL_NAMESPACE_END
 
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index aeaef7b0e21..03dfbaa441d 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -28,6 +28,8 @@
 
 CCL_NAMESPACE_BEGIN
 
+/* Data Types */
+
 typedef struct Transform {
 	float4 x, y, z, w; /* rows */
 
@@ -37,6 +39,17 @@ typedef struct Transform {
 #endif
 } Transform;
 
+typedef struct MotionTransform {
+	Transform pre;
+	Transform post;
+} MotionTransform;
+
+/* transform decomposed in rotation/translation/scale. we use the same data
+ * structure as Transform, and tightly pack decomposition into it. first the
+ * rotation (4), then translation (3), then 3x3 scale matrix (9) */
+
+/* Functions */
+
 __device_inline float3 transform_perspective(const Transform *t, const float3 a)
 {
 	float4 b = make_float4(a.x, a.y, a.z, 1.0f);
@@ -62,6 +75,15 @@ __device_inline float3 transform_direction(const Transform *t, const float3 a)
 	return c;
 }
 
+__device_inline float3 transform_direction_transposed(const Transform *t, const float3 a)
+{
+	float3 x = make_float3(t->x.x, t->y.x, t->z.x);
+	float3 y = make_float3(t->x.y, t->y.y, t->z.y);
+	float3 z = make_float3(t->x.z, t->y.z, t->z.z);
+
+	return make_float3(dot(x, a), dot(y, a), dot(z, a));
+}
+
 #ifndef __KERNEL_GPU__
 
 __device_inline void print_transform(const char *label, const Transform& t)
@@ -272,6 +294,102 @@ __device_inline Transform transform_clear_scale(const Transform& tfm)
 
 #endif
 
+/* Motion Transform */
+
+__device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
+{
+	float costheta = dot(q1, q2);
+
+	if(costheta > 0.9995f) {
+		return normalize((1.0f - t)*q1 + t*q2);
+	}
+	else  {
+		float theta = acosf(clamp(costheta, -1.0f, 1.0f));
+		float thetap = theta * t;
+		float4 qperp = normalize(q2 - q1 * costheta);
+		return q1 * cosf(thetap) + qperp * sinf(thetap);
+	}
+}
+
+__device_inline Transform transform_quick_inverse(Transform M)
+{
+	Transform R;
+	float det = M.x.x*(M.z.z*M.y.y - M.z.y*M.y.z) - M.y.x*(M.z.z*M.x.y - M.z.y*M.x.z) + M.z.x*(M.y.z*M.x.y - M.y.y*M.x.z);
+
+	det = (det != 0.0f)? 1.0f/det: 0.0f;
+
+	float3 Rx = det*make_float3(M.z.z*M.y.y - M.z.y*M.y.z, M.z.y*M.x.z - M.z.z*M.x.y, M.y.z*M.x.y - M.y.y*M.x.z);
+	float3 Ry = det*make_float3(M.z.x*M.y.z - M.z.z*M.y.x, M.z.z*M.x.x - M.z.x*M.x.z, M.y.x*M.x.z - M.y.z*M.x.x);
+	float3 Rz = det*make_float3(M.z.y*M.y.x - M.z.x*M.y.y, M.z.x*M.x.y - M.z.y*M.x.x, M.y.y*M.x.x - M.y.x*M.x.y);
+	float3 T = -make_float3(M.x.w, M.y.w, M.z.w);
+
+	R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T));
+	R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T));
+	R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T));
+	R.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
+
+	return R;
+}
+
+__device_inline void transform_compose(Transform *tfm, const Transform *decomp)
+{
+	/* rotation */
+	float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc;
+
+	q0 = M_SQRT2_F * decomp->x.w;
+	q1 = M_SQRT2_F * decomp->x.x;
+	q2 = M_SQRT2_F * decomp->x.y;
+	q3 = M_SQRT2_F * decomp->x.z;
+
+	qda = q0*q1;
+	qdb = q0*q2;
+	qdc = q0*q3;
+	qaa = q1*q1;
+	qab = q1*q2;
+	qac = q1*q3;
+	qbb = q2*q2;
+	qbc = q2*q3;
+	qcc = q3*q3;
+
+	float3 rotation_x = make_float3(1.0f-qbb-qcc, -qdc+qab, qdb+qac);
+	float3 rotation_y = make_float3(qdc+qab, 1.0f-qaa-qcc, -qda+qbc);
+	float3 rotation_z = make_float3(-qdb+qac, qda+qbc, 1.0f-qaa-qbb);
+
+	/* scale */
+	float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y);
+	float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z);
+	float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w);
+
+	/* compose with translation */
+	tfm->x = make_float4(dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x);
+	tfm->y = make_float4(dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y);
+	tfm->z = make_float4(dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z);
+	tfm->w = make_float4(0.0f, 0.0f, 0.0f, 1.0f);
+}
+
+__device void transform_motion_interpolate(Transform *tfm, const MotionTransform *motion, float t)
+{
+	Transform decomp;
+
+	decomp.x = quat_interpolate(motion->pre.x, motion->post.x, t);
+	decomp.y = (1.0f - t)*motion->pre.y + t*motion->post.y;
+	decomp.z = (1.0f - t)*motion->pre.z + t*motion->post.z;
+	decomp.w = (1.0f - t)*motion->pre.w + t*motion->post.w;
+
+	transform_compose(tfm, &decomp);
+}
+
+#ifndef __KERNEL_GPU__
+
+__device_inline bool operator==(const MotionTransform& A, const MotionTransform& B)
+{
+	return (A.pre == B.pre && A.post == B.post);
+}
+
+void transform_motion_decompose(MotionTransform *decomp, const MotionTransform *motion);
+
+#endif
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_TRANSFORM_H__ */
author	Brecht Van Lommel <brechtvanlommel@pandora.be>	2012-04-30 16:49:26 +0400
committer	Brecht Van Lommel <brechtvanlommel@pandora.be>	2012-04-30 16:49:26 +0400
commit	1d8c79818870b92df46c443d7778438aa67d019c (patch)
tree	ba3fc305671261e40851d8a230a33ebe19396e95 /intern/cycles/util
parent	796dd8a321108df26757fb9df5c2aa6eb42c9633 (diff)