diff options
Diffstat (limited to 'intern/cycles/util')
-rw-r--r-- | intern/cycles/util/util_math.h | 76 | ||||
-rw-r--r-- | intern/cycles/util/util_transform.cpp | 101 | ||||
-rw-r--r-- | intern/cycles/util/util_transform.h | 118 |
3 files changed, 262 insertions, 33 deletions
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 53c1302b4a1..f09803d8b09 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -55,6 +55,10 @@ CCL_NAMESPACE_BEGIN #ifndef M_2_PI_F #define M_2_PI_F ((float)0.636619772367581343075535053490057448) #endif +#ifndef M_SQRT2_F +#define M_SQRT2_F ((float)1.41421356237309504880) +#endif + /* Scalar */ @@ -719,6 +723,45 @@ __device_inline float4 cross(const float4& a, const float4& b) #endif } +__device_inline bool is_zero(const float4& a) +{ +#ifdef __KERNEL_SSE__ + return a == make_float4(0.0f); +#else + return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); +#endif +} + +__device_inline float reduce_add(const float4& a) +{ +#ifdef __KERNEL_SSE__ + float4 h = shuffle<1,0,3,2>(a) + a; + return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); /* todo: efficiency? */ +#else + return ((a.x + a.y) + (a.z + a.w)); +#endif +} + +__device_inline float average(const float4& a) +{ + return reduce_add(a) * 0.25f; +} + +__device_inline float dot(const float4& a, const float4& b) +{ + return reduce_add(a * b); +} + +__device_inline float len(const float4 a) +{ + return sqrtf(dot(a, a)); +} + +__device_inline float4 normalize(const float4 a) +{ + return a/len(a); +} + __device_inline float4 min(float4 a, float4 b) { #ifdef __KERNEL_SSE__ @@ -790,39 +833,6 @@ __device_inline void print_float4(const char *label, const float4& a) #endif -#ifndef __KERNEL_OPENCL__ - -__device_inline bool is_zero(const float4& a) -{ -#ifdef __KERNEL_SSE__ - return a == make_float4(0.0f); -#else - return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); -#endif -} - -__device_inline float reduce_add(const float4& a) -{ -#ifdef __KERNEL_SSE__ - float4 h = shuffle<1,0,3,2>(a) + a; - return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h); /* todo: efficiency? */ -#else - return ((a.x + a.y) + (a.z + a.w)); -#endif -} - -__device_inline float average(const float4& a) -{ - return reduce_add(a) * 0.25f; -} - -__device_inline float dot(const float4& a, const float4& b) -{ - return reduce_add(a * b); -} - -#endif - /* Int3 */ #ifndef __KERNEL_OPENCL__ diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp index 0fd26825911..1780994da27 100644 --- a/intern/cycles/util/util_transform.cpp +++ b/intern/cycles/util/util_transform.cpp @@ -53,6 +53,8 @@ CCL_NAMESPACE_BEGIN +/* Transform Inverse */ + static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) { /* forward elimination */ @@ -151,5 +153,104 @@ Transform transform_inverse(const Transform& tfm) return tfmR; } +/* Motion Transform */ + +static float4 transform_to_quat(const Transform& tfm) +{ + double trace = tfm[0][0] + tfm[1][1] + tfm[2][2]; + float4 qt; + + if(trace > 0.0f) { + double s = sqrt(trace + 1.0); + + qt.w = (float)(s/2.0); + s = 0.5/s; + + qt.x = (float)((tfm[2][1] - tfm[1][2]) * s); + qt.y = (float)((tfm[0][2] - tfm[2][0]) * s); + qt.z = (float)((tfm[1][0] - tfm[0][1]) * s); + } + else { + int i = 0; + + if(tfm[1][1] > tfm[i][i]) + i = 1; + if(tfm[2][2] > tfm[i][i]) + i = 2; + + int j = (i + 1)%3; + int k = (j + 1)%3; + + double s = sqrt((tfm[i][i] - (tfm[j][j] + tfm[k][k])) + 1.0); + + double q[3]; + q[i] = s * 0.5; + if(s != 0.0) + s = 0.5/s; + + double w = (tfm[k][j] - tfm[j][k]) * s; + q[j] = (tfm[j][i] + tfm[i][j]) * s; + q[k] = (tfm[k][i] + tfm[i][k]) * s; + + qt.x = (float)q[0]; + qt.y = (float)q[1]; + qt.z = (float)q[2]; + qt.w = (float)w; + } + + return qt; +} + +static void transform_decompose(Transform *decomp, const Transform *tfm) +{ + /* extract translation */ + decomp->y = make_float4(tfm->x.w, tfm->y.w, tfm->z.w, 0.0f); + + /* extract rotation */ + Transform M = *tfm; + M.x.w = 0.0f; M.y.w = 0.0f; M.z.w = 0.0f; M.w.w = 1.0f; + + Transform R = M; + float norm; + int iteration = 0; + + do { + Transform Rnext; + Transform Rit = transform_inverse(transform_transpose(R)); + + for(int i = 0; i < 4; i++) + for(int j = 0; j < 4; j++) + Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]); + + norm = 0.0f; + for(int i = 0; i < 3; i++) { + norm = max(norm, + fabsf(R[i][0] - Rnext[i][0]) + + fabsf(R[i][1] - Rnext[i][1]) + + fabsf(R[i][2] - Rnext[i][2])); + } + + R = Rnext; + iteration++; + } while(iteration < 100 && norm > 1e-4f); + + if(transform_negative_scale(R)) + R = R * transform_scale(-1.0f, -1.0f, -1.0f); /* todo: test scale */ + + decomp->x = transform_to_quat(R); + + /* extract scale and pack it */ + Transform scale = transform_inverse(R) * M; + decomp->y.w = scale.x.x; + decomp->z = make_float4(scale.x.y, scale.x.z, scale.y.x, scale.y.y); + decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z); +} + +void transform_motion_decompose(MotionTransform *decomp, const MotionTransform *motion) +{ + transform_decompose(&decomp->pre, &motion->pre); + transform_decompose(&decomp->post, &motion->post); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h index aeaef7b0e21..03dfbaa441d 100644 --- a/intern/cycles/util/util_transform.h +++ b/intern/cycles/util/util_transform.h @@ -28,6 +28,8 @@ CCL_NAMESPACE_BEGIN +/* Data Types */ + typedef struct Transform { float4 x, y, z, w; /* rows */ @@ -37,6 +39,17 @@ typedef struct Transform { #endif } Transform; +typedef struct MotionTransform { + Transform pre; + Transform post; +} MotionTransform; + +/* transform decomposed in rotation/translation/scale. we use the same data + * structure as Transform, and tightly pack decomposition into it. first the + * rotation (4), then translation (3), then 3x3 scale matrix (9) */ + +/* Functions */ + __device_inline float3 transform_perspective(const Transform *t, const float3 a) { float4 b = make_float4(a.x, a.y, a.z, 1.0f); @@ -62,6 +75,15 @@ __device_inline float3 transform_direction(const Transform *t, const float3 a) return c; } +__device_inline float3 transform_direction_transposed(const Transform *t, const float3 a) +{ + float3 x = make_float3(t->x.x, t->y.x, t->z.x); + float3 y = make_float3(t->x.y, t->y.y, t->z.y); + float3 z = make_float3(t->x.z, t->y.z, t->z.z); + + return make_float3(dot(x, a), dot(y, a), dot(z, a)); +} + #ifndef __KERNEL_GPU__ __device_inline void print_transform(const char *label, const Transform& t) @@ -272,6 +294,102 @@ __device_inline Transform transform_clear_scale(const Transform& tfm) #endif +/* Motion Transform */ + +__device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) +{ + float costheta = dot(q1, q2); + + if(costheta > 0.9995f) { + return normalize((1.0f - t)*q1 + t*q2); + } + else { + float theta = acosf(clamp(costheta, -1.0f, 1.0f)); + float thetap = theta * t; + float4 qperp = normalize(q2 - q1 * costheta); + return q1 * cosf(thetap) + qperp * sinf(thetap); + } +} + +__device_inline Transform transform_quick_inverse(Transform M) +{ + Transform R; + float det = M.x.x*(M.z.z*M.y.y - M.z.y*M.y.z) - M.y.x*(M.z.z*M.x.y - M.z.y*M.x.z) + M.z.x*(M.y.z*M.x.y - M.y.y*M.x.z); + + det = (det != 0.0f)? 1.0f/det: 0.0f; + + float3 Rx = det*make_float3(M.z.z*M.y.y - M.z.y*M.y.z, M.z.y*M.x.z - M.z.z*M.x.y, M.y.z*M.x.y - M.y.y*M.x.z); + float3 Ry = det*make_float3(M.z.x*M.y.z - M.z.z*M.y.x, M.z.z*M.x.x - M.z.x*M.x.z, M.y.x*M.x.z - M.y.z*M.x.x); + float3 Rz = det*make_float3(M.z.y*M.y.x - M.z.x*M.y.y, M.z.x*M.x.y - M.z.y*M.x.x, M.y.y*M.x.x - M.y.x*M.x.y); + float3 T = -make_float3(M.x.w, M.y.w, M.z.w); + + R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T)); + R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T)); + R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T)); + R.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f); + + return R; +} + +__device_inline void transform_compose(Transform *tfm, const Transform *decomp) +{ + /* rotation */ + float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc; + + q0 = M_SQRT2_F * decomp->x.w; + q1 = M_SQRT2_F * decomp->x.x; + q2 = M_SQRT2_F * decomp->x.y; + q3 = M_SQRT2_F * decomp->x.z; + + qda = q0*q1; + qdb = q0*q2; + qdc = q0*q3; + qaa = q1*q1; + qab = q1*q2; + qac = q1*q3; + qbb = q2*q2; + qbc = q2*q3; + qcc = q3*q3; + + float3 rotation_x = make_float3(1.0f-qbb-qcc, -qdc+qab, qdb+qac); + float3 rotation_y = make_float3(qdc+qab, 1.0f-qaa-qcc, -qda+qbc); + float3 rotation_z = make_float3(-qdb+qac, qda+qbc, 1.0f-qaa-qbb); + + /* scale */ + float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y); + float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z); + float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w); + + /* compose with translation */ + tfm->x = make_float4(dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x); + tfm->y = make_float4(dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y); + tfm->z = make_float4(dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z); + tfm->w = make_float4(0.0f, 0.0f, 0.0f, 1.0f); +} + +__device void transform_motion_interpolate(Transform *tfm, const MotionTransform *motion, float t) +{ + Transform decomp; + + decomp.x = quat_interpolate(motion->pre.x, motion->post.x, t); + decomp.y = (1.0f - t)*motion->pre.y + t*motion->post.y; + decomp.z = (1.0f - t)*motion->pre.z + t*motion->post.z; + decomp.w = (1.0f - t)*motion->pre.w + t*motion->post.w; + + transform_compose(tfm, &decomp); +} + +#ifndef __KERNEL_GPU__ + +__device_inline bool operator==(const MotionTransform& A, const MotionTransform& B) +{ + return (A.pre == B.pre && A.post == B.post); +} + +void transform_motion_decompose(MotionTransform *decomp, const MotionTransform *motion); + +#endif + CCL_NAMESPACE_END #endif /* __UTIL_TRANSFORM_H__ */ |