1 files changed, 544 insertions, 83 deletions
diff --git a/source/blender/blenlib/intern/math_matrix.c b/source/blender/blenlib/intern/math_matrix.c
index c970f2132c3..6e8f4622488 100644
--- a/source/blender/blenlib/intern/math_matrix.c
+++ b/source/blender/blenlib/intern/math_matrix.c
@@ -25,10 +25,6 @@
  * ***** END GPL LICENSE BLOCK *****
  */
 
-#include <float.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
 
 #include "BLI_math.h"
 
@@ -129,10 +125,15 @@ void swap_m4m4(float m1[][4], float m2[][4])
 
 /******************************** Arithmetic *********************************/
 
-void mul_m4_m4m4(float m1[][4], float m2[][4], float m3[][4])
+void mul_m4_m4m4(float m1[][4], float m2_[][4], float m3_[][4])
 {
-  /* matrix product: m1[j][k] = m2[j][i].m3[i][k] */
+	float m2[4][4], m3[4][4];
 
+	/* copy so it works when m1 is the same pointer as m2 or m3 */
+	copy_m4_m4(m2, m2_);
+	copy_m4_m4(m3, m3_);
+
+	/* matrix product: m1[j][k] = m2[j][i].m3[i][k] */
 	m1[0][0] = m2[0][0]*m3[0][0] + m2[0][1]*m3[1][0] + m2[0][2]*m3[2][0] + m2[0][3]*m3[3][0];
 	m1[0][1] = m2[0][0]*m3[0][1] + m2[0][1]*m3[1][1] + m2[0][2]*m3[2][1] + m2[0][3]*m3[3][1];
 	m1[0][2] = m2[0][0]*m3[0][2] + m2[0][1]*m3[1][2] + m2[0][2]*m3[2][2] + m2[0][3]*m3[3][2];
@@ -155,20 +156,26 @@ void mul_m4_m4m4(float m1[][4], float m2[][4], float m3[][4])
 
 }
 
-void mul_m3_m3m3(float m1[][3], float m3[][3], float m2[][3])
+void mul_m3_m3m3(float m1[][3], float m3_[][3], float m2_[][3])
 {
-   /*  m1[i][j] = m2[i][k]*m3[k][j], args are flipped!  */
-  	m1[0][0]= m2[0][0]*m3[0][0] + m2[0][1]*m3[1][0] + m2[0][2]*m3[2][0]; 
-  	m1[0][1]= m2[0][0]*m3[0][1] + m2[0][1]*m3[1][1] + m2[0][2]*m3[2][1]; 
-  	m1[0][2]= m2[0][0]*m3[0][2] + m2[0][1]*m3[1][2] + m2[0][2]*m3[2][2]; 
+	float m2[3][3], m3[3][3];
+
+	/* copy so it works when m1 is the same pointer as m2 or m3 */
+	copy_m3_m3(m2, m2_);
+	copy_m3_m3(m3, m3_);
+
+	/* m1[i][j] = m2[i][k]*m3[k][j], args are flipped!  */
+	m1[0][0]= m2[0][0]*m3[0][0] + m2[0][1]*m3[1][0] + m2[0][2]*m3[2][0]; 
+	m1[0][1]= m2[0][0]*m3[0][1] + m2[0][1]*m3[1][1] + m2[0][2]*m3[2][1]; 
+	m1[0][2]= m2[0][0]*m3[0][2] + m2[0][1]*m3[1][2] + m2[0][2]*m3[2][2]; 
 
-  	m1[1][0]= m2[1][0]*m3[0][0] + m2[1][1]*m3[1][0] + m2[1][2]*m3[2][0]; 
-  	m1[1][1]= m2[1][0]*m3[0][1] + m2[1][1]*m3[1][1] + m2[1][2]*m3[2][1]; 
-  	m1[1][2]= m2[1][0]*m3[0][2] + m2[1][1]*m3[1][2] + m2[1][2]*m3[2][2]; 
+	m1[1][0]= m2[1][0]*m3[0][0] + m2[1][1]*m3[1][0] + m2[1][2]*m3[2][0]; 
+	m1[1][1]= m2[1][0]*m3[0][1] + m2[1][1]*m3[1][1] + m2[1][2]*m3[2][1]; 
+	m1[1][2]= m2[1][0]*m3[0][2] + m2[1][1]*m3[1][2] + m2[1][2]*m3[2][2]; 
 
-  	m1[2][0]= m2[2][0]*m3[0][0] + m2[2][1]*m3[1][0] + m2[2][2]*m3[2][0]; 
-  	m1[2][1]= m2[2][0]*m3[0][1] + m2[2][1]*m3[1][1] + m2[2][2]*m3[2][1]; 
-  	m1[2][2]= m2[2][0]*m3[0][2] + m2[2][1]*m3[1][2] + m2[2][2]*m3[2][2]; 
+	m1[2][0]= m2[2][0]*m3[0][0] + m2[2][1]*m3[1][0] + m2[2][2]*m3[2][0]; 
+	m1[2][1]= m2[2][0]*m3[0][1] + m2[2][1]*m3[1][1] + m2[2][2]*m3[2][1]; 
+	m1[2][2]= m2[2][0]*m3[0][2] + m2[2][1]*m3[1][2] + m2[2][2]*m3[2][2]; 
 }
 
 void mul_m4_m4m3(float (*m1)[4], float (*m3)[4], float (*m2)[3])
@@ -187,18 +194,18 @@ void mul_m4_m4m3(float (*m1)[4], float (*m3)[4], float (*m2)[3])
 /* m1 = m2 * m3, ignore the elements on the 4th row/column of m3*/
 void mul_m3_m3m4(float m1[][3], float m2[][3], float m3[][4])
 {
-    /* m1[i][j] = m2[i][k] * m3[k][j] */
-    m1[0][0] = m2[0][0] * m3[0][0] + m2[0][1] * m3[1][0] +m2[0][2] * m3[2][0];
-    m1[0][1] = m2[0][0] * m3[0][1] + m2[0][1] * m3[1][1] +m2[0][2] * m3[2][1];
-    m1[0][2] = m2[0][0] * m3[0][2] + m2[0][1] * m3[1][2] +m2[0][2] * m3[2][2];
+	/* m1[i][j] = m2[i][k] * m3[k][j] */
+	m1[0][0] = m2[0][0] * m3[0][0] + m2[0][1] * m3[1][0] +m2[0][2] * m3[2][0];
+	m1[0][1] = m2[0][0] * m3[0][1] + m2[0][1] * m3[1][1] +m2[0][2] * m3[2][1];
+	m1[0][2] = m2[0][0] * m3[0][2] + m2[0][1] * m3[1][2] +m2[0][2] * m3[2][2];
 
-    m1[1][0] = m2[1][0] * m3[0][0] + m2[1][1] * m3[1][0] +m2[1][2] * m3[2][0];
-    m1[1][1] = m2[1][0] * m3[0][1] + m2[1][1] * m3[1][1] +m2[1][2] * m3[2][1];
-    m1[1][2] = m2[1][0] * m3[0][2] + m2[1][1] * m3[1][2] +m2[1][2] * m3[2][2];
+	m1[1][0] = m2[1][0] * m3[0][0] + m2[1][1] * m3[1][0] +m2[1][2] * m3[2][0];
+	m1[1][1] = m2[1][0] * m3[0][1] + m2[1][1] * m3[1][1] +m2[1][2] * m3[2][1];
+	m1[1][2] = m2[1][0] * m3[0][2] + m2[1][1] * m3[1][2] +m2[1][2] * m3[2][2];
 
-    m1[2][0] = m2[2][0] * m3[0][0] + m2[2][1] * m3[1][0] +m2[2][2] * m3[2][0];
-    m1[2][1] = m2[2][0] * m3[0][1] + m2[2][1] * m3[1][1] +m2[2][2] * m3[2][1];
-    m1[2][2] = m2[2][0] * m3[0][2] + m2[2][1] * m3[1][2] +m2[2][2] * m3[2][2];
+	m1[2][0] = m2[2][0] * m3[0][0] + m2[2][1] * m3[1][0] +m2[2][2] * m3[2][0];
+	m1[2][1] = m2[2][0] * m3[0][1] + m2[2][1] * m3[1][1] +m2[2][2] * m3[2][1];
+	m1[2][2] = m2[2][0] * m3[0][2] + m2[2][1] * m3[1][2] +m2[2][2] * m3[2][2];
 }
 
 void mul_m4_m3m4(float (*m1)[4], float (*m3)[3], float (*m2)[4])
@@ -325,17 +332,23 @@ void mul_project_m4_v4(float mat[][4], float *vec)
 	vec[2] /= w;
 }
 
-void mul_m4_v4(float mat[][4], float *vec)
+void mul_v4_m4v4(float r[4], float mat[4][4], float v[4])
 {
-	float x,y,z;
+	float x, y, z;
 
-	x=vec[0]; 
-	y=vec[1]; 
-	z= vec[2];
-	vec[0]=x*mat[0][0] + y*mat[1][0] + z*mat[2][0] + mat[3][0]*vec[3];
-	vec[1]=x*mat[0][1] + y*mat[1][1] + z*mat[2][1] + mat[3][1]*vec[3];
-	vec[2]=x*mat[0][2] + y*mat[1][2] + z*mat[2][2] + mat[3][2]*vec[3];
-	vec[3]=x*mat[0][3] + y*mat[1][3] + z*mat[2][3] + mat[3][3]*vec[3];
+	x= v[0]; 
+	y= v[1]; 
+	z= v[2];
+
+	r[0]= x*mat[0][0] + y*mat[1][0] + z*mat[2][0] + mat[3][0]*v[3];
+	r[1]= x*mat[0][1] + y*mat[1][1] + z*mat[2][1] + mat[3][1]*v[3];
+	r[2]= x*mat[0][2] + y*mat[1][2] + z*mat[2][2] + mat[3][2]*v[3];
+	r[3]= x*mat[0][3] + y*mat[1][3] + z*mat[2][3] + mat[3][3]*v[3];
+}
+
+void mul_m4_v4(float mat[4][4], float r[4])
+{
+	mul_v4_m4v4(r, mat, r);
 }
 
 void mul_v3_m3v3(float r[3], float M[3][3], float a[3])
@@ -441,8 +454,8 @@ int invert_m3_m3(float m1[3][3], float m2[3][3])
 
 	/* then determinant old matrix! */
 	det= m2[0][0]* (m2[1][1]*m2[2][2] - m2[1][2]*m2[2][1])
-	    -m2[1][0]* (m2[0][1]*m2[2][2] - m2[0][2]*m2[2][1])
-	    +m2[2][0]* (m2[0][1]*m2[1][2] - m2[0][2]*m2[1][1]);
+		-m2[1][0]* (m2[0][1]*m2[2][2] - m2[0][2]*m2[2][1])
+		+m2[2][0]* (m2[0][1]*m2[1][2] - m2[0][2]*m2[1][1]);
 
 	success= (det != 0);
 
@@ -827,8 +840,8 @@ float determinant_m3(float a1, float a2, float a3,
 	float ans;
 
 	ans = a1 * determinant_m2(b2, b3, c2, c3)
-	    - b1 * determinant_m2(a2, a3, c2, c3)
-	    + c1 * determinant_m2(a2, a3, b2, b3);
+		- b1 * determinant_m2(a2, a3, c2, c3)
+		+ c1 * determinant_m2(a2, a3, b2, b3);
 
 	return ans;
 }
@@ -859,9 +872,9 @@ float determinant_m4(float m[][4])
 	d4= m[3][3];
 
 	ans = a1 * determinant_m3(b2, b3, b4, c2, c3, c4, d2, d3, d4)
-	    - b1 * determinant_m3(a2, a3, a4, c2, c3, c4, d2, d3, d4)
-	    + c1 * determinant_m3(a2, a3, a4, b2, b3, b4, d2, d3, d4)
-	    - d1 * determinant_m3(a2, a3, a4, b2, b3, b4, c2, c3, c4);
+		- b1 * determinant_m3(a2, a3, a4, c2, c3, c4, d2, d3, d4)
+		+ c1 * determinant_m3(a2, a3, a4, b2, b3, b4, d2, d3, d4)
+		- d1 * determinant_m3(a2, a3, a4, b2, b3, b4, c2, c3, c4);
 
 	return ans;
 }
@@ -942,54 +955,47 @@ void scale_m4_fl(float m[][4], float scale)
 
 void translate_m4(float mat[][4],float Tx, float Ty, float Tz)
 {
-    mat[3][0] += (Tx*mat[0][0] + Ty*mat[1][0] + Tz*mat[2][0]);
-    mat[3][1] += (Tx*mat[0][1] + Ty*mat[1][1] + Tz*mat[2][1]);
-    mat[3][2] += (Tx*mat[0][2] + Ty*mat[1][2] + Tz*mat[2][2]);
+	mat[3][0] += (Tx*mat[0][0] + Ty*mat[1][0] + Tz*mat[2][0]);
+	mat[3][1] += (Tx*mat[0][1] + Ty*mat[1][1] + Tz*mat[2][1]);
+	mat[3][2] += (Tx*mat[0][2] + Ty*mat[1][2] + Tz*mat[2][2]);
 }
 
-void rotate_m4(float mat[][4], char axis,float angle)
+void rotate_m4(float mat[][4], const char axis, const float angle)
 {
 	int col;
-    float temp[4];
-    float cosine, sine;
-
-    for(col=0; col<4 ; col++)	/* init temp to zero matrix */
-        temp[col] = 0;
-
-    angle = (float)(angle*(3.1415926535/180.0));
-    cosine = (float)cos(angle);
-    sine = (float)sin(angle);
-    switch(axis){
-    case 'x':    
-    case 'X':    
-        for(col=0 ; col<4 ; col++)
-            temp[col] = cosine*mat[1][col] + sine*mat[2][col];
-        for(col=0 ; col<4 ; col++) {
-	    mat[2][col] = - sine*mat[1][col] + cosine*mat[2][col];
-            mat[1][col] = temp[col];
+	float temp[4]= {0.0f, 0.0f, 0.0f, 0.0f};
+	float cosine, sine;
+
+	cosine = (float)cos(angle);
+	sine = (float)sin(angle);
+	switch(axis){
+	case 'X':    
+		for(col=0 ; col<4 ; col++)
+			temp[col] = cosine*mat[1][col] + sine*mat[2][col];
+		for(col=0 ; col<4 ; col++) {
+		mat[2][col] = - sine*mat[1][col] + cosine*mat[2][col];
+			mat[1][col] = temp[col];
 	}
-        break;
-
-    case 'y':
-    case 'Y':
-        for(col=0 ; col<4 ; col++)
-            temp[col] = cosine*mat[0][col] - sine*mat[2][col];
-        for(col=0 ; col<4 ; col++) {
-            mat[2][col] = sine*mat[0][col] + cosine*mat[2][col];
-            mat[0][col] = temp[col];
-        }
+		break;
+
+	case 'Y':
+		for(col=0 ; col<4 ; col++)
+			temp[col] = cosine*mat[0][col] - sine*mat[2][col];
+		for(col=0 ; col<4 ; col++) {
+			mat[2][col] = sine*mat[0][col] + cosine*mat[2][col];
+			mat[0][col] = temp[col];
+		}
 	break;
 
-    case 'z':
-    case 'Z':
-        for(col=0 ; col<4 ; col++)
-            temp[col] = cosine*mat[0][col] + sine*mat[1][col];
-        for(col=0 ; col<4 ; col++) {
-            mat[1][col] = - sine*mat[0][col] + cosine*mat[1][col];
-            mat[0][col] = temp[col];
-        }
+	case 'Z':
+		for(col=0 ; col<4 ; col++)
+			temp[col] = cosine*mat[0][col] + sine*mat[1][col];
+		for(col=0 ; col<4 ; col++) {
+			mat[1][col] = - sine*mat[0][col] + cosine*mat[1][col];
+			mat[0][col] = temp[col];
+		}
 	break;
-    }
+	}
 }
 
 void blend_m3_m3m3(float out[][3], float dst[][3], float src[][3], float srcweight)
@@ -1143,3 +1149,458 @@ void print_m4(char *str, float m[][4])
 	printf("%f %f %f %f\n",m[0][3],m[1][3],m[2][3],m[3][3]);
 	printf("\n");
 }
+
+/*********************************** SVD ************************************
+ * from TNT matrix library
+
+ * Compute the Single Value Decomposition of an arbitrary matrix A
+ * That is compute the 3 matrices U,W,V with U column orthogonal (m,n) 
+ * ,W a diagonal matrix and V an orthogonal square matrix s.t. 
+ * A = U.W.Vt. From this decomposition it is trivial to compute the 
+ * (pseudo-inverse) of A as Ainv = V.Winv.tranpose(U).
+ */
+
+void svd_m4(float U[4][4], float s[4], float V[4][4], float A_[4][4])
+{
+	float A[4][4];
+	float work1[4], work2[4];
+	int m = 4;
+	int n = 4;
+	int maxiter = 200;
+	int nu = minf(m,n);
+
+	float *work = work1;
+	float *e = work2;
+	float eps;
+
+	int i=0, j=0, k=0, p, pp, iter;
+
+	// Reduce A to bidiagonal form, storing the diagonal elements
+	// in s and the super-diagonal elements in e.
+
+	int nct = minf(m-1,n);
+	int nrt = maxf(0,minf(n-2,m));
+
+	copy_m4_m4(A, A_);
+	zero_m4(U);
+	zero_v4(s);
+
+	for (k = 0; k < maxf(nct,nrt); k++) {
+		if (k < nct) {
+
+			// Compute the transformation for the k-th column and
+			// place the k-th diagonal in s[k].
+			// Compute 2-norm of k-th column without under/overflow.
+			s[k] = 0;
+			for (i = k; i < m; i++) {
+				s[k] = hypotf(s[k],A[i][k]);
+			}
+			if (s[k] != 0.0f) {
+				float invsk;
+				if (A[k][k] < 0.0f) {
+					s[k] = -s[k];
+				}
+				invsk = 1.0f/s[k];
+				for (i = k; i < m; i++) {
+					A[i][k] *= invsk;
+				}
+				A[k][k] += 1.0f;
+			}
+			s[k] = -s[k];
+		}
+		for (j = k+1; j < n; j++) {
+			if ((k < nct) && (s[k] != 0.0f))  {
+
+			// Apply the transformation.
+
+				float t = 0;
+				for (i = k; i < m; i++) {
+					t += A[i][k]*A[i][j];
+				}
+				t = -t/A[k][k];
+				for (i = k; i < m; i++) {
+					A[i][j] += t*A[i][k];
+				}
+			}
+
+			// Place the k-th row of A into e for the
+			// subsequent calculation of the row transformation.
+
+			e[j] = A[k][j];
+		}
+		if (k < nct) {
+
+			// Place the transformation in U for subsequent back
+			// multiplication.
+
+			for (i = k; i < m; i++)
+				U[i][k] = A[i][k];
+		}
+		if (k < nrt) {
+
+			// Compute the k-th row transformation and place the
+			// k-th super-diagonal in e[k].
+			// Compute 2-norm without under/overflow.
+			e[k] = 0;
+			for (i = k+1; i < n; i++) {
+				e[k] = hypotf(e[k],e[i]);
+			}
+			if (e[k] != 0.0f) {
+				float invek;
+				if (e[k+1] < 0.0f) {
+					e[k] = -e[k];
+				}
+				invek = 1.0f/e[k];
+				for (i = k+1; i < n; i++) {
+					e[i] *= invek;
+				}
+				e[k+1] += 1.0f;
+			}
+			e[k] = -e[k];
+			if ((k+1 < m) & (e[k] != 0.0f)) {
+				float invek1;
+
+			// Apply the transformation.
+
+				for (i = k+1; i < m; i++) {
+					work[i] = 0.0f;
+				}
+				for (j = k+1; j < n; j++) {
+					for (i = k+1; i < m; i++) {
+						work[i] += e[j]*A[i][j];
+					}
+				}
+				invek1 = 1.0f/e[k+1];
+				for (j = k+1; j < n; j++) {
+					float t = -e[j]*invek1;
+					for (i = k+1; i < m; i++) {
+						A[i][j] += t*work[i];
+					}
+				}
+			}
+
+			// Place the transformation in V for subsequent
+			// back multiplication.
+
+			for (i = k+1; i < n; i++)
+				V[i][k] = e[i];
+		}
+	}
+
+	// Set up the final bidiagonal matrix or order p.
+
+	p = minf(n,m+1);
+	if (nct < n) {
+		s[nct] = A[nct][nct];
+	}
+	if (m < p) {
+		s[p-1] = 0.0f;
+	}
+	if (nrt+1 < p) {
+		e[nrt] = A[nrt][p-1];
+	}
+	e[p-1] = 0.0f;
+
+	// If required, generate U.
+
+	for (j = nct; j < nu; j++) {
+		for (i = 0; i < m; i++) {
+			U[i][j] = 0.0f;
+		}
+		U[j][j] = 1.0f;
+	}
+	for (k = nct-1; k >= 0; k--) {
+		if (s[k] != 0.0f) {
+			for (j = k+1; j < nu; j++) {
+				float t = 0;
+				for (i = k; i < m; i++) {
+					t += U[i][k]*U[i][j];
+				}
+				t = -t/U[k][k];
+				for (i = k; i < m; i++) {
+					U[i][j] += t*U[i][k];
+				}
+			}
+			for (i = k; i < m; i++ ) {
+				U[i][k] = -U[i][k];
+			}
+			U[k][k] = 1.0f + U[k][k];
+			for (i = 0; i < k-1; i++) {
+				U[i][k] = 0.0f;
+			}
+		} else {
+			for (i = 0; i < m; i++) {
+				U[i][k] = 0.0f;
+			}
+			U[k][k] = 1.0f;
+		}
+	}
+
+	// If required, generate V.
+
+	for (k = n-1; k >= 0; k--) {
+		if ((k < nrt) & (e[k] != 0.0f)) {
+			for (j = k+1; j < nu; j++) {
+				float t = 0;
+				for (i = k+1; i < n; i++) {
+					t += V[i][k]*V[i][j];
+				}
+				t = -t/V[k+1][k];
+				for (i = k+1; i < n; i++) {
+					V[i][j] += t*V[i][k];
+				}
+			}
+		}
+		for (i = 0; i < n; i++) {
+			V[i][k] = 0.0f;
+		}
+		V[k][k] = 1.0f;
+	}
+
+	// Main iteration loop for the singular values.
+
+	pp = p-1;
+	iter = 0;
+	eps = powf(2.0f,-52.0f);
+	while (p > 0) {
+		int kase=0;
+		k=0;
+
+		// Test for maximum iterations to avoid infinite loop
+		if(maxiter == 0)
+			break;
+		maxiter--;
+
+		// This section of the program inspects for
+		// negligible elements in the s and e arrays.  On
+		// completion the variables kase and k are set as follows.
+
+		// kase = 1	  if s(p) and e[k-1] are negligible and k<p
+		// kase = 2	  if s(k) is negligible and k<p
+		// kase = 3	  if e[k-1] is negligible, k<p, and
+		//				  s(k), ..., s(p) are not negligible (qr step).
+		// kase = 4	  if e(p-1) is negligible (convergence).
+
+		for (k = p-2; k >= -1; k--) {
+			if (k == -1) {
+				break;
+			}
+			if (fabsf(e[k]) <= eps*(fabsf(s[k]) + fabsf(s[k+1]))) {
+				e[k] = 0.0f;
+				break;
+			}
+		}
+		if (k == p-2) {
+			kase = 4;
+		} else {
+			int ks;
+			for (ks = p-1; ks >= k; ks--) {
+				float t;
+				if (ks == k) {
+					break;
+				}
+				t = (ks != p ? fabsf(e[ks]) : 0.f) + 
+							  (ks != k+1 ? fabsf(e[ks-1]) : 0.0f);
+				if (fabsf(s[ks]) <= eps*t)  {
+					s[ks] = 0.0f;
+					break;
+				}
+			}
+			if (ks == k) {
+				kase = 3;
+			} else if (ks == p-1) {
+				kase = 1;
+			} else {
+				kase = 2;
+				k = ks;
+			}
+		}
+		k++;
+
+		// Perform the task indicated by kase.
+
+		switch (kase) {
+
+			// Deflate negligible s(p).
+
+			case 1: {
+				float f = e[p-2];
+				e[p-2] = 0.0f;
+				for (j = p-2; j >= k; j--) {
+					float t = hypotf(s[j],f);
+					float invt = 1.0f/t;
+					float cs = s[j]*invt;
+					float sn = f*invt;
+					s[j] = t;
+					if (j != k) {
+						f = -sn*e[j-1];
+						e[j-1] = cs*e[j-1];
+					}
+
+					for (i = 0; i < n; i++) {
+						t = cs*V[i][j] + sn*V[i][p-1];
+						V[i][p-1] = -sn*V[i][j] + cs*V[i][p-1];
+						V[i][j] = t;
+					}
+				}
+			}
+			break;
+
+			// Split at negligible s(k).
+
+			case 2: {
+				float f = e[k-1];
+				e[k-1] = 0.0f;
+				for (j = k; j < p; j++) {
+					float t = hypotf(s[j],f);
+					float invt = 1.0f/t;
+					float cs = s[j]*invt;
+					float sn = f*invt;
+					s[j] = t;
+					f = -sn*e[j];
+					e[j] = cs*e[j];
+
+					for (i = 0; i < m; i++) {
+						t = cs*U[i][j] + sn*U[i][k-1];
+						U[i][k-1] = -sn*U[i][j] + cs*U[i][k-1];
+						U[i][j] = t;
+					}
+				}
+			}
+			break;
+
+			// Perform one qr step.
+
+			case 3: {
+
+				// Calculate the shift.
+
+				float scale = maxf(maxf(maxf(maxf(
+						  fabsf(s[p-1]),fabsf(s[p-2])),fabsf(e[p-2])), 
+						  fabsf(s[k])),fabsf(e[k]));
+				float invscale = 1.0f/scale;
+				float sp = s[p-1]*invscale;
+				float spm1 = s[p-2]*invscale;
+				float epm1 = e[p-2]*invscale;
+				float sk = s[k]*invscale;
+				float ek = e[k]*invscale;
+				float b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)*0.5f;
+				float c = (sp*epm1)*(sp*epm1);
+				float shift = 0.0f;
+				float f, g;
+				if ((b != 0.0f) || (c != 0.0f)) {
+					shift = sqrtf(b*b + c);
+					if (b < 0.0f) {
+						shift = -shift;
+					}
+					shift = c/(b + shift);
+				}
+				f = (sk + sp)*(sk - sp) + shift;
+				g = sk*ek;
+
+				// Chase zeros.
+
+				for (j = k; j < p-1; j++) {
+					float t = hypotf(f,g);
+					/* division by zero checks added to avoid NaN (brecht) */
+					float cs = (t == 0.0f)? 0.0f: f/t;
+					float sn = (t == 0.0f)? 0.0f: g/t;
+					if (j != k) {
+						e[j-1] = t;
+					}
+					f = cs*s[j] + sn*e[j];
+					e[j] = cs*e[j] - sn*s[j];
+					g = sn*s[j+1];
+					s[j+1] = cs*s[j+1];
+
+					for (i = 0; i < n; i++) {
+						t = cs*V[i][j] + sn*V[i][j+1];
+						V[i][j+1] = -sn*V[i][j] + cs*V[i][j+1];
+						V[i][j] = t;
+					}
+
+					t = hypotf(f,g);
+					/* division by zero checks added to avoid NaN (brecht) */
+					cs = (t == 0.0f)? 0.0f: f/t;
+					sn = (t == 0.0f)? 0.0f: g/t;
+					s[j] = t;
+					f = cs*e[j] + sn*s[j+1];
+					s[j+1] = -sn*e[j] + cs*s[j+1];
+					g = sn*e[j+1];
+					e[j+1] = cs*e[j+1];
+					if (j < m-1) {
+						for (i = 0; i < m; i++) {
+							t = cs*U[i][j] + sn*U[i][j+1];
+							U[i][j+1] = -sn*U[i][j] + cs*U[i][j+1];
+							U[i][j] = t;
+						}
+					}
+				}
+				e[p-2] = f;
+				iter = iter + 1;
+			}
+			break;
+
+			// Convergence.
+
+			case 4: {
+
+				// Make the singular values positive.
+
+				if (s[k] <= 0.0f) {
+					s[k] = (s[k] < 0.0f ? -s[k] : 0.0f);
+
+					for (i = 0; i <= pp; i++)
+						V[i][k] = -V[i][k];
+				}
+
+				// Order the singular values.
+
+				while (k < pp) {
+					float t;
+					if (s[k] >= s[k+1]) {
+						break;
+					}
+					t = s[k];
+					s[k] = s[k+1];
+					s[k+1] = t;
+					if (k < n-1) {
+						for (i = 0; i < n; i++) {
+							t = V[i][k+1]; V[i][k+1] = V[i][k]; V[i][k] = t;
+						}
+					}
+					if (k < m-1) {
+						for (i = 0; i < m; i++) {
+							t = U[i][k+1]; U[i][k+1] = U[i][k]; U[i][k] = t;
+						}
+					}
+					k++;
+				}
+				iter = 0;
+				p--;
+			}
+			break;
+		}
+	}
+}
+
+void pseudoinverse_m4_m4(float Ainv[4][4], float A[4][4], float epsilon)
+{
+	/* compute moon-penrose pseudo inverse of matrix, singular values
+	   below epsilon are ignored for stability (truncated SVD) */
+	float V[4][4], W[4], Wm[4][4], U[4][4];
+	int i;
+
+	transpose_m4(A);
+	svd_m4(V, W, U, A);
+	transpose_m4(U);
+	transpose_m4(V);
+
+	zero_m4(Wm);
+	for(i=0; i<4; i++)
+		Wm[i][i]= (W[i] < epsilon)? 0.0f: 1.0f/W[i];
+
+	transpose_m4(V);
+
+	mul_serie_m4(Ainv, U, Wm, V, 0, 0, 0, 0, 0);
+}