Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/opennl/superlu/smyblas2.c')
-rw-r--r--intern/opennl/superlu/smyblas2.c225
1 files changed, 225 insertions, 0 deletions
diff --git a/intern/opennl/superlu/smyblas2.c b/intern/opennl/superlu/smyblas2.c
new file mode 100644
index 00000000000..729e17f7674
--- /dev/null
+++ b/intern/opennl/superlu/smyblas2.c
@@ -0,0 +1,225 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ * File name: smyblas2.c
+ * Purpose:
+ * Level 2 BLAS operations: solves and matvec, written in C.
+ * Note:
+ * This is only used when the system lacks an efficient BLAS library.
+ */
+
+/*
+ * Solves a dense UNIT lower triangular system. The unit lower
+ * triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
+ * The solution will be returned in the rhs vector.
+ */
+void slsolve ( int ldm, int ncol, float *M, float *rhs )
+{
+ int k;
+ float x0, x1, x2, x3, x4, x5, x6, x7;
+ float *M0;
+ register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
+ register int firstcol = 0;
+
+ M0 = &M[0];
+
+ while ( firstcol < ncol - 7 ) { /* Do 8 columns */
+ Mki0 = M0 + 1;
+ Mki1 = Mki0 + ldm + 1;
+ Mki2 = Mki1 + ldm + 1;
+ Mki3 = Mki2 + ldm + 1;
+ Mki4 = Mki3 + ldm + 1;
+ Mki5 = Mki4 + ldm + 1;
+ Mki6 = Mki5 + ldm + 1;
+ Mki7 = Mki6 + ldm + 1;
+
+ x0 = rhs[firstcol];
+ x1 = rhs[firstcol+1] - x0 * *Mki0++;
+ x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
+ x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
+ x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++;
+ x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++ - x4 * *Mki4++;
+ x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
+ x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
+ - x6 * *Mki6++;
+
+ rhs[++firstcol] = x1;
+ rhs[++firstcol] = x2;
+ rhs[++firstcol] = x3;
+ rhs[++firstcol] = x4;
+ rhs[++firstcol] = x5;
+ rhs[++firstcol] = x6;
+ rhs[++firstcol] = x7;
+ ++firstcol;
+
+ for (k = firstcol; k < ncol; k++)
+ rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
+ - x2 * *Mki2++ - x3 * *Mki3++
+ - x4 * *Mki4++ - x5 * *Mki5++
+ - x6 * *Mki6++ - x7 * *Mki7++;
+
+ M0 += 8 * ldm + 8;
+ }
+
+ while ( firstcol < ncol - 3 ) { /* Do 4 columns */
+ Mki0 = M0 + 1;
+ Mki1 = Mki0 + ldm + 1;
+ Mki2 = Mki1 + ldm + 1;
+ Mki3 = Mki2 + ldm + 1;
+
+ x0 = rhs[firstcol];
+ x1 = rhs[firstcol+1] - x0 * *Mki0++;
+ x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
+ x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
+
+ rhs[++firstcol] = x1;
+ rhs[++firstcol] = x2;
+ rhs[++firstcol] = x3;
+ ++firstcol;
+
+ for (k = firstcol; k < ncol; k++)
+ rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
+ - x2 * *Mki2++ - x3 * *Mki3++;
+
+ M0 += 4 * ldm + 4;
+ }
+
+ if ( firstcol < ncol - 1 ) { /* Do 2 columns */
+ Mki0 = M0 + 1;
+ Mki1 = Mki0 + ldm + 1;
+
+ x0 = rhs[firstcol];
+ x1 = rhs[firstcol+1] - x0 * *Mki0++;
+
+ rhs[++firstcol] = x1;
+ ++firstcol;
+
+ for (k = firstcol; k < ncol; k++)
+ rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
+
+ }
+
+}
+
+/*
+ * Solves a dense upper triangular system. The upper triangular matrix is
+ * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
+ * in the rhs vector.
+ */
+void
+susolve ( ldm, ncol, M, rhs )
+int ldm; /* in */
+int ncol; /* in */
+float *M; /* in */
+float *rhs; /* modified */
+{
+ float xj;
+ int jcol, j, irow;
+
+ jcol = ncol - 1;
+
+ for (j = 0; j < ncol; j++) {
+
+ xj = rhs[jcol] / M[jcol + jcol*ldm]; /* M(jcol, jcol) */
+ rhs[jcol] = xj;
+
+ for (irow = 0; irow < jcol; irow++)
+ rhs[irow] -= xj * M[irow + jcol*ldm]; /* M(irow, jcol) */
+
+ jcol--;
+
+ }
+}
+
+
+/*
+ * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
+ * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
+ */
+void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
+
+int ldm; /* in -- leading dimension of M */
+int nrow; /* in */
+int ncol; /* in */
+float *M; /* in */
+float *vec; /* in */
+float *Mxvec; /* in/out */
+
+{
+ float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
+ float *M0;
+ register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
+ register int firstcol = 0;
+ int k;
+
+ M0 = &M[0];
+ while ( firstcol < ncol - 7 ) { /* Do 8 columns */
+
+ Mki0 = M0;
+ Mki1 = Mki0 + ldm;
+ Mki2 = Mki1 + ldm;
+ Mki3 = Mki2 + ldm;
+ Mki4 = Mki3 + ldm;
+ Mki5 = Mki4 + ldm;
+ Mki6 = Mki5 + ldm;
+ Mki7 = Mki6 + ldm;
+
+ vi0 = vec[firstcol++];
+ vi1 = vec[firstcol++];
+ vi2 = vec[firstcol++];
+ vi3 = vec[firstcol++];
+ vi4 = vec[firstcol++];
+ vi5 = vec[firstcol++];
+ vi6 = vec[firstcol++];
+ vi7 = vec[firstcol++];
+
+ for (k = 0; k < nrow; k++)
+ Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
+ + vi2 * *Mki2++ + vi3 * *Mki3++
+ + vi4 * *Mki4++ + vi5 * *Mki5++
+ + vi6 * *Mki6++ + vi7 * *Mki7++;
+
+ M0 += 8 * ldm;
+ }
+
+ while ( firstcol < ncol - 3 ) { /* Do 4 columns */
+
+ Mki0 = M0;
+ Mki1 = Mki0 + ldm;
+ Mki2 = Mki1 + ldm;
+ Mki3 = Mki2 + ldm;
+
+ vi0 = vec[firstcol++];
+ vi1 = vec[firstcol++];
+ vi2 = vec[firstcol++];
+ vi3 = vec[firstcol++];
+ for (k = 0; k < nrow; k++)
+ Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
+ + vi2 * *Mki2++ + vi3 * *Mki3++ ;
+
+ M0 += 4 * ldm;
+ }
+
+ while ( firstcol < ncol ) { /* Do 1 column */
+
+ Mki0 = M0;
+ vi0 = vec[firstcol++];
+ for (k = 0; k < nrow; k++)
+ Mxvec[k] += vi0 * *Mki0++;
+
+ M0 += ldm;
+ }
+
+}
+