diff options
Diffstat (limited to 'intern/opennl/superlu/smyblas2.c')
-rw-r--r-- | intern/opennl/superlu/smyblas2.c | 225 |
1 files changed, 225 insertions, 0 deletions
diff --git a/intern/opennl/superlu/smyblas2.c b/intern/opennl/superlu/smyblas2.c new file mode 100644 index 00000000000..729e17f7674 --- /dev/null +++ b/intern/opennl/superlu/smyblas2.c @@ -0,0 +1,225 @@ + + +/* + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + */ +/* + * File name: smyblas2.c + * Purpose: + * Level 2 BLAS operations: solves and matvec, written in C. + * Note: + * This is only used when the system lacks an efficient BLAS library. + */ + +/* + * Solves a dense UNIT lower triangular system. The unit lower + * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). + * The solution will be returned in the rhs vector. + */ +void slsolve ( int ldm, int ncol, float *M, float *rhs ) +{ + int k; + float x0, x1, x2, x3, x4, x5, x6, x7; + float *M0; + register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7; + register int firstcol = 0; + + M0 = &M[0]; + + while ( firstcol < ncol - 7 ) { /* Do 8 columns */ + Mki0 = M0 + 1; + Mki1 = Mki0 + ldm + 1; + Mki2 = Mki1 + ldm + 1; + Mki3 = Mki2 + ldm + 1; + Mki4 = Mki3 + ldm + 1; + Mki5 = Mki4 + ldm + 1; + Mki6 = Mki5 + ldm + 1; + Mki7 = Mki6 + ldm + 1; + + x0 = rhs[firstcol]; + x1 = rhs[firstcol+1] - x0 * *Mki0++; + x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++; + x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++; + x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ + - x3 * *Mki3++; + x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ + - x3 * *Mki3++ - x4 * *Mki4++; + x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ + - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++; + x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ + - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++ + - x6 * *Mki6++; + + rhs[++firstcol] = x1; + rhs[++firstcol] = x2; + rhs[++firstcol] = x3; + rhs[++firstcol] = x4; + rhs[++firstcol] = x5; + rhs[++firstcol] = x6; + rhs[++firstcol] = x7; + ++firstcol; + + for (k = firstcol; k < ncol; k++) + rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++ + - x2 * *Mki2++ - x3 * *Mki3++ + - x4 * *Mki4++ - x5 * *Mki5++ + - x6 * *Mki6++ - x7 * *Mki7++; + + M0 += 8 * ldm + 8; + } + + while ( firstcol < ncol - 3 ) { /* Do 4 columns */ + Mki0 = M0 + 1; + Mki1 = Mki0 + ldm + 1; + Mki2 = Mki1 + ldm + 1; + Mki3 = Mki2 + ldm + 1; + + x0 = rhs[firstcol]; + x1 = rhs[firstcol+1] - x0 * *Mki0++; + x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++; + x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++; + + rhs[++firstcol] = x1; + rhs[++firstcol] = x2; + rhs[++firstcol] = x3; + ++firstcol; + + for (k = firstcol; k < ncol; k++) + rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++ + - x2 * *Mki2++ - x3 * *Mki3++; + + M0 += 4 * ldm + 4; + } + + if ( firstcol < ncol - 1 ) { /* Do 2 columns */ + Mki0 = M0 + 1; + Mki1 = Mki0 + ldm + 1; + + x0 = rhs[firstcol]; + x1 = rhs[firstcol+1] - x0 * *Mki0++; + + rhs[++firstcol] = x1; + ++firstcol; + + for (k = firstcol; k < ncol; k++) + rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++; + + } + +} + +/* + * Solves a dense upper triangular system. The upper triangular matrix is + * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned + * in the rhs vector. + */ +void +susolve ( ldm, ncol, M, rhs ) +int ldm; /* in */ +int ncol; /* in */ +float *M; /* in */ +float *rhs; /* modified */ +{ + float xj; + int jcol, j, irow; + + jcol = ncol - 1; + + for (j = 0; j < ncol; j++) { + + xj = rhs[jcol] / M[jcol + jcol*ldm]; /* M(jcol, jcol) */ + rhs[jcol] = xj; + + for (irow = 0; irow < jcol; irow++) + rhs[irow] -= xj * M[irow + jcol*ldm]; /* M(irow, jcol) */ + + jcol--; + + } +} + + +/* + * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec. + * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[]. + */ +void smatvec ( ldm, nrow, ncol, M, vec, Mxvec ) + +int ldm; /* in -- leading dimension of M */ +int nrow; /* in */ +int ncol; /* in */ +float *M; /* in */ +float *vec; /* in */ +float *Mxvec; /* in/out */ + +{ + float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7; + float *M0; + register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7; + register int firstcol = 0; + int k; + + M0 = &M[0]; + while ( firstcol < ncol - 7 ) { /* Do 8 columns */ + + Mki0 = M0; + Mki1 = Mki0 + ldm; + Mki2 = Mki1 + ldm; + Mki3 = Mki2 + ldm; + Mki4 = Mki3 + ldm; + Mki5 = Mki4 + ldm; + Mki6 = Mki5 + ldm; + Mki7 = Mki6 + ldm; + + vi0 = vec[firstcol++]; + vi1 = vec[firstcol++]; + vi2 = vec[firstcol++]; + vi3 = vec[firstcol++]; + vi4 = vec[firstcol++]; + vi5 = vec[firstcol++]; + vi6 = vec[firstcol++]; + vi7 = vec[firstcol++]; + + for (k = 0; k < nrow; k++) + Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++ + + vi2 * *Mki2++ + vi3 * *Mki3++ + + vi4 * *Mki4++ + vi5 * *Mki5++ + + vi6 * *Mki6++ + vi7 * *Mki7++; + + M0 += 8 * ldm; + } + + while ( firstcol < ncol - 3 ) { /* Do 4 columns */ + + Mki0 = M0; + Mki1 = Mki0 + ldm; + Mki2 = Mki1 + ldm; + Mki3 = Mki2 + ldm; + + vi0 = vec[firstcol++]; + vi1 = vec[firstcol++]; + vi2 = vec[firstcol++]; + vi3 = vec[firstcol++]; + for (k = 0; k < nrow; k++) + Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++ + + vi2 * *Mki2++ + vi3 * *Mki3++ ; + + M0 += 4 * ldm; + } + + while ( firstcol < ncol ) { /* Do 1 column */ + + Mki0 = M0; + vi0 = vec[firstcol++]; + for (k = 0; k < nrow; k++) + Mxvec[k] += vi0 * *Mki0++; + + M0 += ldm; + } + +} + |