Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/opennl/superlu/ssp_blas2.c')
-rw-r--r--intern/opennl/superlu/ssp_blas2.c469
1 files changed, 469 insertions, 0 deletions
diff --git a/intern/opennl/superlu/ssp_blas2.c b/intern/opennl/superlu/ssp_blas2.c
new file mode 100644
index 00000000000..347f9ab5fd4
--- /dev/null
+++ b/intern/opennl/superlu/ssp_blas2.c
@@ -0,0 +1,469 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ * File name: ssp_blas2.c
+ * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
+ */
+
+#include "ssp_defs.h"
+
+/*
+ * Function prototypes
+ */
+void susolve(int, int, float*, float*);
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+int strsv_(char*, char*, char*, int*, float*, int*, float*, int*);
+
+int
+sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
+ SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * sp_strsv() solves one of the systems of equations
+ * A*x = b, or A'*x = b,
+ * where b and x are n element vectors and A is a sparse unit , or
+ * non-unit, upper or lower triangular matrix.
+ * No test for singularity or near-singularity is included in this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Parameters
+ * ==========
+ *
+ * uplo - (input) char*
+ * On entry, uplo specifies whether the matrix is an upper or
+ * lower triangular matrix as follows:
+ * uplo = 'U' or 'u' A is an upper triangular matrix.
+ * uplo = 'L' or 'l' A is a lower triangular matrix.
+ *
+ * trans - (input) char*
+ * On entry, trans specifies the equations to be solved as
+ * follows:
+ * trans = 'N' or 'n' A*x = b.
+ * trans = 'T' or 't' A'*x = b.
+ * trans = 'C' or 'c' A'*x = b.
+ *
+ * diag - (input) char*
+ * On entry, diag specifies whether or not A is unit
+ * triangular as follows:
+ * diag = 'U' or 'u' A is assumed to be unit triangular.
+ * diag = 'N' or 'n' A is not assumed to be unit
+ * triangular.
+ *
+ * L - (input) SuperMatrix*
+ * The factor L from the factorization Pr*A*Pc=L*U. Use
+ * compressed row subscripts storage for supernodes,
+ * i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.
+ *
+ * U - (input) SuperMatrix*
+ * The factor U from the factorization Pr*A*Pc=L*U.
+ * U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
+ *
+ * x - (input/output) float*
+ * Before entry, the incremented array X must contain the n
+ * element right-hand side vector b. On exit, X is overwritten
+ * with the solution vector x.
+ *
+ * info - (output) int*
+ * If *info = -i, the i-th argument had an illegal value.
+ *
+ */
+#ifdef _CRAY
+ _fcd ftcs1 = _cptofcd("L", strlen("L")),
+ ftcs2 = _cptofcd("N", strlen("N")),
+ ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+ SCformat *Lstore;
+ NCformat *Ustore;
+ float *Lval, *Uval;
+ int incx = 1;
+ int nrow;
+ int fsupc, nsupr, nsupc, luptr, istart, irow;
+ int i, k, iptr, jcol;
+ float *work;
+ flops_t solve_ops;
+
+ /* Test the input parameters */
+ *info = 0;
+ if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
+ else if ( !lsame_(trans, "N") && !lsame_(trans, "T") &&
+ !lsame_(trans, "C")) *info = -2;
+ else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
+ else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
+ else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
+ if ( *info ) {
+ i = -(*info);
+ xerbla_("sp_strsv", &i);
+ return 0;
+ }
+
+ Lstore = L->Store;
+ Lval = Lstore->nzval;
+ Ustore = U->Store;
+ Uval = Ustore->nzval;
+ solve_ops = 0;
+
+ if ( !(work = floatCalloc(L->nrow)) )
+ ABORT("Malloc fails for work in sp_strsv().");
+
+ if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
+
+ if ( lsame_(uplo, "L") ) {
+ /* Form x := inv(L)*x */
+ if ( L->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = 0; k <= Lstore->nsuper; k++) {
+ fsupc = L_FST_SUPC(k);
+ istart = L_SUB_START(fsupc);
+ nsupr = L_SUB_START(fsupc+1) - istart;
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+ nrow = nsupr - nsupc;
+
+ solve_ops += nsupc * (nsupc - 1);
+ solve_ops += 2 * nrow * nsupc;
+
+ if ( nsupc == 1 ) {
+ for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
+ irow = L_SUB(iptr);
+ ++luptr;
+ x[irow] -= x[fsupc] * Lval[luptr];
+ }
+ } else {
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+
+ SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
+ &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
+#else
+ strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+
+ sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
+ &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
+#endif
+#else
+ slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
+
+ smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
+ &x[fsupc], &work[0] );
+#endif
+
+ iptr = istart + nsupc;
+ for (i = 0; i < nrow; ++i, ++iptr) {
+ irow = L_SUB(iptr);
+ x[irow] -= work[i]; /* Scatter */
+ work[i] = 0.0;
+
+ }
+ }
+ } /* for k ... */
+
+ } else {
+ /* Form x := inv(U)*x */
+
+ if ( U->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = Lstore->nsuper; k >= 0; k--) {
+ fsupc = L_FST_SUPC(k);
+ nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+
+ solve_ops += nsupc * (nsupc + 1);
+
+ if ( nsupc == 1 ) {
+ x[fsupc] /= Lval[luptr];
+ for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
+ irow = U_SUB(i);
+ x[irow] -= x[fsupc] * Uval[i];
+ }
+ } else {
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#else
+ strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#endif
+#else
+ susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
+#endif
+
+ for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+ solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
+ for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
+ i++) {
+ irow = U_SUB(i);
+ x[irow] -= x[jcol] * Uval[i];
+ }
+ }
+ }
+ } /* for k ... */
+
+ }
+ } else { /* Form x := inv(A')*x */
+
+ if ( lsame_(uplo, "L") ) {
+ /* Form x := inv(L')*x */
+ if ( L->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = Lstore->nsuper; k >= 0; --k) {
+ fsupc = L_FST_SUPC(k);
+ istart = L_SUB_START(fsupc);
+ nsupr = L_SUB_START(fsupc+1) - istart;
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+
+ solve_ops += 2 * (nsupr - nsupc) * nsupc;
+
+ for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+ iptr = istart + nsupc;
+ for (i = L_NZ_START(jcol) + nsupc;
+ i < L_NZ_START(jcol+1); i++) {
+ irow = L_SUB(iptr);
+ x[jcol] -= x[irow] * Lval[i];
+ iptr++;
+ }
+ }
+
+ if ( nsupc > 1 ) {
+ solve_ops += nsupc * (nsupc - 1);
+#ifdef _CRAY
+ ftcs1 = _cptofcd("L", strlen("L"));
+ ftcs2 = _cptofcd("T", strlen("T"));
+ ftcs3 = _cptofcd("U", strlen("U"));
+ STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#else
+ strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#endif
+ }
+ }
+ } else {
+ /* Form x := inv(U')*x */
+ if ( U->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = 0; k <= Lstore->nsuper; k++) {
+ fsupc = L_FST_SUPC(k);
+ nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+
+ for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+ solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
+ for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
+ irow = U_SUB(i);
+ x[jcol] -= x[irow] * Uval[i];
+ }
+ }
+
+ solve_ops += nsupc * (nsupc + 1);
+
+ if ( nsupc == 1 ) {
+ x[fsupc] /= Lval[luptr];
+ } else {
+#ifdef _CRAY
+ ftcs1 = _cptofcd("U", strlen("U"));
+ ftcs2 = _cptofcd("T", strlen("T"));
+ ftcs3 = _cptofcd("N", strlen("N"));
+ STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#else
+ strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#endif
+ }
+ } /* for k ... */
+ }
+ }
+
+ stat->ops[SOLVE] += solve_ops;
+ SUPERLU_FREE(work);
+ return 0;
+}
+
+
+
+
+int
+sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
+ int incx, float beta, float *y, int incy)
+{
+/* Purpose
+ =======
+
+ sp_sgemv() performs one of the matrix-vector operations
+ y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
+ where alpha and beta are scalars, x and y are vectors and A is a
+ sparse A->nrow by A->ncol matrix.
+
+ Parameters
+ ==========
+
+ TRANS - (input) char*
+ On entry, TRANS specifies the operation to be performed as
+ follows:
+ TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
+ TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
+ TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
+
+ ALPHA - (input) float
+ On entry, ALPHA specifies the scalar alpha.
+
+ A - (input) SuperMatrix*
+ Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+ Currently, the type of A can be:
+ Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
+ In the future, more general A can be handled.
+
+ X - (input) float*, array of DIMENSION at least
+ ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+ and at least
+ ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+ Before entry, the incremented array X must contain the
+ vector x.
+
+ INCX - (input) int
+ On entry, INCX specifies the increment for the elements of
+ X. INCX must not be zero.
+
+ BETA - (input) float
+ On entry, BETA specifies the scalar beta. When BETA is
+ supplied as zero then Y need not be set on input.
+
+ Y - (output) float*, array of DIMENSION at least
+ ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+ and at least
+ ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+ Before entry with BETA non-zero, the incremented array Y
+ must contain the vector y. On exit, Y is overwritten by the
+ updated vector y.
+
+ INCY - (input) int
+ On entry, INCY specifies the increment for the elements of
+ Y. INCY must not be zero.
+
+ ==== Sparse Level 2 Blas routine.
+*/
+
+ /* Local variables */
+ NCformat *Astore;
+ float *Aval;
+ int info;
+ float temp;
+ int lenx, leny, i, j, irow;
+ int iy, jx, jy, kx, ky;
+ int notran;
+
+ notran = lsame_(trans, "N");
+ Astore = A->Store;
+ Aval = Astore->nzval;
+
+ /* Test the input parameters */
+ info = 0;
+ if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
+ else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
+ else if (incx == 0) info = 5;
+ else if (incy == 0) info = 8;
+ if (info != 0) {
+ xerbla_("sp_sgemv ", &info);
+ return 0;
+ }
+
+ /* Quick return if possible. */
+ if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
+ return 0;
+
+ /* Set LENX and LENY, the lengths of the vectors x and y, and set
+ up the start points in X and Y. */
+ if (lsame_(trans, "N")) {
+ lenx = A->ncol;
+ leny = A->nrow;
+ } else {
+ lenx = A->nrow;
+ leny = A->ncol;
+ }
+ if (incx > 0) kx = 0;
+ else kx = - (lenx - 1) * incx;
+ if (incy > 0) ky = 0;
+ else ky = - (leny - 1) * incy;
+
+ /* Start the operations. In this version the elements of A are
+ accessed sequentially with one pass through A. */
+ /* First form y := beta*y. */
+ if (beta != 1.) {
+ if (incy == 1) {
+ if (beta == 0.)
+ for (i = 0; i < leny; ++i) y[i] = 0.;
+ else
+ for (i = 0; i < leny; ++i) y[i] = beta * y[i];
+ } else {
+ iy = ky;
+ if (beta == 0.)
+ for (i = 0; i < leny; ++i) {
+ y[iy] = 0.;
+ iy += incy;
+ }
+ else
+ for (i = 0; i < leny; ++i) {
+ y[iy] = beta * y[iy];
+ iy += incy;
+ }
+ }
+ }
+
+ if (alpha == 0.) return 0;
+
+ if ( notran ) {
+ /* Form y := alpha*A*x + y. */
+ jx = kx;
+ if (incy == 1) {
+ for (j = 0; j < A->ncol; ++j) {
+ if (x[jx] != 0.) {
+ temp = alpha * x[jx];
+ for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
+ irow = Astore->rowind[i];
+ y[irow] += temp * Aval[i];
+ }
+ }
+ jx += incx;
+ }
+ } else {
+ ABORT("Not implemented.");
+ }
+ } else {
+ /* Form y := alpha*A'*x + y. */
+ jy = ky;
+ if (incx == 1) {
+ for (j = 0; j < A->ncol; ++j) {
+ temp = 0.;
+ for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
+ irow = Astore->rowind[i];
+ temp += Aval[i] * x[irow];
+ }
+ y[jy] += alpha * temp;
+ jy += incy;
+ }
+ } else {
+ ABORT("Not implemented.");
+ }
+ }
+ return 0;
+} /* sp_sgemv */
+
+
+