diff options
Diffstat (limited to 'intern/opennl/superlu/ssp_blas2.c')
-rw-r--r-- | intern/opennl/superlu/ssp_blas2.c | 469 |
1 files changed, 469 insertions, 0 deletions
diff --git a/intern/opennl/superlu/ssp_blas2.c b/intern/opennl/superlu/ssp_blas2.c new file mode 100644 index 00000000000..347f9ab5fd4 --- /dev/null +++ b/intern/opennl/superlu/ssp_blas2.c @@ -0,0 +1,469 @@ + +/* + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + */ +/* + * File name: ssp_blas2.c + * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations. + */ + +#include "ssp_defs.h" + +/* + * Function prototypes + */ +void susolve(int, int, float*, float*); +void slsolve(int, int, float*, float*); +void smatvec(int, int, int, float*, float*, float*); +int strsv_(char*, char*, char*, int*, float*, int*, float*, int*); + +int +sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L, + SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info) +{ +/* + * Purpose + * ======= + * + * sp_strsv() solves one of the systems of equations + * A*x = b, or A'*x = b, + * where b and x are n element vectors and A is a sparse unit , or + * non-unit, upper or lower triangular matrix. + * No test for singularity or near-singularity is included in this + * routine. Such tests must be performed before calling this routine. + * + * Parameters + * ========== + * + * uplo - (input) char* + * On entry, uplo specifies whether the matrix is an upper or + * lower triangular matrix as follows: + * uplo = 'U' or 'u' A is an upper triangular matrix. + * uplo = 'L' or 'l' A is a lower triangular matrix. + * + * trans - (input) char* + * On entry, trans specifies the equations to be solved as + * follows: + * trans = 'N' or 'n' A*x = b. + * trans = 'T' or 't' A'*x = b. + * trans = 'C' or 'c' A'*x = b. + * + * diag - (input) char* + * On entry, diag specifies whether or not A is unit + * triangular as follows: + * diag = 'U' or 'u' A is assumed to be unit triangular. + * diag = 'N' or 'n' A is not assumed to be unit + * triangular. + * + * L - (input) SuperMatrix* + * The factor L from the factorization Pr*A*Pc=L*U. Use + * compressed row subscripts storage for supernodes, + * i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU. + * + * U - (input) SuperMatrix* + * The factor U from the factorization Pr*A*Pc=L*U. + * U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU. + * + * x - (input/output) float* + * Before entry, the incremented array X must contain the n + * element right-hand side vector b. On exit, X is overwritten + * with the solution vector x. + * + * info - (output) int* + * If *info = -i, the i-th argument had an illegal value. + * + */ +#ifdef _CRAY + _fcd ftcs1 = _cptofcd("L", strlen("L")), + ftcs2 = _cptofcd("N", strlen("N")), + ftcs3 = _cptofcd("U", strlen("U")); +#endif + SCformat *Lstore; + NCformat *Ustore; + float *Lval, *Uval; + int incx = 1; + int nrow; + int fsupc, nsupr, nsupc, luptr, istart, irow; + int i, k, iptr, jcol; + float *work; + flops_t solve_ops; + + /* Test the input parameters */ + *info = 0; + if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1; + else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && + !lsame_(trans, "C")) *info = -2; + else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3; + else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4; + else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5; + if ( *info ) { + i = -(*info); + xerbla_("sp_strsv", &i); + return 0; + } + + Lstore = L->Store; + Lval = Lstore->nzval; + Ustore = U->Store; + Uval = Ustore->nzval; + solve_ops = 0; + + if ( !(work = floatCalloc(L->nrow)) ) + ABORT("Malloc fails for work in sp_strsv()."); + + if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */ + + if ( lsame_(uplo, "L") ) { + /* Form x := inv(L)*x */ + if ( L->nrow == 0 ) return 0; /* Quick return */ + + for (k = 0; k <= Lstore->nsuper; k++) { + fsupc = L_FST_SUPC(k); + istart = L_SUB_START(fsupc); + nsupr = L_SUB_START(fsupc+1) - istart; + nsupc = L_FST_SUPC(k+1) - fsupc; + luptr = L_NZ_START(fsupc); + nrow = nsupr - nsupc; + + solve_ops += nsupc * (nsupc - 1); + solve_ops += 2 * nrow * nsupc; + + if ( nsupc == 1 ) { + for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) { + irow = L_SUB(iptr); + ++luptr; + x[irow] -= x[fsupc] * Lval[luptr]; + } + } else { +#ifdef USE_VENDOR_BLAS +#ifdef _CRAY + STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); + + SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], + &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy); +#else + strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); + + sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], + &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy); +#endif +#else + slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]); + + smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc], + &x[fsupc], &work[0] ); +#endif + + iptr = istart + nsupc; + for (i = 0; i < nrow; ++i, ++iptr) { + irow = L_SUB(iptr); + x[irow] -= work[i]; /* Scatter */ + work[i] = 0.0; + + } + } + } /* for k ... */ + + } else { + /* Form x := inv(U)*x */ + + if ( U->nrow == 0 ) return 0; /* Quick return */ + + for (k = Lstore->nsuper; k >= 0; k--) { + fsupc = L_FST_SUPC(k); + nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); + nsupc = L_FST_SUPC(k+1) - fsupc; + luptr = L_NZ_START(fsupc); + + solve_ops += nsupc * (nsupc + 1); + + if ( nsupc == 1 ) { + x[fsupc] /= Lval[luptr]; + for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) { + irow = U_SUB(i); + x[irow] -= x[fsupc] * Uval[i]; + } + } else { +#ifdef USE_VENDOR_BLAS +#ifdef _CRAY + STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); +#else + strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); +#endif +#else + susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] ); +#endif + + for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { + solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); + for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); + i++) { + irow = U_SUB(i); + x[irow] -= x[jcol] * Uval[i]; + } + } + } + } /* for k ... */ + + } + } else { /* Form x := inv(A')*x */ + + if ( lsame_(uplo, "L") ) { + /* Form x := inv(L')*x */ + if ( L->nrow == 0 ) return 0; /* Quick return */ + + for (k = Lstore->nsuper; k >= 0; --k) { + fsupc = L_FST_SUPC(k); + istart = L_SUB_START(fsupc); + nsupr = L_SUB_START(fsupc+1) - istart; + nsupc = L_FST_SUPC(k+1) - fsupc; + luptr = L_NZ_START(fsupc); + + solve_ops += 2 * (nsupr - nsupc) * nsupc; + + for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { + iptr = istart + nsupc; + for (i = L_NZ_START(jcol) + nsupc; + i < L_NZ_START(jcol+1); i++) { + irow = L_SUB(iptr); + x[jcol] -= x[irow] * Lval[i]; + iptr++; + } + } + + if ( nsupc > 1 ) { + solve_ops += nsupc * (nsupc - 1); +#ifdef _CRAY + ftcs1 = _cptofcd("L", strlen("L")); + ftcs2 = _cptofcd("T", strlen("T")); + ftcs3 = _cptofcd("U", strlen("U")); + STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); +#else + strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); +#endif + } + } + } else { + /* Form x := inv(U')*x */ + if ( U->nrow == 0 ) return 0; /* Quick return */ + + for (k = 0; k <= Lstore->nsuper; k++) { + fsupc = L_FST_SUPC(k); + nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); + nsupc = L_FST_SUPC(k+1) - fsupc; + luptr = L_NZ_START(fsupc); + + for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { + solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); + for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) { + irow = U_SUB(i); + x[jcol] -= x[irow] * Uval[i]; + } + } + + solve_ops += nsupc * (nsupc + 1); + + if ( nsupc == 1 ) { + x[fsupc] /= Lval[luptr]; + } else { +#ifdef _CRAY + ftcs1 = _cptofcd("U", strlen("U")); + ftcs2 = _cptofcd("T", strlen("T")); + ftcs3 = _cptofcd("N", strlen("N")); + STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); +#else + strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr, + &x[fsupc], &incx); +#endif + } + } /* for k ... */ + } + } + + stat->ops[SOLVE] += solve_ops; + SUPERLU_FREE(work); + return 0; +} + + + + +int +sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x, + int incx, float beta, float *y, int incy) +{ +/* Purpose + ======= + + sp_sgemv() performs one of the matrix-vector operations + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + where alpha and beta are scalars, x and y are vectors and A is a + sparse A->nrow by A->ncol matrix. + + Parameters + ========== + + TRANS - (input) char* + On entry, TRANS specifies the operation to be performed as + follows: + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. + + ALPHA - (input) float + On entry, ALPHA specifies the scalar alpha. + + A - (input) SuperMatrix* + Matrix A with a sparse format, of dimension (A->nrow, A->ncol). + Currently, the type of A can be: + Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. + In the future, more general A can be handled. + + X - (input) float*, array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + + INCX - (input) int + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + + BETA - (input) float + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + + Y - (output) float*, array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - (input) int + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + + ==== Sparse Level 2 Blas routine. +*/ + + /* Local variables */ + NCformat *Astore; + float *Aval; + int info; + float temp; + int lenx, leny, i, j, irow; + int iy, jx, jy, kx, ky; + int notran; + + notran = lsame_(trans, "N"); + Astore = A->Store; + Aval = Astore->nzval; + + /* Test the input parameters */ + info = 0; + if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1; + else if ( A->nrow < 0 || A->ncol < 0 ) info = 3; + else if (incx == 0) info = 5; + else if (incy == 0) info = 8; + if (info != 0) { + xerbla_("sp_sgemv ", &info); + return 0; + } + + /* Quick return if possible. */ + if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.)) + return 0; + + /* Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. */ + if (lsame_(trans, "N")) { + lenx = A->ncol; + leny = A->nrow; + } else { + lenx = A->nrow; + leny = A->ncol; + } + if (incx > 0) kx = 0; + else kx = - (lenx - 1) * incx; + if (incy > 0) ky = 0; + else ky = - (leny - 1) * incy; + + /* Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. */ + /* First form y := beta*y. */ + if (beta != 1.) { + if (incy == 1) { + if (beta == 0.) + for (i = 0; i < leny; ++i) y[i] = 0.; + else + for (i = 0; i < leny; ++i) y[i] = beta * y[i]; + } else { + iy = ky; + if (beta == 0.) + for (i = 0; i < leny; ++i) { + y[iy] = 0.; + iy += incy; + } + else + for (i = 0; i < leny; ++i) { + y[iy] = beta * y[iy]; + iy += incy; + } + } + } + + if (alpha == 0.) return 0; + + if ( notran ) { + /* Form y := alpha*A*x + y. */ + jx = kx; + if (incy == 1) { + for (j = 0; j < A->ncol; ++j) { + if (x[jx] != 0.) { + temp = alpha * x[jx]; + for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { + irow = Astore->rowind[i]; + y[irow] += temp * Aval[i]; + } + } + jx += incx; + } + } else { + ABORT("Not implemented."); + } + } else { + /* Form y := alpha*A'*x + y. */ + jy = ky; + if (incx == 1) { + for (j = 0; j < A->ncol; ++j) { + temp = 0.; + for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { + irow = Astore->rowind[i]; + temp += Aval[i] * x[irow]; + } + y[jy] += alpha * temp; + jy += incy; + } + } else { + ABORT("Not implemented."); + } + } + return 0; +} /* sp_sgemv */ + + + |