Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/opennl/superlu/scolumn_bmod.c')
-rw-r--r--intern/opennl/superlu/scolumn_bmod.c353
1 files changed, 353 insertions, 0 deletions
diff --git a/intern/opennl/superlu/scolumn_bmod.c b/intern/opennl/superlu/scolumn_bmod.c
new file mode 100644
index 00000000000..c877a27dd53
--- /dev/null
+++ b/intern/opennl/superlu/scolumn_bmod.c
@@ -0,0 +1,353 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+/*
+ * Function prototypes
+ */
+void susolve(int, int, float*, float*);
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+
+
+
+/* Return value: 0 - successful return
+ * > 0 - number of bytes allocated when run out of space
+ */
+int
+scolumn_bmod (
+ const int jcol, /* in */
+ const int nseg, /* in */
+ float *dense, /* in */
+ float *tempv, /* working array */
+ int *segrep, /* in */
+ int *repfnz, /* in */
+ int fpanelc, /* in -- first column in the current panel */
+ GlobalLU_t *Glu, /* modified */
+ SuperLUStat_t *stat /* output */
+ )
+{
+/*
+ * Purpose:
+ * ========
+ * Performs numeric block updates (sup-col) in topological order.
+ * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ * Special processing on the supernodal portion of L\U[*,j]
+ *
+ */
+#ifdef _CRAY
+ _fcd ftcs1 = _cptofcd("L", strlen("L")),
+ ftcs2 = _cptofcd("N", strlen("N")),
+ ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+
+#ifdef USE_VENDOR_BLAS
+ int incx = 1, incy = 1;
+ float alpha, beta;
+#endif
+
+ /* krep = representative of current k-th supernode
+ * fsupc = first supernodal column
+ * nsupc = no of columns in supernode
+ * nsupr = no of rows in supernode (used as leading dimension)
+ * luptr = location of supernodal LU-block in storage
+ * kfnz = first nonz in the k-th supernodal segment
+ * no_zeros = no of leading zeros in a supernodal U-segment
+ */
+ float ukj, ukj1, ukj2;
+ int luptr, luptr1, luptr2;
+ int fsupc, nsupc, nsupr, segsze;
+ int nrow; /* No of rows in the matrix of matrix-vector */
+ int jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
+ register int lptr, kfnz, isub, irow, i;
+ register int no_zeros, new_next;
+ int ufirst, nextlu;
+ int fst_col; /* First column within small LU update */
+ int d_fsupc; /* Distance between the first column of the current
+ panel and the first column of the current snode. */
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ int nzlumax;
+ float *tempv1;
+ float zero = 0.0;
+#ifdef USE_VENDOR_BLAS
+ float one = 1.0;
+ float none = -1.0;
+#endif
+ int mem_error;
+ flops_t *ops = stat->ops;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+ nzlumax = Glu->nzlumax;
+ jcolp1 = jcol + 1;
+ jsupno = supno[jcol];
+
+ /*
+ * For each nonz supernode segment of U[*,j] in topological order
+ */
+ k = nseg - 1;
+ for (ksub = 0; ksub < nseg; ksub++) {
+
+ krep = segrep[k];
+ k--;
+ ksupno = supno[krep];
+ if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
+
+ fsupc = xsup[ksupno];
+ fst_col = SUPERLU_MAX ( fsupc, fpanelc );
+
+ /* Distance from the current supernode to the current panel;
+ d_fsupc=0 if fsupc > fpanelc. */
+ d_fsupc = fst_col - fsupc;
+
+ luptr = xlusup[fst_col] + d_fsupc;
+ lptr = xlsub[fsupc] + d_fsupc;
+
+ kfnz = repfnz[krep];
+ kfnz = SUPERLU_MAX ( kfnz, fpanelc );
+
+ segsze = krep - kfnz + 1;
+ nsupc = krep - fst_col + 1;
+ nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
+ nrow = nsupr - d_fsupc - nsupc;
+ krep_ind = lptr + nsupc - 1;
+
+ ops[TRSV] += segsze * (segsze - 1);
+ ops[GEMV] += 2 * nrow * segsze;
+
+
+ /*
+ * Case 1: Update U-segment of size 1 -- col-col update
+ */
+ if ( segsze == 1 ) {
+ ukj = dense[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc;
+
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ dense[irow] -= ukj*lusup[luptr];
+ luptr++;
+ }
+
+ } else if ( segsze <= 3 ) {
+ ukj = dense[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc-1;
+ ukj1 = dense[lsub[krep_ind - 1]];
+ luptr1 = luptr - nsupr;
+
+ if ( segsze == 2 ) { /* Case 2: 2cols-col update */
+ ukj -= ukj1 * lusup[luptr1];
+ dense[lsub[krep_ind]] = ukj;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++;
+ luptr1++;
+ dense[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] );
+ }
+ } else { /* Case 3: 3cols-col update */
+ ukj2 = dense[lsub[krep_ind - 2]];
+ luptr2 = luptr1 - nsupr;
+ ukj1 -= ukj2 * lusup[luptr2-1];
+ ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+ dense[lsub[krep_ind]] = ukj;
+ dense[lsub[krep_ind-1]] = ukj1;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++;
+ luptr1++;
+ luptr2++;
+ dense[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+ }
+ }
+
+
+
+ } else {
+ /*
+ * Case: sup-col update
+ * Perform a triangular solve and block update,
+ * then scatter the result of sup-col update to dense
+ */
+
+ no_zeros = kfnz - fst_col;
+
+ /* Copy U[*,j] segment from dense[*] to tempv[*] */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ tempv[i] = dense[irow];
+ ++isub;
+ }
+
+ /* Dense triangular solve -- start effective triangle */
+ luptr += nsupr * no_zeros + no_zeros;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#else
+ strsv_( "L", "N", "U", &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#endif
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ alpha = one;
+ beta = zero;
+#ifdef _CRAY
+ SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#else
+ sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#endif
+#else
+ slsolve ( nsupr, segsze, &lusup[luptr], tempv );
+
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
+#endif
+
+
+ /* Scatter tempv[] into SPA dense[] as a temporary storage */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ dense[irow] = tempv[i];
+ tempv[i] = zero;
+ ++isub;
+ }
+
+ /* Scatter tempv1[] into SPA dense[] */
+ for (i = 0; i < nrow; i++) {
+ irow = lsub[isub];
+ dense[irow] -= tempv1[i];
+ tempv1[i] = zero;
+ ++isub;
+ }
+ }
+
+ } /* if jsupno ... */
+
+ } /* for each segment... */
+
+ /*
+ * Process the supernodal portion of L\U[*,j]
+ */
+ nextlu = xlusup[jcol];
+ fsupc = xsup[jsupno];
+
+ /* Copy the SPA dense into L\U[*,j] */
+ new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
+ while ( new_next > nzlumax ) {
+ if ((mem_error = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)))
+ return (mem_error);
+ lusup = Glu->lusup;
+ lsub = Glu->lsub;
+ }
+
+ for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
+ irow = lsub[isub];
+ lusup[nextlu] = dense[irow];
+ dense[irow] = zero;
+ ++nextlu;
+ }
+
+ xlusup[jcolp1] = nextlu; /* Close L\U[*,jcol] */
+
+ /* For more updates within the panel (also within the current supernode),
+ * should start from the first column of the panel, or the first column
+ * of the supernode, whichever is bigger. There are 2 cases:
+ * 1) fsupc < fpanelc, then fst_col := fpanelc
+ * 2) fsupc >= fpanelc, then fst_col := fsupc
+ */
+ fst_col = SUPERLU_MAX ( fsupc, fpanelc );
+
+ if ( fst_col < jcol ) {
+
+ /* Distance between the current supernode and the current panel.
+ d_fsupc=0 if fsupc >= fpanelc. */
+ d_fsupc = fst_col - fsupc;
+
+ lptr = xlsub[fsupc] + d_fsupc;
+ luptr = xlusup[fst_col] + d_fsupc;
+ nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
+ nsupc = jcol - fst_col; /* Excluding jcol */
+ nrow = nsupr - d_fsupc - nsupc;
+
+ /* Points to the beginning of jcol in snode L\U(jsupno) */
+ ufirst = xlusup[jcol] + d_fsupc;
+
+ ops[TRSV] += nsupc * (nsupc - 1);
+ ops[GEMV] += 2 * nrow * nsupc;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr],
+ &nsupr, &lusup[ufirst], &incx );
+#else
+ strsv_( "L", "N", "U", &nsupc, &lusup[luptr],
+ &nsupr, &lusup[ufirst], &incx );
+#endif
+
+ alpha = none; beta = one; /* y := beta*y + alpha*A*x */
+
+#ifdef _CRAY
+ SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+ &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#else
+ sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+ &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#endif
+#else
+ slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
+
+ smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
+ &lusup[ufirst], tempv );
+
+ /* Copy updates from tempv[*] into lusup[*] */
+ isub = ufirst + nsupc;
+ for (i = 0; i < nrow; i++) {
+ lusup[isub] -= tempv[i];
+ tempv[i] = 0.0;
+ ++isub;
+ }
+
+#endif
+
+
+ } /* if fst_col < jcol ... */
+
+ return 0;
+}