Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/opennl/superlu/spanel_bmod.c')
-rw-r--r--intern/opennl/superlu/spanel_bmod.c449
1 files changed, 449 insertions, 0 deletions
diff --git a/intern/opennl/superlu/spanel_bmod.c b/intern/opennl/superlu/spanel_bmod.c
new file mode 100644
index 00000000000..a59a9086df1
--- /dev/null
+++ b/intern/opennl/superlu/spanel_bmod.c
@@ -0,0 +1,449 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+/*
+ * Function prototypes
+ */
+void slsolve(int, int, float *, float *);
+void smatvec(int, int, int, float *, float *, float *);
+extern void scheck_tempv();
+
+void
+spanel_bmod (
+ const int m, /* in - number of rows in the matrix */
+ const int w, /* in */
+ const int jcol, /* in */
+ const int nseg, /* in */
+ float *dense, /* out, of size n by w */
+ float *tempv, /* working array */
+ int *segrep, /* in */
+ int *repfnz, /* in, of size n by w */
+ GlobalLU_t *Glu, /* modified */
+ SuperLUStat_t *stat /* output */
+ )
+{
+/*
+ * Purpose
+ * =======
+ *
+ * Performs numeric block updates (sup-panel) in topological order.
+ * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ * Special processing on the supernodal portion of L\U[*,j]
+ *
+ * Before entering this routine, the original nonzeros in the panel
+ * were already copied into the spa[m,w].
+ *
+ * Updated/Output parameters-
+ * dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned
+ * collectively in the m-by-w vector dense[*].
+ *
+ */
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ _fcd ftcs1 = _cptofcd("L", strlen("L")),
+ ftcs2 = _cptofcd("N", strlen("N")),
+ ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+ int incx = 1, incy = 1;
+ float alpha, beta;
+#endif
+
+ register int k, ksub;
+ int fsupc, nsupc, nsupr, nrow;
+ int krep, krep_ind;
+ float ukj, ukj1, ukj2;
+ int luptr, luptr1, luptr2;
+ int segsze;
+ int block_nrow; /* no of rows in a block row */
+ register int lptr; /* Points to the row subscripts of a supernode */
+ int kfnz, irow, no_zeros;
+ register int isub, isub1, i;
+ register int jj; /* Index through each column in the panel */
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ int *repfnz_col; /* repfnz[] for a column in the panel */
+ float *dense_col; /* dense[] for a column in the panel */
+ float *tempv1; /* Used in 1-D update */
+ float *TriTmp, *MatvecTmp; /* used in 2-D update */
+ float zero = 0.0;
+ register int ldaTmp;
+ register int r_ind, r_hi;
+ static int first = 1, maxsuper, rowblk, colblk;
+ flops_t *ops = stat->ops;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+
+ if ( first ) {
+ maxsuper = sp_ienv(3);
+ rowblk = sp_ienv(4);
+ colblk = sp_ienv(5);
+ first = 0;
+ }
+ ldaTmp = maxsuper + rowblk;
+
+ /*
+ * For each nonz supernode segment of U[*,j] in topological order
+ */
+ k = nseg - 1;
+ for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
+
+ /* krep = representative of current k-th supernode
+ * fsupc = first supernodal column
+ * nsupc = no of columns in a supernode
+ * nsupr = no of rows in a supernode
+ */
+ krep = segrep[k--];
+ fsupc = xsup[supno[krep]];
+ nsupc = krep - fsupc + 1;
+ nsupr = xlsub[fsupc+1] - xlsub[fsupc];
+ nrow = nsupr - nsupc;
+ lptr = xlsub[fsupc];
+ krep_ind = lptr + nsupc - 1;
+
+ repfnz_col = repfnz;
+ dense_col = dense;
+
+ if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
+
+ TriTmp = tempv;
+
+ /* Sequence through each column in panel -- triangular solves */
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
+
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ luptr = xlusup[fsupc];
+
+ ops[TRSV] += segsze * (segsze - 1);
+ ops[GEMV] += 2 * nrow * segsze;
+
+ /* Case 1: Update U-segment of size 1 -- col-col update */
+ if ( segsze == 1 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc;
+
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
+ irow = lsub[i];
+ dense_col[irow] -= ukj * lusup[luptr];
+ ++luptr;
+ }
+
+ } else if ( segsze <= 3 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ ukj1 = dense_col[lsub[krep_ind - 1]];
+ luptr += nsupr*(nsupc-1) + nsupc-1;
+ luptr1 = luptr - nsupr;
+
+ if ( segsze == 2 ) {
+ ukj -= ukj1 * lusup[luptr1];
+ dense_col[lsub[krep_ind]] = ukj;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++; luptr1++;
+ dense_col[irow] -= (ukj*lusup[luptr]
+ + ukj1*lusup[luptr1]);
+ }
+ } else {
+ ukj2 = dense_col[lsub[krep_ind - 2]];
+ luptr2 = luptr1 - nsupr;
+ ukj1 -= ukj2 * lusup[luptr2-1];
+ ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+ dense_col[lsub[krep_ind]] = ukj;
+ dense_col[lsub[krep_ind-1]] = ukj1;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++; luptr1++; luptr2++;
+ dense_col[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+ }
+ }
+
+ } else { /* segsze >= 4 */
+
+ /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
+ holds the result of triangular solves. */
+ no_zeros = kfnz - fsupc;
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; ++i) {
+ irow = lsub[isub];
+ TriTmp[i] = dense_col[irow]; /* Gather */
+ ++isub;
+ }
+
+ /* start effective triangle */
+ luptr += nsupr * no_zeros + no_zeros;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
+ &nsupr, TriTmp, &incx );
+#else
+ strsv_( "L", "N", "U", &segsze, &lusup[luptr],
+ &nsupr, TriTmp, &incx );
+#endif
+#else
+ slsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
+#endif
+
+
+ } /* else ... */
+
+ } /* for jj ... end tri-solves */
+
+ /* Block row updates; push all the way into dense[*] block */
+ for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
+
+ r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
+ block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
+ luptr = xlusup[fsupc] + nsupc + r_ind;
+ isub1 = lptr + nsupc + r_ind;
+
+ repfnz_col = repfnz;
+ TriTmp = tempv;
+ dense_col = dense;
+
+ /* Sequence through each column in panel -- matrix-vector */
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
+
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ if ( segsze <= 3 ) continue; /* skip unrolled cases */
+
+ /* Perform a block update, and scatter the result of
+ matrix-vector to dense[]. */
+ no_zeros = kfnz - fsupc;
+ luptr1 = luptr + nsupr * no_zeros;
+ MatvecTmp = &TriTmp[maxsuper];
+
+#ifdef USE_VENDOR_BLAS
+ alpha = one;
+ beta = zero;
+#ifdef _CRAY
+ SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1],
+ &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
+#else
+ sgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1],
+ &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
+#endif
+#else
+ smatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
+ TriTmp, MatvecTmp);
+#endif
+
+ /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
+ * such that MatvecTmp[*] can be re-used for the
+ * the next blok row update. dense[] will be copied into
+ * global store after the whole panel has been finished.
+ */
+ isub = isub1;
+ for (i = 0; i < block_nrow; i++) {
+ irow = lsub[isub];
+ dense_col[irow] -= MatvecTmp[i];
+ MatvecTmp[i] = zero;
+ ++isub;
+ }
+
+ } /* for jj ... */
+
+ } /* for each block row ... */
+
+ /* Scatter the triangular solves into SPA dense[*] */
+ repfnz_col = repfnz;
+ TriTmp = tempv;
+ dense_col = dense;
+
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ if ( segsze <= 3 ) continue; /* skip unrolled cases */
+
+ no_zeros = kfnz - fsupc;
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ dense_col[irow] = TriTmp[i];
+ TriTmp[i] = zero;
+ ++isub;
+ }
+
+ } /* for jj ... */
+
+ } else { /* 1-D block modification */
+
+
+ /* Sequence through each column in the panel */
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m) {
+
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ luptr = xlusup[fsupc];
+
+ ops[TRSV] += segsze * (segsze - 1);
+ ops[GEMV] += 2 * nrow * segsze;
+
+ /* Case 1: Update U-segment of size 1 -- col-col update */
+ if ( segsze == 1 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc;
+
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
+ irow = lsub[i];
+ dense_col[irow] -= ukj * lusup[luptr];
+ ++luptr;
+ }
+
+ } else if ( segsze <= 3 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc-1;
+ ukj1 = dense_col[lsub[krep_ind - 1]];
+ luptr1 = luptr - nsupr;
+
+ if ( segsze == 2 ) {
+ ukj -= ukj1 * lusup[luptr1];
+ dense_col[lsub[krep_ind]] = ukj;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ ++luptr; ++luptr1;
+ dense_col[irow] -= (ukj*lusup[luptr]
+ + ukj1*lusup[luptr1]);
+ }
+ } else {
+ ukj2 = dense_col[lsub[krep_ind - 2]];
+ luptr2 = luptr1 - nsupr;
+ ukj1 -= ukj2 * lusup[luptr2-1];
+ ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+ dense_col[lsub[krep_ind]] = ukj;
+ dense_col[lsub[krep_ind-1]] = ukj1;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ ++luptr; ++luptr1; ++luptr2;
+ dense_col[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+ }
+ }
+
+ } else { /* segsze >= 4 */
+ /*
+ * Perform a triangular solve and block update,
+ * then scatter the result of sup-col update to dense[].
+ */
+ no_zeros = kfnz - fsupc;
+
+ /* Copy U[*,j] segment from dense[*] to tempv[*]:
+ * The result of triangular solve is in tempv[*];
+ * The result of matrix vector update is in dense_col[*]
+ */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; ++i) {
+ irow = lsub[isub];
+ tempv[i] = dense_col[irow]; /* Gather */
+ ++isub;
+ }
+
+ /* start effective triangle */
+ luptr += nsupr * no_zeros + no_zeros;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#else
+ strsv_( "L", "N", "U", &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#endif
+
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ alpha = one;
+ beta = zero;
+#ifdef _CRAY
+ SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#else
+ sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#endif
+#else
+ slsolve ( nsupr, segsze, &lusup[luptr], tempv );
+
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ smatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
+#endif
+
+ /* Scatter tempv[*] into SPA dense[*] temporarily, such
+ * that tempv[*] can be used for the triangular solve of
+ * the next column of the panel. They will be copied into
+ * ucol[*] after the whole panel has been finished.
+ */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ dense_col[irow] = tempv[i];
+ tempv[i] = zero;
+ isub++;
+ }
+
+ /* Scatter the update from tempv1[*] into SPA dense[*] */
+ /* Start dense rectangular L */
+ for (i = 0; i < nrow; i++) {
+ irow = lsub[isub];
+ dense_col[irow] -= tempv1[i];
+ tempv1[i] = zero;
+ ++isub;
+ }
+
+ } /* else segsze>=4 ... */
+
+ } /* for each column in the panel... */
+
+ } /* else 1-D update ... */
+
+ } /* for each updating supernode ... */
+
+}
+
+
+