diff options
42 files changed, 46 insertions, 9991 deletions
diff --git a/intern/opennl/CMakeLists.txt b/intern/opennl/CMakeLists.txt index 32bd438d531..9416bc2b9ea 100644 --- a/intern/opennl/CMakeLists.txt +++ b/intern/opennl/CMakeLists.txt @@ -43,51 +43,16 @@ add_definitions( set(INC extern - superlu ) set(INC_SYS ../../extern/colamd/Include + ../../extern/Eigen3 ) set(SRC intern/opennl.cpp - superlu/get_perm_c.c - superlu/heap_relax_snode.c - superlu/lsame.c - superlu/memory.c - superlu/mmd.c - superlu/relax_snode.c - superlu/scolumn_bmod.c - superlu/scolumn_dfs.c - superlu/scopy_to_ucol.c - superlu/sgssv.c - superlu/sgstrf.c - superlu/sgstrs.c - superlu/smemory.c - superlu/smyblas2.c - superlu/sp_coletree.c - superlu/sp_ienv.c - superlu/sp_preorder.c - superlu/spanel_bmod.c - superlu/spanel_dfs.c - superlu/spivotL.c - superlu/spruneL.c - superlu/ssnode_bmod.c - superlu/ssnode_dfs.c - superlu/ssp_blas2.c - superlu/ssp_blas3.c - superlu/strsv.c - superlu/superlu_timer.c - superlu/sutil.c - superlu/util.c - superlu/xerbla.c - extern/ONL_opennl.h - superlu/Cnames.h - superlu/ssp_defs.h - superlu/supermatrix.h - superlu/util.h ) blender_add_lib(bf_intern_opennl "${SRC}" "${INC}" "${INC_SYS}") diff --git a/intern/opennl/SConscript b/intern/opennl/SConscript index dcd9ea4a985..99df29b780a 100644 --- a/intern/opennl/SConscript +++ b/intern/opennl/SConscript @@ -27,9 +27,9 @@ Import ('env') -sources = env.Glob('intern/*.cpp') + env.Glob('superlu/*.c') +sources = env.Glob('intern/*.cpp') -incs = 'extern superlu ../../extern/colamd/Include' +incs = 'extern ../../extern/colamd/Include ../../extern/Eigen3' env.BlenderLib ('bf_intern_opennl', sources, Split(incs), [], libtype=['intern','player'], priority=[100,90] ) diff --git a/intern/opennl/doc/OpenNL_License.txt b/intern/opennl/doc/OpenNL_License.txt deleted file mode 100644 index 4e8d97fd526..00000000000 --- a/intern/opennl/doc/OpenNL_License.txt +++ /dev/null @@ -1,341 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 675 Mass Ave, Cambridge, MA 02139, USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - - END OF TERMS AND CONDITIONS - - Appendix: How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) 19yy <name of author> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19yy name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - <signature of Ty Coon>, 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. - diff --git a/intern/opennl/doc/OpenNL_Readme.txt b/intern/opennl/doc/OpenNL_Readme.txt deleted file mode 100644 index e6aea3c0286..00000000000 --- a/intern/opennl/doc/OpenNL_Readme.txt +++ /dev/null @@ -1,13 +0,0 @@ - -This is OpenNL, a library to easily construct and solve sparse linear systems. -* OpenNL is supplied with a set of iterative solvers (Conjugate gradient, - BICGSTAB, GMRes) and preconditioners (Jacobi, SSOR). -* OpenNL can also use other solvers (SuperLU 3.0 supported as an OpenNL - extension) - -Note that to be compatible with OpenNL, SuperLU 3.0 needs to be compiled with -the following flag (see make.inc in SuperLU3.0): -CDEFS = -DAdd_ (the default is -DAdd__, just remove the second underscore) - -OpenNL was modified for Blender to be used only as a wrapper for SuperLU. - diff --git a/intern/opennl/doc/SuperLU_License.txt b/intern/opennl/doc/SuperLU_License.txt deleted file mode 100644 index f31a01782e2..00000000000 --- a/intern/opennl/doc/SuperLU_License.txt +++ /dev/null @@ -1,31 +0,0 @@ -Copyright (c) 2003, The Regents of the University of California, through -Lawrence Berkeley National Laboratory (subject to receipt of any required -approvals from U.S. Dept. of Energy) - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, -are permitted provided that the following conditions are met: - -(1) Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -(2) Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. -(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of -Energy nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/intern/opennl/doc/SuperLU_Readme.txt b/intern/opennl/doc/SuperLU_Readme.txt deleted file mode 100644 index c1cedd09893..00000000000 --- a/intern/opennl/doc/SuperLU_Readme.txt +++ /dev/null @@ -1,52 +0,0 @@ - SuperLU (Version 3.0) - ===================== - -Copyright (c) 2003, The Regents of the University of California, through -Lawrence Berkeley National Laboratory (subject to receipt of any required -approvals from U.S. Dept. of Energy) - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -(1) Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. -(2) Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. -(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of -Energy nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -SuperLU contains a set of subroutines to solve a sparse linear system -A*X=B. It uses Gaussian elimination with partial pivoting (GEPP). -The columns of A may be preordered before factorization; the -preordering for sparsity is completely separate from the factorization. - -SuperLU is implemented in ANSI C, and must be compiled with standard -ANSI C compilers. It provides functionality for both real and complex -matrices, in both single and double precision. The file names for the -single-precision real version start with letter "s" (such as sgstrf.c); -the file names for the double-precision real version start with letter "d" -(such as dgstrf.c); the file names for the single-precision complex -version start with letter "c" (such as cgstrf.c); the file names -for the double-precision complex version start with letter "z" -(such as zgstrf.c). - -SuperLU was modified for Blender to only include single-precision -functionality. - diff --git a/intern/opennl/extern/ONL_opennl.h b/intern/opennl/extern/ONL_opennl.h index 6a36e13542f..d550a638693 100644 --- a/intern/opennl/extern/ONL_opennl.h +++ b/intern/opennl/extern/ONL_opennl.h @@ -42,8 +42,6 @@ #define NL_PARANOID */ -#define NL_USE_SUPERLU - #ifndef nlOPENNL_H #define nlOPENNL_H diff --git a/intern/opennl/intern/opennl.cpp b/intern/opennl/intern/opennl.cpp index dbe59c9dec9..085375b0d8c 100644 --- a/intern/opennl/intern/opennl.cpp +++ b/intern/opennl/intern/opennl.cpp @@ -51,9 +51,11 @@ #endif #endif -/* SuperLU includes */ -#include <ssp_defs.h> -#include <util.h> +#include <Eigen/Sparse> +#include <iostream> + +typedef Eigen::SparseMatrix<double, Eigen::ColMajor> EigenSparseMatrix; +typedef Eigen::SparseLU<EigenSparseMatrix> EigenSparseSolver; /************************************************************************************/ /* Assertions */ @@ -316,24 +318,6 @@ static void __nlSparseMatrixClear( __NLSparseMatrix* M) { __NL_CLEAR_ARRAY(NLdouble, M->diag, M->diag_size); } -/* Returns the number of non-zero coefficients */ -static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) { - NLuint nnz = 0; - NLuint i; - if(M->storage & __NL_ROWS) { - for(i = 0; i<M->m; i++) { - nnz += M->row[i].size; - } - } else if (M->storage & __NL_COLUMNS) { - for(i = 0; i<M->n; i++) { - nnz += M->column[i].size; - } - } else { - __nl_assert_not_reached; - } - return nnz; -} - /************************************************************************************/ /* SparseMatrix x Vector routines, internal helper routines */ @@ -523,12 +507,9 @@ typedef struct { NLdouble error; __NLMatrixFunc matrix_vector_prod; - struct __NLSuperLUContext { - NLboolean alloc_slu; - SuperMatrix L, U; - NLint *perm_c, *perm_r; - SuperLUStat_t stat; - } slu; + struct __NLEigenContext { + EigenSparseSolver *sparse_solver; + } eigen; } __NLContext; static __NLContext* __nlCurrentContext = NULL; @@ -547,7 +528,7 @@ NLContext nlNewContext(void) { return result; } -static void __nlFree_SUPERLU(__NLContext *context); +static void __nlFree_EIGEN(__NLContext *context); void nlDeleteContext(NLContext context_in) { __NLContext* context = (__NLContext*)(context_in); @@ -581,8 +562,8 @@ void nlDeleteContext(NLContext context_in) { if(context->alloc_x) { __NL_DELETE_ARRAY(context->x); } - if (context->slu.alloc_slu) { - __nlFree_SUPERLU(context); + if (context->eigen.sparse_solver) { + __nlFree_EIGEN(context); } #ifdef NL_PARANOID @@ -914,31 +895,15 @@ void nlEnd(NLenum prim) { } /************************************************************************/ -/* SuperLU wrapper */ +/* Eigen wrapper */ -/* Note: SuperLU is difficult to call, but it is worth it. */ +/* Note: Eigen is difficult to call, but it is worth it. */ /* Here is a driver inspired by A. Sheffer's "cow flattener". */ -static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) { +static NLboolean __nlFactorize_EIGEN(__NLContext *context, NLint *permutation) { /* OpenNL Context */ __NLSparseMatrix* M = (context->least_squares)? &context->MtM: &context->M; NLuint n = context->n; - NLuint nnz = __nlSparseMatrixNNZ(M); /* number of non-zero coeffs */ - - /*if(n > 10) - n = 10;*/ - - /* Compressed Row Storage matrix representation */ - NLint *xa = __NL_NEW_ARRAY(NLint, n+1); - NLdouble *rhs = __NL_NEW_ARRAY(NLdouble, n); - NLdouble *a = __NL_NEW_ARRAY(NLdouble, nnz); - NLint *asub = __NL_NEW_ARRAY(NLint, nnz); - NLint *etree = __NL_NEW_ARRAY(NLint, n); - - /* SuperLU variables */ - SuperMatrix At, AtP; - NLint info, panel_size, relax; - superlu_options_t options; /* Temporary variables */ NLuint i, jj, count; @@ -946,121 +911,57 @@ static NLboolean __nlFactorize_SUPERLU(__NLContext *context, NLint *permutation) __nl_assert(!(M->storage & __NL_SYMMETRIC)); __nl_assert(M->storage & __NL_ROWS); __nl_assert(M->m == M->n); - + /* Convert M to compressed column format */ + EigenSparseMatrix A(M->m, M->n); + for(i=0, count=0; i<n; i++) { __NLRowColumn *Ri = M->row + i; - xa[i] = count; - for(jj=0; jj<Ri->size; jj++, count++) { - a[count] = Ri->coeff[jj].value; - asub[count] = Ri->coeff[jj].index; - } + for(jj=0; jj<Ri->size; jj++, count++) + A.insert(i, Ri->coeff[jj].index) = Ri->coeff[jj].value; } - xa[n] = nnz; + + A.makeCompressed(); /* Free M, don't need it anymore at this point */ __nlSparseMatrixClear(M); - /* Create superlu A matrix transposed */ - sCreate_CompCol_Matrix( - &At, n, n, nnz, a, asub, xa, - SLU_NC, /* Colum wise, no supernode */ - SLU_S, /* doubles */ - SLU_GE /* general storage */ - ); - - /* Set superlu options */ - set_default_options(&options); - options.ColPerm = MY_PERMC; - options.Fact = DOFACT; - - StatInit(&(context->slu.stat)); - - panel_size = sp_ienv(1); /* sp_ienv give us the defaults */ - relax = sp_ienv(2); - - /* Compute permutation and permuted matrix */ - context->slu.perm_r = __NL_NEW_ARRAY(NLint, n); - context->slu.perm_c = __NL_NEW_ARRAY(NLint, n); - - if ((permutation == NULL) || (*permutation == -1)) { - get_perm_c(3, &At, context->slu.perm_c); - - if (permutation) - memcpy(permutation, context->slu.perm_c, sizeof(NLint)*n); - } - else - memcpy(context->slu.perm_c, permutation, sizeof(NLint)*n); - - sp_preorder(&options, &At, context->slu.perm_c, etree, &AtP); - - /* Decompose into L and U */ - sgstrf(&options, &AtP, relax, panel_size, - etree, NULL, 0, context->slu.perm_c, context->slu.perm_r, - &(context->slu.L), &(context->slu.U), &(context->slu.stat), &info); - - /* Cleanup */ + /* Performance Sparse LU factorization */ + EigenSparseSolver *sparse_solver = new EigenSparseSolver(); + context->eigen.sparse_solver = sparse_solver; - Destroy_SuperMatrix_Store(&At); - Destroy_CompCol_Permuted(&AtP); + sparse_solver->analyzePattern(A); + sparse_solver->factorize(A); - __NL_DELETE_ARRAY(etree); - __NL_DELETE_ARRAY(xa); - __NL_DELETE_ARRAY(rhs); - __NL_DELETE_ARRAY(a); - __NL_DELETE_ARRAY(asub); - - context->slu.alloc_slu = NL_TRUE; - - return (info == 0); + return (sparse_solver->info() == Eigen::Success); } -static NLboolean __nlInvert_SUPERLU(__NLContext *context) { +static NLboolean __nlInvert_EIGEN(__NLContext *context) { /* OpenNL Context */ NLdouble* b = (context->least_squares)? context->Mtb: context->b; NLdouble* x = context->x; NLuint n = context->n, j; - /* SuperLU variables */ - SuperMatrix B; - NLint info = 0; - + /* Solve each right hand side */ for(j=0; j<context->nb_rhs; j++, b+=n, x+=n) { - /* Create superlu array for B */ - sCreate_Dense_Matrix( - &B, n, 1, b, n, - SLU_DN, /* Fortran-type column-wise storage */ - SLU_S, /* doubles */ - SLU_GE /* general */ - ); - - /* Forward/Back substitution to compute x */ - sgstrs(TRANS, &(context->slu.L), &(context->slu.U), - context->slu.perm_c, context->slu.perm_r, &B, - &(context->slu.stat), &info); - - if(info == 0) - memcpy(x, ((DNformat*)B.Store)->nzval, sizeof(*x)*n); - - Destroy_SuperMatrix_Store(&B); - } + Eigen::Map<Eigen::VectorXd> eigen_b(b, n); - return (info == 0); -} + Eigen::VectorXd eigen_x = context->eigen.sparse_solver->solve(eigen_b); + for (NLuint i = 0; i < n; i++) + x[i] = eigen_x[i]; -static void __nlFree_SUPERLU(__NLContext *context) { - - Destroy_SuperNode_Matrix(&(context->slu.L)); - Destroy_CompCol_Matrix(&(context->slu.U)); - - StatFree(&(context->slu.stat)); + if (context->eigen.sparse_solver->info() != Eigen::Success) + return false; + } - __NL_DELETE_ARRAY(context->slu.perm_r); - __NL_DELETE_ARRAY(context->slu.perm_c); + return true; +} - context->slu.alloc_slu = NL_FALSE; +static void __nlFree_EIGEN(__NLContext *context) { + delete context->eigen.sparse_solver; + context->eigen.sparse_solver = NULL; } void nlPrintMatrix(void) { @@ -1129,10 +1030,10 @@ NLboolean nlSolveAdvanced(NLint *permutation, NLboolean solveAgain) { __nlCheckState(__NL_STATE_SYSTEM_CONSTRUCTED); if (!__nlCurrentContext->solve_again) - result = __nlFactorize_SUPERLU(__nlCurrentContext, permutation); + result = __nlFactorize_EIGEN(__nlCurrentContext, permutation); if (result) { - result = __nlInvert_SUPERLU(__nlCurrentContext); + result = __nlInvert_EIGEN(__nlCurrentContext); if (result) { __nlVectorToVariables(); diff --git a/intern/opennl/superlu/Cnames.h b/intern/opennl/superlu/Cnames.h deleted file mode 100644 index 1be2aa8962a..00000000000 --- a/intern/opennl/superlu/Cnames.h +++ /dev/null @@ -1,284 +0,0 @@ -/** \file opennl/superlu/Cnames.h - * \ingroup opennl - */ -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 1, 1997 - * - */ -#ifndef __SUPERLU_CNAMES /* allow multiple inclusions */ -#define __SUPERLU_CNAMES - -/* We want this flag, safer than putting in build system */ -#define Add_ - -/* - * These macros define how C routines will be called. ADD_ assumes that - * they will be called by fortran, which expects C routines to have an - * underscore postfixed to the name (Suns, and the Intel expect this). - * NOCHANGE indicates that fortran will be calling, and that it expects - * the name called by fortran to be identical to that compiled by the C - * (RS6K's do this). UPCASE says it expects C routines called by fortran - * to be in all upcase (CRAY wants this). - */ - -#define ADD_ 0 -#define ADD__ 1 -#define NOCHANGE 2 -#define UPCASE 3 -#define C_CALL 4 - -#ifdef UpCase -#define F77_CALL_C UPCASE -#endif - -#ifdef NoChange -#define F77_CALL_C NOCHANGE -#endif - -#ifdef Add_ -#define F77_CALL_C ADD_ -#endif - -#ifdef Add__ -#define F77_CALL_C ADD__ -#endif - -/* Default */ -#ifndef F77_CALL_C -#define F77_CALL_C ADD_ -#endif - - -#if (F77_CALL_C == ADD_) -/* - * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine - * No redefinition necessary to have following Fortran to C interface: - * FORTRAN CALL C DECLARATION - * call dgemm(...) void dgemm_(...) - * - * This is the default. - */ - -#endif - -#if (F77_CALL_C == ADD__) -/* - * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine - * for following Fortran to C interface: - * FORTRAN CALL C DECLARATION - * call dgemm(...) void dgemm__(...) - */ -#define sasum_ sasum__ -#define isamax_ isamax__ -#define scopy_ scopy__ -#define sscal_ sscal__ -#define sger_ sger__ -#define snrm2_ snrm2__ -#define ssymv_ ssymv__ -#define sdot_ sdot__ -#define saxpy_ saxpy__ -#define ssyr2_ ssyr2__ -#define srot_ srot__ -#define sgemv_ sgemv__ -#define strsv_ strsv__ -#define sgemm_ sgemm__ -#define strsm_ strsm__ - -#define dasum_ dasum__ -#define idamax_ idamax__ -#define dcopy_ dcopy__ -#define dscal_ dscal__ -#define dger_ dger__ -#define dnrm2_ dnrm2__ -#define dsymv_ dsymv__ -#define ddot_ ddot__ -#define daxpy_ daxpy__ -#define dsyr2_ dsyr2__ -#define drot_ drot__ -#define dgemv_ dgemv__ -#define dtrsv_ dtrsv__ -#define dgemm_ dgemm__ -#define dtrsm_ dtrsm__ - -#define scasum_ scasum__ -#define icamax_ icamax__ -#define ccopy_ ccopy__ -#define cscal_ cscal__ -#define scnrm2_ scnrm2__ -#define caxpy_ caxpy__ -#define cgemv_ cgemv__ -#define ctrsv_ ctrsv__ -#define cgemm_ cgemm__ -#define ctrsm_ ctrsm__ -#define cgerc_ cgerc__ -#define chemv_ chemv__ -#define cher2_ cher2__ - -#define dzasum_ dzasum__ -#define izamax_ izamax__ -#define zcopy_ zcopy__ -#define zscal_ zscal__ -#define dznrm2_ dznrm2__ -#define zaxpy_ zaxpy__ -#define zgemv_ zgemv__ -#define ztrsv_ ztrsv__ -#define zgemm_ zgemm__ -#define ztrsm_ ztrsm__ -#define zgerc_ zgerc__ -#define zhemv_ zhemv__ -#define zher2_ zher2__ - -#define c_bridge_dgssv_ c_bridge_dgssv__ -#define c_fortran_dgssv_ c_fortran_dgssv__ -#endif - -#if (F77_CALL_C == UPCASE) -/* - * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine - * following Fortran to C interface: - * FORTRAN CALL C DECLARATION - * call dgemm(...) void DGEMM(...) - */ -#define sasum_ SASUM -#define isamax_ ISAMAX -#define scopy_ SCOPY -#define sscal_ SSCAL -#define sger_ SGER -#define snrm2_ SNRM2 -#define ssymv_ SSYMV -#define sdot_ SDOT -#define saxpy_ SAXPY -#define ssyr2_ SSYR2 -#define srot_ SROT -#define sgemv_ SGEMV -#define strsv_ STRSV -#define sgemm_ SGEMM -#define strsm_ STRSM - -#define dasum_ SASUM -#define idamax_ ISAMAX -#define dcopy_ SCOPY -#define dscal_ SSCAL -#define dger_ SGER -#define dnrm2_ SNRM2 -#define dsymv_ SSYMV -#define ddot_ SDOT -#define daxpy_ SAXPY -#define dsyr2_ SSYR2 -#define drot_ SROT -#define dgemv_ SGEMV -#define dtrsv_ STRSV -#define dgemm_ SGEMM -#define dtrsm_ STRSM - -#define scasum_ SCASUM -#define icamax_ ICAMAX -#define ccopy_ CCOPY -#define cscal_ CSCAL -#define scnrm2_ SCNRM2 -#define caxpy_ CAXPY -#define cgemv_ CGEMV -#define ctrsv_ CTRSV -#define cgemm_ CGEMM -#define ctrsm_ CTRSM -#define cgerc_ CGERC -#define chemv_ CHEMV -#define cher2_ CHER2 - -#define dzasum_ SCASUM -#define izamax_ ICAMAX -#define zcopy_ CCOPY -#define zscal_ CSCAL -#define dznrm2_ SCNRM2 -#define zaxpy_ CAXPY -#define zgemv_ CGEMV -#define ztrsv_ CTRSV -#define zgemm_ CGEMM -#define ztrsm_ CTRSM -#define zgerc_ CGERC -#define zhemv_ CHEMV -#define zher2_ CHER2 - -#define c_bridge_dgssv_ C_BRIDGE_DGSSV -#define c_fortran_dgssv_ C_FORTRAN_DGSSV -#endif - -#if (F77_CALL_C == NOCHANGE) -/* - * These defines set up the naming scheme required to have a fortran 77 - * routine call a C routine - * for following Fortran to C interface: - * FORTRAN CALL C DECLARATION - * call dgemm(...) void dgemm(...) - */ -#define sasum_ sasum -#define isamax_ isamax -#define scopy_ scopy -#define sscal_ sscal -#define sger_ sger -#define snrm2_ snrm2 -#define ssymv_ ssymv -#define sdot_ sdot -#define saxpy_ saxpy -#define ssyr2_ ssyr2 -#define srot_ srot -#define sgemv_ sgemv -#define strsv_ strsv -#define sgemm_ sgemm -#define strsm_ strsm - -#define dasum_ dasum -#define idamax_ idamax -#define dcopy_ dcopy -#define dscal_ dscal -#define dger_ dger -#define dnrm2_ dnrm2 -#define dsymv_ dsymv -#define ddot_ ddot -#define daxpy_ daxpy -#define dsyr2_ dsyr2 -#define drot_ drot -#define dgemv_ dgemv -#define dtrsv_ dtrsv -#define dgemm_ dgemm -#define dtrsm_ dtrsm - -#define scasum_ scasum -#define icamax_ icamax -#define ccopy_ ccopy -#define cscal_ cscal -#define scnrm2_ scnrm2 -#define caxpy_ caxpy -#define cgemv_ cgemv -#define ctrsv_ ctrsv -#define cgemm_ cgemm -#define ctrsm_ ctrsm -#define cgerc_ cgerc -#define chemv_ chemv -#define cher2_ cher2 - -#define dzasum_ dzasum -#define izamax_ izamax -#define zcopy_ zcopy -#define zscal_ zscal -#define dznrm2_ dznrm2 -#define zaxpy_ zaxpy -#define zgemv_ zgemv -#define ztrsv_ ztrsv -#define zgemm_ zgemm -#define ztrsm_ ztrsm -#define zgerc_ zgerc -#define zhemv_ zhemv -#define zher2_ zher2 - -#define c_bridge_dgssv_ c_bridge_dgssv -#define c_fortran_dgssv_ c_fortran_dgssv -#endif - -#endif /* __SUPERLU_CNAMES */ diff --git a/intern/opennl/superlu/get_perm_c.c b/intern/opennl/superlu/get_perm_c.c deleted file mode 100644 index 59889645988..00000000000 --- a/intern/opennl/superlu/get_perm_c.c +++ /dev/null @@ -1,466 +0,0 @@ -/** \file opennl/superlu/get_perm_c.c - * \ingroup opennl - */ -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - */ - -#include "ssp_defs.h" -#include "colamd.h" - -extern int genmmd_(int *, int *, int *, int *, int *, int *, int *, - int *, int *, int *, int *, int *); - -static void -get_colamd( - const int m, /* number of rows in matrix A. */ - const int n, /* number of columns in matrix A. */ - const int nnz,/* number of nonzeros in matrix A. */ - int *colptr, /* column pointer of size n+1 for matrix A. */ - int *rowind, /* row indices of size nz for matrix A. */ - int *perm_c /* out - the column permutation vector. */ - ) -{ - int Alen, *A, i, info, *p; - double *knobs; - int stats[COLAMD_STATS]; - - Alen = colamd_recommended(nnz, m, n); - - if ( !(knobs = (double *) SUPERLU_MALLOC(COLAMD_KNOBS * sizeof(double))) ) - ABORT("Malloc fails for knobs"); - colamd_set_defaults(knobs); - - if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) ) - ABORT("Malloc fails for A[]"); - if (!(p = (int *) SUPERLU_MALLOC((n+1) * sizeof(int))) ) - ABORT("Malloc fails for p[]"); - for (i = 0; i <= n; ++i) p[i] = colptr[i]; - for (i = 0; i < nnz; ++i) A[i] = rowind[i]; - info = colamd(m, n, Alen, A, p, knobs, stats); - if ( info == FALSE ) ABORT("COLAMD failed"); - - for (i = 0; i < n; ++i) perm_c[p[i]] = i; - - SUPERLU_FREE(knobs); - SUPERLU_FREE(A); - SUPERLU_FREE(p); -} - -static void -getata( - const int m, /* number of rows in matrix A. */ - const int n, /* number of columns in matrix A. */ - const int nz, /* number of nonzeros in matrix A */ - int *colptr, /* column pointer of size n+1 for matrix A. */ - int *rowind, /* row indices of size nz for matrix A. */ - int *atanz, /* out - on exit, returns the actual number of - nonzeros in matrix A'*A. */ - int **ata_colptr, /* out - size n+1 */ - int **ata_rowind /* out - size *atanz */ - ) -/* - * Purpose - * ======= - * - * Form the structure of A'*A. A is an m-by-n matrix in column oriented - * format represented by (colptr, rowind). The output A'*A is in column - * oriented format (symmetrically, also row oriented), represented by - * (ata_colptr, ata_rowind). - * - * This routine is modified from GETATA routine by Tim Davis. - * The complexity of this algorithm is: SUM_{i=1,m} r(i)^2, - * i.e., the sum of the square of the row counts. - * - * Questions - * ========= - * o Do I need to withhold the *dense* rows? - * o How do I know the number of nonzeros in A'*A? - * - */ -{ - register int i, j, k, col, num_nz, ti, trow; - int *marker, *b_colptr, *b_rowind; - int *t_colptr, *t_rowind; /* a column oriented form of T = A' */ - - if ( !(marker = (int*) SUPERLU_MALLOC((SUPERLU_MAX(m,n)+1)*sizeof(int))) ) - ABORT("SUPERLU_MALLOC fails for marker[]"); - if ( !(t_colptr = (int*) SUPERLU_MALLOC((m+1) * sizeof(int))) ) - ABORT("SUPERLU_MALLOC t_colptr[]"); - if ( !(t_rowind = (int*) SUPERLU_MALLOC(nz * sizeof(int))) ) - ABORT("SUPERLU_MALLOC fails for t_rowind[]"); - - - /* Get counts of each column of T, and set up column pointers */ - for (i = 0; i < m; ++i) marker[i] = 0; - for (j = 0; j < n; ++j) { - for (i = colptr[j]; i < colptr[j+1]; ++i) - ++marker[rowind[i]]; - } - t_colptr[0] = 0; - for (i = 0; i < m; ++i) { - t_colptr[i+1] = t_colptr[i] + marker[i]; - marker[i] = t_colptr[i]; - } - - /* Transpose the matrix from A to T */ - for (j = 0; j < n; ++j) - for (i = colptr[j]; i < colptr[j+1]; ++i) { - col = rowind[i]; - t_rowind[marker[col]] = j; - ++marker[col]; - } - - - /* ---------------------------------------------------------------- - compute B = T * A, where column j of B is: - - Struct (B_*j) = UNION ( Struct (T_*k) ) - A_kj != 0 - - do not include the diagonal entry - - ( Partition A as: A = (A_*1, ..., A_*n) - Then B = T * A = (T * A_*1, ..., T * A_*n), where - T * A_*j = (T_*1, ..., T_*m) * A_*j. ) - ---------------------------------------------------------------- */ - - /* Zero the diagonal flag */ - for (i = 0; i < n; ++i) marker[i] = -1; - - /* First pass determines number of nonzeros in B */ - num_nz = 0; - for (j = 0; j < n; ++j) { - /* Flag the diagonal so it's not included in the B matrix */ - marker[j] = j; - - for (i = colptr[j]; i < colptr[j+1]; ++i) { - /* A_kj is nonzero, add pattern of column T_*k to B_*j */ - k = rowind[i]; - for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) { - trow = t_rowind[ti]; - if ( marker[trow] != j ) { - marker[trow] = j; - num_nz++; - } - } - } - } - *atanz = num_nz; - - /* Allocate storage for A'*A */ - if ( !(*ata_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) ) - ABORT("SUPERLU_MALLOC fails for ata_colptr[]"); - if ( *atanz ) { - if ( !(*ata_rowind = (int*) SUPERLU_MALLOC( *atanz * sizeof(int)) ) ) - ABORT("SUPERLU_MALLOC fails for ata_rowind[]"); - } - b_colptr = *ata_colptr; /* aliasing */ - b_rowind = *ata_rowind; - - /* Zero the diagonal flag */ - for (i = 0; i < n; ++i) marker[i] = -1; - - /* Compute each column of B, one at a time */ - num_nz = 0; - for (j = 0; j < n; ++j) { - b_colptr[j] = num_nz; - - /* Flag the diagonal so it's not included in the B matrix */ - marker[j] = j; - - if ( *atanz ) { - for (i = colptr[j]; i < colptr[j+1]; ++i) { - /* A_kj is nonzero, add pattern of column T_*k to B_*j */ - k = rowind[i]; - for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) { - trow = t_rowind[ti]; - if ( marker[trow] != j ) { - marker[trow] = j; - b_rowind[num_nz++] = trow; - } - } - } - } - } - b_colptr[n] = num_nz; - - SUPERLU_FREE(marker); - SUPERLU_FREE(t_colptr); - SUPERLU_FREE(t_rowind); -} - - -static void -at_plus_a( - const int n, /* number of columns in matrix A. */ - const int nz, /* number of nonzeros in matrix A */ - int *colptr, /* column pointer of size n+1 for matrix A. */ - int *rowind, /* row indices of size nz for matrix A. */ - int *bnz, /* out - on exit, returns the actual number of - nonzeros in matrix A'*A. */ - int **b_colptr, /* out - size n+1 */ - int **b_rowind /* out - size *bnz */ - ) -{ -/* - * Purpose - * ======= - * - * Form the structure of A'+A. A is an n-by-n matrix in column oriented - * format represented by (colptr, rowind). The output A'+A is in column - * oriented format (symmetrically, also row oriented), represented by - * (b_colptr, b_rowind). - * - */ - register int i, j, k, col, num_nz; - int *t_colptr, *t_rowind; /* a column oriented form of T = A' */ - int *marker; - - if ( !(marker = (int*) SUPERLU_MALLOC( n * sizeof(int)) ) ) - ABORT("SUPERLU_MALLOC fails for marker[]"); - if ( !(t_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) ) - ABORT("SUPERLU_MALLOC fails for t_colptr[]"); - if ( !(t_rowind = (int*) SUPERLU_MALLOC( nz * sizeof(int)) ) ) - ABORT("SUPERLU_MALLOC fails t_rowind[]"); - - - /* Get counts of each column of T, and set up column pointers */ - for (i = 0; i < n; ++i) marker[i] = 0; - for (j = 0; j < n; ++j) { - for (i = colptr[j]; i < colptr[j+1]; ++i) - ++marker[rowind[i]]; - } - t_colptr[0] = 0; - for (i = 0; i < n; ++i) { - t_colptr[i+1] = t_colptr[i] + marker[i]; - marker[i] = t_colptr[i]; - } - - /* Transpose the matrix from A to T */ - for (j = 0; j < n; ++j) - for (i = colptr[j]; i < colptr[j+1]; ++i) { - col = rowind[i]; - t_rowind[marker[col]] = j; - ++marker[col]; - } - - - /* ---------------------------------------------------------------- - compute B = A + T, where column j of B is: - - Struct (B_*j) = Struct (A_*k) UNION Struct (T_*k) - - do not include the diagonal entry - ---------------------------------------------------------------- */ - - /* Zero the diagonal flag */ - for (i = 0; i < n; ++i) marker[i] = -1; - - /* First pass determines number of nonzeros in B */ - num_nz = 0; - for (j = 0; j < n; ++j) { - /* Flag the diagonal so it's not included in the B matrix */ - marker[j] = j; - - /* Add pattern of column A_*k to B_*j */ - for (i = colptr[j]; i < colptr[j+1]; ++i) { - k = rowind[i]; - if ( marker[k] != j ) { - marker[k] = j; - ++num_nz; - } - } - - /* Add pattern of column T_*k to B_*j */ - for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) { - k = t_rowind[i]; - if ( marker[k] != j ) { - marker[k] = j; - ++num_nz; - } - } - } - *bnz = num_nz; - - /* Allocate storage for A+A' */ - if ( !(*b_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) ) - ABORT("SUPERLU_MALLOC fails for b_colptr[]"); - if ( *bnz) { - if ( !(*b_rowind = (int*) SUPERLU_MALLOC( *bnz * sizeof(int)) ) ) - ABORT("SUPERLU_MALLOC fails for b_rowind[]"); - } - - /* Zero the diagonal flag */ - for (i = 0; i < n; ++i) marker[i] = -1; - - /* Compute each column of B, one at a time */ - num_nz = 0; - for (j = 0; j < n; ++j) { - (*b_colptr)[j] = num_nz; - - /* Flag the diagonal so it's not included in the B matrix */ - marker[j] = j; - - /* Add pattern of column A_*k to B_*j */ - if (*bnz) { - for (i = colptr[j]; i < colptr[j+1]; ++i) { - k = rowind[i]; - if ( marker[k] != j ) { - marker[k] = j; - (*b_rowind)[num_nz++] = k; - } - } - - /* Add pattern of column T_*k to B_*j */ - for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) { - k = t_rowind[i]; - if ( marker[k] != j ) { - marker[k] = j; - (*b_rowind)[num_nz++] = k; - } - } - } - } - (*b_colptr)[n] = num_nz; - - SUPERLU_FREE(marker); - SUPERLU_FREE(t_colptr); - SUPERLU_FREE(t_rowind); -} - -void -get_perm_c(int ispec, SuperMatrix *A, int *perm_c) -/* - * Purpose - * ======= - * - * GET_PERM_C obtains a permutation matrix Pc, by applying the multiple - * minimum degree ordering code by Joseph Liu to matrix A'*A or A+A'. - * or using approximate minimum degree column ordering by Davis et. al. - * The LU factorization of A*Pc tends to have less fill than the LU - * factorization of A. - * - * Arguments - * ========= - * - * ispec (input) int - * Specifies the type of column ordering to reduce fill: - * = 1: minimum degree on the structure of A^T * A - * = 2: minimum degree on the structure of A^T + A - * = 3: approximate minimum degree for unsymmetric matrices - * If ispec == 0, the natural ordering (i.e., Pc = I) is returned. - * - * A (input) SuperMatrix* - * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number - * of the linear equations is A->nrow. Currently, the type of A - * can be: Stype = NC; Dtype = _D; Mtype = GE. In the future, - * more general A can be handled. - * - * perm_c (output) int* - * Column permutation vector of size A->ncol, which defines the - * permutation matrix Pc; perm_c[i] = j means column i of A is - * in position j in A*Pc. - * - */ -{ - NCformat *Astore = A->Store; - int m, n, bnz, *b_colptr, i; - int delta, maxint, nofsub, *invp; - int *b_rowind, *dhead, *qsize, *llist, *marker; - /* double t, SuperLU_timer_(); */ - - /* make gcc happy */ - b_rowind=NULL; - b_colptr=NULL; - - m = A->nrow; - n = A->ncol; - - /* t = SuperLU_timer_(); */ - switch ( ispec ) { - case 0: /* Natural ordering */ - for (i = 0; i < n; ++i) perm_c[i] = i; -#if ( PRNTlevel>=1 ) - printf("Use natural column ordering.\n"); -#endif - return; - case 1: /* Minimum degree ordering on A'*A */ - getata(m, n, Astore->nnz, Astore->colptr, Astore->rowind, - &bnz, &b_colptr, &b_rowind); -#if ( PRNTlevel>=1 ) - printf("Use minimum degree ordering on A'*A.\n"); -#endif - /*t = SuperLU_timer_() - t; - printf("Form A'*A time = %8.3f\n", t);*/ - break; - case 2: /* Minimum degree ordering on A'+A */ - if ( m != n ) ABORT("Matrix is not square"); - at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind, - &bnz, &b_colptr, &b_rowind); -#if ( PRNTlevel>=1 ) - printf("Use minimum degree ordering on A'+A.\n"); -#endif - /*t = SuperLU_timer_() - t; - printf("Form A'+A time = %8.3f\n", t);*/ - break; - case 3: /* Approximate minimum degree column ordering. */ - get_colamd(m, n, Astore->nnz, Astore->colptr, Astore->rowind, - perm_c); -#if ( PRNTlevel>=1 ) - printf(".. Use approximate minimum degree column ordering.\n"); -#endif - return; - default: - ABORT("Invalid ISPEC"); - return; - } - - if ( bnz != 0 ) { - /* t = SuperLU_timer_(); */ - - /* Initialize and allocate storage for GENMMD. */ - delta = 1; /* DELTA is a parameter to allow the choice of nodes - whose degree <= min-degree + DELTA. */ - maxint = 2147483647; /* 2**31 - 1 */ - invp = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int)); - if ( !invp ) ABORT("SUPERLU_MALLOC fails for invp."); - dhead = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int)); - if ( !dhead ) ABORT("SUPERLU_MALLOC fails for dhead."); - qsize = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int)); - if ( !qsize ) ABORT("SUPERLU_MALLOC fails for qsize."); - llist = (int *) SUPERLU_MALLOC(n*sizeof(int)); - if ( !llist ) ABORT("SUPERLU_MALLOC fails for llist."); - marker = (int *) SUPERLU_MALLOC(n*sizeof(int)); - if ( !marker ) ABORT("SUPERLU_MALLOC fails for marker."); - - /* Transform adjacency list into 1-based indexing required by GENMMD.*/ - for (i = 0; i <= n; ++i) ++b_colptr[i]; - for (i = 0; i < bnz; ++i) ++b_rowind[i]; - - genmmd_(&n, b_colptr, b_rowind, perm_c, invp, &delta, dhead, - qsize, llist, marker, &maxint, &nofsub); - - /* Transform perm_c into 0-based indexing. */ - for (i = 0; i < n; ++i) --perm_c[i]; - - SUPERLU_FREE(b_colptr); - SUPERLU_FREE(b_rowind); - SUPERLU_FREE(invp); - SUPERLU_FREE(dhead); - SUPERLU_FREE(qsize); - SUPERLU_FREE(llist); - SUPERLU_FREE(marker); - - /* t = SuperLU_timer_() - t; - printf("call GENMMD time = %8.3f\n", t);*/ - - } else { /* Empty adjacency structure */ - for (i = 0; i < n; ++i) perm_c[i] = i; - } - -} diff --git a/intern/opennl/superlu/heap_relax_snode.c b/intern/opennl/superlu/heap_relax_snode.c deleted file mode 100644 index cd88179bbb7..00000000000 --- a/intern/opennl/superlu/heap_relax_snode.c +++ /dev/null @@ -1,119 +0,0 @@ -/** \file opennl/superlu/heap_relax_snode.c - * \ingroup opennl - */ -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include "ssp_defs.h" - -void -heap_relax_snode ( - const int n, - int *et, /* column elimination tree */ - const int relax_columns, /* max no of columns allowed in a - relaxed snode */ - int *descendants, /* no of descendants of each node - in the etree */ - int *relax_end /* last column in a supernode */ - ) -{ -/* - * Purpose - * ======= - * relax_snode() - Identify the initial relaxed supernodes, assuming that - * the matrix has been reordered according to the postorder of the etree. - * - */ - register int i, j, k, l, parent; - register int snode_start; /* beginning of a snode */ - int *et_save, *post, *inv_post, *iwork; - int nsuper_et = 0, nsuper_et_post = 0; - - /* The etree may not be postordered, but is heap ordered. */ - - iwork = (int*) intMalloc(3*n+2); - if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]"); - inv_post = iwork + n+1; - et_save = inv_post + n+1; - - /* Post order etree */ - post = (int *) TreePostorder(n, et); - for (i = 0; i < n+1; ++i) inv_post[post[i]] = i; - - /* Renumber etree in postorder */ - for (i = 0; i < n; ++i) { - iwork[post[i]] = post[et[i]]; - et_save[i] = et[i]; /* Save the original etree */ - } - for (i = 0; i < n; ++i) et[i] = iwork[i]; - - /* Compute the number of descendants of each node in the etree */ - ifill (relax_end, n, EMPTY); - for (j = 0; j < n; j++) descendants[j] = 0; - for (j = 0; j < n; j++) { - parent = et[j]; - if ( parent != n ) /* not the dummy root */ - descendants[parent] += descendants[j] + 1; - } - - /* Identify the relaxed supernodes by postorder traversal of the etree. */ - for (j = 0; j < n; ) { - parent = et[j]; - snode_start = j; - while ( parent != n && descendants[parent] < relax_columns ) { - j = parent; - parent = et[j]; - } - /* Found a supernode in postordered etree; j is the last column. */ - ++nsuper_et_post; - k = n; - for (i = snode_start; i <= j; ++i) - k = SUPERLU_MIN(k, inv_post[i]); - l = inv_post[j]; - if ( (l - k) == (j - snode_start) ) { - /* It's also a supernode in the original etree */ - relax_end[k] = l; /* Last column is recorded */ - ++nsuper_et; - } else { - for (i = snode_start; i <= j; ++i) { - l = inv_post[i]; - if ( descendants[i] == 0 ) relax_end[l] = l; - } - } - j++; - /* Search for a new leaf */ - while ( j < n && descendants[j] != 0 ) j++; - } - -#if ( PRNTlevel>=1 ) - printf(".. heap_snode_relax:\n" - "\tNo of relaxed snodes in postordered etree:\t%d\n" - "\tNo of relaxed snodes in original etree:\t%d\n", - nsuper_et_post, nsuper_et); -#endif - - /* Recover the original etree */ - for (i = 0; i < n; ++i) et[i] = et_save[i]; - - SUPERLU_FREE(post); - SUPERLU_FREE(iwork); -} - - diff --git a/intern/opennl/superlu/lsame.c b/intern/opennl/superlu/lsame.c deleted file mode 100644 index 2f2337d5001..00000000000 --- a/intern/opennl/superlu/lsame.c +++ /dev/null @@ -1,76 +0,0 @@ -/** \file opennl/superlu/lsame.c - * \ingroup opennl - */ -int lsame_(char *, char *); - - -int lsame_(char *ca, char *cb) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - Purpose - ======= - - LSAME returns .TRUE. if CA is the same letter as CB regardless of case. - - Arguments - ========= - - CA (input) CHARACTER*1 - CB (input) CHARACTER*1 - CA and CB specify the single characters to be compared. - - ===================================================================== -*/ - - /* System generated locals */ - int ret_val; - - /* Local variables */ - int inta, intb, zcode; - - ret_val = *(unsigned char *)ca == *(unsigned char *)cb; - if (ret_val) { - return ret_val; - } - - /* Now test for equivalence if both characters are alphabetic. */ - - zcode = 'Z'; - - /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime - machines, on which ICHAR returns a value with bit 8 set. - ICHAR('A') on Prime machines returns 193 which is the same as - ICHAR('A') on an EBCDIC machine. */ - - inta = *(unsigned char *)ca; - intb = *(unsigned char *)cb; - - if (zcode == 90 || zcode == 122) { - /* ASCII is assumed - ZCODE is the ASCII code of either lower or - upper case 'Z'. */ - if (inta >= 97 && inta <= 122) inta += -32; - if (intb >= 97 && intb <= 122) intb += -32; - - } else if (zcode == 233 || zcode == 169) { - /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or - upper case 'Z'. */ - if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta - >= 162 && inta <= 169)) - inta += 64; - if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb - >= 162 && intb <= 169)) - intb += 64; - } else if (zcode == 218 || zcode == 250) { - /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code - plus 128 of either lower or upper case 'Z'. */ - if (inta >= 225 && inta <= 250) inta += -32; - if (intb >= 225 && intb <= 250) intb += -32; - } - ret_val = inta == intb; - return ret_val; - -} /* lsame_ */ diff --git a/intern/opennl/superlu/memory.c b/intern/opennl/superlu/memory.c deleted file mode 100644 index a239f685424..00000000000 --- a/intern/opennl/superlu/memory.c +++ /dev/null @@ -1,214 +0,0 @@ -/** \file opennl/superlu/memory.c - * \ingroup opennl - */ -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - */ -/** Precision-independent memory-related routines. - (Shared by [sdcz]memory.c) **/ - -#include "ssp_defs.h" - -/* prototypes --------------------------------- */ -void copy_mem_int(int, void *, void *); -void user_bcopy(char *, char *, int); - - -#if ( DEBUGlevel>=1 ) /* Debug malloc/free. */ -int superlu_malloc_total = 0; - -#define PAD_FACTOR 2 -#define DWORD (sizeof(double)) /* Be sure it's no smaller than double. */ - -void *superlu_malloc(size_t size) -{ - char *buf; - - buf = (char *) malloc(size + DWORD); - if ( !buf ) { - printf("superlu_malloc fails: malloc_total %.0f MB, size %d\n", - superlu_malloc_total*1e-6, size); - ABORT("superlu_malloc: out of memory"); - } - - ((int_t *) buf)[0] = size; -#if 0 - superlu_malloc_total += size + DWORD; -#else - superlu_malloc_total += size; -#endif - return (void *) (buf + DWORD); -} - -void superlu_free(void *addr) -{ - char *p = ((char *) addr) - DWORD; - - if ( !addr ) - ABORT("superlu_free: tried to free NULL pointer"); - - if ( !p ) - ABORT("superlu_free: tried to free NULL+DWORD pointer"); - - { - int_t n = ((int_t *) p)[0]; - - if ( !n ) - ABORT("superlu_free: tried to free a freed pointer"); - *((int_t *) p) = 0; /* Set to zero to detect duplicate free's. */ -#if 0 - superlu_malloc_total -= (n + DWORD); -#else - superlu_malloc_total -= n; -#endif - - if ( superlu_malloc_total < 0 ) - ABORT("superlu_malloc_total went negative!"); - - /*free (addr);*/ - free (p); - } - -} - -#else /* production mode */ - -void *superlu_malloc(size_t size) -{ - void *buf; - buf = (void *) malloc(size); - return (buf); -} - -void superlu_free(void *addr) -{ - free (addr); -} - -#endif - - -/* - * Set up pointers for integer working arrays. - */ -void -SetIWork(int m, int n, int panel_size, int *iworkptr, int **segrep, - int **parent, int **xplore, int **repfnz, int **panel_lsub, - int **xprune, int **marker) -{ - *segrep = iworkptr; - *parent = iworkptr + m; - *xplore = *parent + m; - *repfnz = *xplore + m; - *panel_lsub = *repfnz + panel_size * m; - *xprune = *panel_lsub + panel_size * m; - *marker = *xprune + n; - ifill (*repfnz, m * panel_size, EMPTY); - ifill (*panel_lsub, m * panel_size, EMPTY); -} - - -void -copy_mem_int(int howmany, void *old, void *new) -{ - register int i; - int *iold = old; - int *inew = new; - for (i = 0; i < howmany; i++) inew[i] = iold[i]; -} - - -void -user_bcopy(char *src, char *dest, int bytes) -{ - char *s_ptr, *d_ptr; - - s_ptr = src + bytes - 1; - d_ptr = dest + bytes - 1; - for (; d_ptr >= dest; --s_ptr, --d_ptr ) *d_ptr = *s_ptr; -} - - - -int *intMalloc(int n) -{ - int *buf; - buf = (int *) SUPERLU_MALLOC(n * sizeof(int)); - if ( !buf ) { - ABORT("SUPERLU_MALLOC fails for buf in intMalloc()"); - } - return (buf); -} - -int *intCalloc(int n) -{ - int *buf; - register int i; - buf = (int *) SUPERLU_MALLOC(n * sizeof(int)); - if ( !buf ) { - ABORT("SUPERLU_MALLOC fails for buf in intCalloc()"); - } - for (i = 0; i < n; ++i) buf[i] = 0; - return (buf); -} - - - -#if 0 -check_expanders() -{ - int p; - printf("Check expanders:\n"); - for (p = 0; p < NO_MEMTYPE; p++) { - printf("type %d, size %d, mem %d\n", - p, expanders[p].size, (int)expanders[p].mem); - } - - return 0; -} - - -StackInfo() -{ - printf("Stack: size %d, used %d, top1 %d, top2 %d\n", - stack.size, stack.used, stack.top1, stack.top2); - return 0; -} - - - -PrintStack(char *msg, GlobalLU_t *Glu) -{ - int i; - int *xlsub, *lsub, *xusub, *usub; - - xlsub = Glu->xlsub; - lsub = Glu->lsub; - xusub = Glu->xusub; - usub = Glu->usub; - - printf("%s\n", msg); - -/* printf("\nUCOL: "); - for (i = 0; i < xusub[ndim]; ++i) - printf("%f ", ucol[i]); - - printf("\nLSUB: "); - for (i = 0; i < xlsub[ndim]; ++i) - printf("%d ", lsub[i]); - - printf("\nUSUB: "); - for (i = 0; i < xusub[ndim]; ++i) - printf("%d ", usub[i]); - - printf("\n");*/ - return 0; -} -#endif - - - diff --git a/intern/opennl/superlu/mmd.c b/intern/opennl/superlu/mmd.c deleted file mode 100644 index a9c20fd9e81..00000000000 --- a/intern/opennl/superlu/mmd.c +++ /dev/null @@ -1,1028 +0,0 @@ -/** \file opennl/superlu/mmd.c - * \ingroup opennl - */ - -typedef int shortint; - - -/* prototypes -------------------- */ -int genmmd_(int *, int *, int *, int *, int *, int *, int *, - int *, int *, int *, int *, int *); -int mmdint_(int *, int *, shortint *, shortint *, shortint *, shortint *, shortint *, - shortint *, shortint *); -int mmdelm_(int *, int *, shortint *, shortint *, shortint *, shortint *, shortint *, - shortint *, shortint *, int *, int *); -int mmdupd_(int *, int *, int *, shortint *, int *, int *, shortint *, - shortint *, shortint *, shortint *, shortint *, shortint *, int *, int *); -int mmdnum_(int *, shortint *, shortint *, shortint *); - - -/* *************************************************************** */ -/* *************************************************************** */ -/* **** GENMMD ..... MULTIPLE MINIMUM EXTERNAL DEGREE **** */ -/* *************************************************************** */ -/* *************************************************************** */ - -/* AUTHOR - JOSEPH W.H. LIU */ -/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */ - -/* PURPOSE - THIS ROUTINE IMPLEMENTS THE MINIMUM DEGREE */ -/* ALGORITHM. IT MAKES USE OF THE IMPLICIT REPRESENTATION */ -/* OF ELIMINATION GRAPHS BY QUOTIENT GRAPHS, AND THE */ -/* NOTION OF INDISTINGUISHABLE NODES. IT ALSO IMPLEMENTS */ -/* THE MODIFICATIONS BY MULTIPLE ELIMINATION AND MINIMUM */ -/* EXTERNAL DEGREE. */ -/* --------------------------------------------- */ -/* CAUTION - THE ADJACENCY VECTOR ADJNCY WILL BE */ -/* DESTROYED. */ -/* --------------------------------------------- */ - -/* INPUT PARAMETERS - */ -/* NEQNS - NUMBER OF EQUATIONS. */ -/* (XADJ,ADJNCY) - THE ADJACENCY STRUCTURE. */ -/* DELTA - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */ -/* MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) INTEGER */ -/* (ANY SMALLER ESTIMATE WILL DO) FOR MARKING */ -/* NODES. */ - -/* OUTPUT PARAMETERS - */ -/* PERM - THE MINIMUM DEGREE ORDERING. */ -/* INVP - THE INVERSE OF PERM. */ -/* NOFSUB - AN UPPER BOUND ON THE NUMBER OF NONZERO */ -/* SUBSCRIPTS FOR THE COMPRESSED STORAGE SCHEME. */ - -/* WORKING PARAMETERS - */ -/* DHEAD - VECTOR FOR HEAD OF DEGREE LISTS. */ -/* INVP - USED TEMPORARILY FOR DEGREE FORWARD LINK. */ -/* PERM - USED TEMPORARILY FOR DEGREE BACKWARD LINK. */ -/* QSIZE - VECTOR FOR SIZE OF SUPERNODES. */ -/* LLIST - VECTOR FOR TEMPORARY LINKED LISTS. */ -/* MARKER - A TEMPORARY MARKER VECTOR. */ - -/* PROGRAM SUBROUTINES - */ -/* MMDELM, MMDINT, MMDNUM, MMDUPD. */ - -/* *************************************************************** */ - -/* Subroutine */ int genmmd_(int *neqns, int *xadj, shortint *adjncy, - shortint *invp, shortint *perm, int *delta, shortint *dhead, - shortint *qsize, shortint *llist, shortint *marker, int *maxint, - int *nofsub) -{ - /* System generated locals */ - int i__1; - - /* Local variables */ - static int mdeg, ehead, i, mdlmt, mdnode; - extern /* Subroutine */ int mmdelm_(int *, int *, shortint *, - shortint *, shortint *, shortint *, shortint *, shortint *, - shortint *, int *, int *), mmdupd_(int *, int *, - int *, shortint *, int *, int *, shortint *, shortint - *, shortint *, shortint *, shortint *, shortint *, int *, - int *), mmdint_(int *, int *, shortint *, shortint *, - shortint *, shortint *, shortint *, shortint *, shortint *), - mmdnum_(int *, shortint *, shortint *, shortint *); - static int nextmd, tag, num; - - -/* *************************************************************** */ - - -/* *************************************************************** */ - - /* Parameter adjustments */ - --marker; - --llist; - --qsize; - --dhead; - --perm; - --invp; - --adjncy; - --xadj; - - /* Function Body */ - if (*neqns <= 0) { - return 0; - } - -/* ------------------------------------------------ */ -/* INITIALIZATION FOR THE MINIMUM DEGREE ALGORITHM. */ -/* ------------------------------------------------ */ - *nofsub = 0; - mmdint_(neqns, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], & - qsize[1], &llist[1], &marker[1]); - -/* ---------------------------------------------- */ -/* NUM COUNTS THE NUMBER OF ORDERED NODES PLUS 1. */ -/* ---------------------------------------------- */ - num = 1; - -/* ----------------------------- */ -/* ELIMINATE ALL ISOLATED NODES. */ -/* ----------------------------- */ - nextmd = dhead[1]; -L100: - if (nextmd <= 0) { - goto L200; - } - mdnode = nextmd; - nextmd = invp[mdnode]; - marker[mdnode] = *maxint; - invp[mdnode] = -num; - ++num; - goto L100; - -L200: -/* ---------------------------------------- */ -/* SEARCH FOR NODE OF THE MINIMUM DEGREE. */ -/* MDEG IS THE CURRENT MINIMUM DEGREE; */ -/* TAG IS USED TO FACILITATE MARKING NODES. */ -/* ---------------------------------------- */ - if (num > *neqns) { - goto L1000; - } - tag = 1; - dhead[1] = 0; - mdeg = 2; -L300: - if (dhead[mdeg] > 0) { - goto L400; - } - ++mdeg; - goto L300; -L400: -/* ------------------------------------------------- */ -/* USE VALUE OF DELTA TO SET UP MDLMT, WHICH GOVERNS */ -/* WHEN A DEGREE UPDATE IS TO BE PERFORMED. */ -/* ------------------------------------------------- */ - mdlmt = mdeg + *delta; - ehead = 0; - -L500: - mdnode = dhead[mdeg]; - if (mdnode > 0) { - goto L600; - } - ++mdeg; - if (mdeg > mdlmt) { - goto L900; - } - goto L500; -L600: -/* ---------------------------------------- */ -/* REMOVE MDNODE FROM THE DEGREE STRUCTURE. */ -/* ---------------------------------------- */ - nextmd = invp[mdnode]; - dhead[mdeg] = nextmd; - if (nextmd > 0) { - perm[nextmd] = -mdeg; - } - invp[mdnode] = -num; - *nofsub = *nofsub + mdeg + qsize[mdnode] - 2; - if (num + qsize[mdnode] > *neqns) { - goto L1000; - } -/* ---------------------------------------------- */ -/* ELIMINATE MDNODE AND PERFORM QUOTIENT GRAPH */ -/* TRANSFORMATION. RESET TAG VALUE IF NECESSARY. */ -/* ---------------------------------------------- */ - ++tag; - if (tag < *maxint) { - goto L800; - } - tag = 1; - i__1 = *neqns; - for (i = 1; i <= i__1; ++i) { - if (marker[i] < *maxint) { - marker[i] = 0; - } -/* L700: */ - } -L800: - mmdelm_(&mdnode, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], & - qsize[1], &llist[1], &marker[1], maxint, &tag); - num += qsize[mdnode]; - llist[mdnode] = ehead; - ehead = mdnode; - if (*delta >= 0) { - goto L500; - } -L900: -/* ------------------------------------------- */ -/* UPDATE DEGREES OF THE NODES INVOLVED IN THE */ -/* MINIMUM DEGREE NODES ELIMINATION. */ -/* ------------------------------------------- */ - if (num > *neqns) { - goto L1000; - } - mmdupd_(&ehead, neqns, &xadj[1], &adjncy[1], delta, &mdeg, &dhead[1], & - invp[1], &perm[1], &qsize[1], &llist[1], &marker[1], maxint, &tag) - ; - goto L300; - -L1000: - mmdnum_(neqns, &perm[1], &invp[1], &qsize[1]); - return 0; - -} /* genmmd_ */ - -/* *************************************************************** */ -/* *************************************************************** */ -/* *** MMDINT ..... MULT MINIMUM DEGREE INITIALIZATION *** */ -/* *************************************************************** */ -/* *************************************************************** */ - -/* AUTHOR - JOSEPH W.H. LIU */ -/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */ - -/* PURPOSE - THIS ROUTINE PERFORMS INITIALIZATION FOR THE */ -/* MULTIPLE ELIMINATION VERSION OF THE MINIMUM DEGREE */ -/* ALGORITHM. */ - -/* INPUT PARAMETERS - */ -/* NEQNS - NUMBER OF EQUATIONS. */ -/* (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */ - -/* OUTPUT PARAMETERS - */ -/* (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */ -/* QSIZE - SIZE OF SUPERNODE (INITIALIZED TO ONE). */ -/* LLIST - LINKED LIST. */ -/* MARKER - MARKER VECTOR. */ - -/* *************************************************************** */ - -/* Subroutine */ int mmdint_(int *neqns, int *xadj, shortint *adjncy, - shortint *dhead, shortint *dforw, shortint *dbakw, shortint *qsize, - shortint *llist, shortint *marker) -{ - /* System generated locals */ - int i__1; - - /* Local variables */ - static int ndeg, node, fnode; - - -/* *************************************************************** */ - - -/* *************************************************************** */ - - /* Parameter adjustments */ - --marker; - --llist; - --qsize; - --dbakw; - --dforw; - --dhead; - --adjncy; - --xadj; - - /* Function Body */ - i__1 = *neqns; - for (node = 1; node <= i__1; ++node) { - dhead[node] = 0; - qsize[node] = 1; - marker[node] = 0; - llist[node] = 0; -/* L100: */ - } -/* ------------------------------------------ */ -/* INITIALIZE THE DEGREE DOUBLY LINKED LISTS. */ -/* ------------------------------------------ */ - i__1 = *neqns; - for (node = 1; node <= i__1; ++node) { - ndeg = xadj[node + 1] - xadj[node] + 1; - fnode = dhead[ndeg]; - dforw[node] = fnode; - dhead[ndeg] = node; - if (fnode > 0) { - dbakw[fnode] = node; - } - dbakw[node] = -ndeg; -/* L200: */ - } - return 0; - -} /* mmdint_ */ - -/* *************************************************************** */ -/* *************************************************************** */ -/* ** MMDELM ..... MULTIPLE MINIMUM DEGREE ELIMINATION *** */ -/* *************************************************************** */ -/* *************************************************************** */ - -/* AUTHOR - JOSEPH W.H. LIU */ -/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */ - -/* PURPOSE - THIS ROUTINE ELIMINATES THE NODE MDNODE OF */ -/* MINIMUM DEGREE FROM THE ADJACENCY STRUCTURE, WHICH */ -/* IS STORED IN THE QUOTIENT GRAPH FORMAT. IT ALSO */ -/* TRANSFORMS THE QUOTIENT GRAPH REPRESENTATION OF THE */ -/* ELIMINATION GRAPH. */ - -/* INPUT PARAMETERS - */ -/* MDNODE - NODE OF MINIMUM DEGREE. */ -/* MAXINT - ESTIMATE OF MAXIMUM REPRESENTABLE (SHORT) */ -/* INT. */ -/* TAG - TAG VALUE. */ - -/* UPDATED PARAMETERS - */ -/* (XADJ,ADJNCY) - UPDATED ADJACENCY STRUCTURE. */ -/* (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */ -/* QSIZE - SIZE OF SUPERNODE. */ -/* MARKER - MARKER VECTOR. */ -/* LLIST - TEMPORARY LINKED LIST OF ELIMINATED NABORS. */ - -/* *************************************************************** */ - -/* Subroutine */ int mmdelm_(int *mdnode, int *xadj, shortint *adjncy, - shortint *dhead, shortint *dforw, shortint *dbakw, shortint *qsize, - shortint *llist, shortint *marker, int *maxint, int *tag) -{ - /* System generated locals */ - int i__1, i__2; - - /* Local variables */ - static int node, link, rloc, rlmt, i, j, nabor, rnode, elmnt, xqnbr, - istop, jstop, istrt, jstrt, nxnode, pvnode, nqnbrs, npv; - - -/* *************************************************************** */ - - -/* *************************************************************** */ - -/* ----------------------------------------------- */ -/* FIND REACHABLE SET AND PLACE IN DATA STRUCTURE. */ -/* ----------------------------------------------- */ - /* Parameter adjustments */ - --marker; - --llist; - --qsize; - --dbakw; - --dforw; - --dhead; - --adjncy; - --xadj; - - /* Function Body */ - marker[*mdnode] = *tag; - istrt = xadj[*mdnode]; - istop = xadj[*mdnode + 1] - 1; -/* ------------------------------------------------------- */ -/* ELMNT POINTS TO THE BEGINNING OF THE LIST OF ELIMINATED */ -/* NABORS OF MDNODE, AND RLOC GIVES THE STORAGE LOCATION */ -/* FOR THE NEXT REACHABLE NODE. */ -/* ------------------------------------------------------- */ - elmnt = 0; - rloc = istrt; - rlmt = istop; - i__1 = istop; - for (i = istrt; i <= i__1; ++i) { - nabor = adjncy[i]; - if (nabor == 0) { - goto L300; - } - if (marker[nabor] >= *tag) { - goto L200; - } - marker[nabor] = *tag; - if (dforw[nabor] < 0) { - goto L100; - } - adjncy[rloc] = nabor; - ++rloc; - goto L200; -L100: - llist[nabor] = elmnt; - elmnt = nabor; -L200: - ; - } -L300: -/* ----------------------------------------------------- */ -/* MERGE WITH REACHABLE NODES FROM GENERALIZED ELEMENTS. */ -/* ----------------------------------------------------- */ - if (elmnt <= 0) { - goto L1000; - } - adjncy[rlmt] = -elmnt; - link = elmnt; -L400: - jstrt = xadj[link]; - jstop = xadj[link + 1] - 1; - i__1 = jstop; - for (j = jstrt; j <= i__1; ++j) { - node = adjncy[j]; - link = -node; - if (node < 0) { - goto L400; - } else if (node == 0) { - goto L900; - } else { - goto L500; - } -L500: - if (marker[node] >= *tag || dforw[node] < 0) { - goto L800; - } - marker[node] = *tag; -/* --------------------------------- */ -/* USE STORAGE FROM ELIMINATED NODES */ -/* IF NECESSARY. */ -/* --------------------------------- */ -L600: - if (rloc < rlmt) { - goto L700; - } - link = -adjncy[rlmt]; - rloc = xadj[link]; - rlmt = xadj[link + 1] - 1; - goto L600; -L700: - adjncy[rloc] = node; - ++rloc; -L800: - ; - } -L900: - elmnt = llist[elmnt]; - goto L300; -L1000: - if (rloc <= rlmt) { - adjncy[rloc] = 0; - } -/* -------------------------------------------------------- */ -/* FOR EACH NODE IN THE REACHABLE SET, DO THE FOLLOWING ... */ -/* -------------------------------------------------------- */ - link = *mdnode; -L1100: - istrt = xadj[link]; - istop = xadj[link + 1] - 1; - i__1 = istop; - for (i = istrt; i <= i__1; ++i) { - rnode = adjncy[i]; - link = -rnode; - if (rnode < 0) { - goto L1100; - } else if (rnode == 0) { - goto L1800; - } else { - goto L1200; - } -L1200: -/* -------------------------------------------- */ -/* IF RNODE IS IN THE DEGREE LIST STRUCTURE ... */ -/* -------------------------------------------- */ - pvnode = dbakw[rnode]; - if (pvnode == 0 || pvnode == -(*maxint)) { - goto L1300; - } -/* ------------------------------------- */ -/* THEN REMOVE RNODE FROM THE STRUCTURE. */ -/* ------------------------------------- */ - nxnode = dforw[rnode]; - if (nxnode > 0) { - dbakw[nxnode] = pvnode; - } - if (pvnode > 0) { - dforw[pvnode] = nxnode; - } - npv = -pvnode; - if (pvnode < 0) { - dhead[npv] = nxnode; - } -L1300: -/* ---------------------------------------- */ -/* PURGE INACTIVE QUOTIENT NABORS OF RNODE. */ -/* ---------------------------------------- */ - jstrt = xadj[rnode]; - jstop = xadj[rnode + 1] - 1; - xqnbr = jstrt; - i__2 = jstop; - for (j = jstrt; j <= i__2; ++j) { - nabor = adjncy[j]; - if (nabor == 0) { - goto L1500; - } - if (marker[nabor] >= *tag) { - goto L1400; - } - adjncy[xqnbr] = nabor; - ++xqnbr; -L1400: - ; - } -L1500: -/* ---------------------------------------- */ -/* IF NO ACTIVE NABOR AFTER THE PURGING ... */ -/* ---------------------------------------- */ - nqnbrs = xqnbr - jstrt; - if (nqnbrs > 0) { - goto L1600; - } -/* ----------------------------- */ -/* THEN MERGE RNODE WITH MDNODE. */ -/* ----------------------------- */ - qsize[*mdnode] += qsize[rnode]; - qsize[rnode] = 0; - marker[rnode] = *maxint; - dforw[rnode] = -(*mdnode); - dbakw[rnode] = -(*maxint); - goto L1700; -L1600: -/* -------------------------------------- */ -/* ELSE FLAG RNODE FOR DEGREE UPDATE, AND */ -/* ADD MDNODE AS A NABOR OF RNODE. */ -/* -------------------------------------- */ - dforw[rnode] = nqnbrs + 1; - dbakw[rnode] = 0; - adjncy[xqnbr] = *mdnode; - ++xqnbr; - if (xqnbr <= jstop) { - adjncy[xqnbr] = 0; - } - -L1700: - ; - } -L1800: - return 0; - -} /* mmdelm_ */ - -/* *************************************************************** */ -/* *************************************************************** */ -/* ***** MMDUPD ..... MULTIPLE MINIMUM DEGREE UPDATE ***** */ -/* *************************************************************** */ -/* *************************************************************** */ - -/* AUTHOR - JOSEPH W.H. LIU */ -/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */ - -/* PURPOSE - THIS ROUTINE UPDATES THE DEGREES OF NODES */ -/* AFTER A MULTIPLE ELIMINATION STEP. */ - -/* INPUT PARAMETERS - */ -/* EHEAD - THE BEGINNING OF THE LIST OF ELIMINATED */ -/* NODES (I.E., NEWLY FORMED ELEMENTS). */ -/* NEQNS - NUMBER OF EQUATIONS. */ -/* (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */ -/* DELTA - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */ -/* MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) */ -/* INTEGER. */ - -/* UPDATED PARAMETERS - */ -/* MDEG - NEW MINIMUM DEGREE AFTER DEGREE UPDATE. */ -/* (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */ -/* QSIZE - SIZE OF SUPERNODE. */ -/* LLIST - WORKING LINKED LIST. */ -/* MARKER - MARKER VECTOR FOR DEGREE UPDATE. */ -/* TAG - TAG VALUE. */ - -/* *************************************************************** */ - -/* Subroutine */ int mmdupd_(int *ehead, int *neqns, int *xadj, - shortint *adjncy, int *delta, int *mdeg, shortint *dhead, - shortint *dforw, shortint *dbakw, shortint *qsize, shortint *llist, - shortint *marker, int *maxint, int *tag) -{ - /* System generated locals */ - int i__1, i__2; - - /* Local variables */ - static int node, mtag, link, mdeg0, i, j, enode, fnode, nabor, elmnt, - istop, jstop, q2head, istrt, jstrt, qxhead, iq2, deg, deg0; - - -/* *************************************************************** */ - - -/* *************************************************************** */ - - /* Parameter adjustments */ - --marker; - --llist; - --qsize; - --dbakw; - --dforw; - --dhead; - --adjncy; - --xadj; - - /* Function Body */ - mdeg0 = *mdeg + *delta; - elmnt = *ehead; -L100: -/* ------------------------------------------------------- */ -/* FOR EACH OF THE NEWLY FORMED ELEMENT, DO THE FOLLOWING. */ -/* (RESET TAG VALUE IF NECESSARY.) */ -/* ------------------------------------------------------- */ - if (elmnt <= 0) { - return 0; - } - mtag = *tag + mdeg0; - if (mtag < *maxint) { - goto L300; - } - *tag = 1; - i__1 = *neqns; - for (i = 1; i <= i__1; ++i) { - if (marker[i] < *maxint) { - marker[i] = 0; - } -/* L200: */ - } - mtag = *tag + mdeg0; -L300: -/* --------------------------------------------- */ -/* CREATE TWO LINKED LISTS FROM NODES ASSOCIATED */ -/* WITH ELMNT: ONE WITH TWO NABORS (Q2HEAD) IN */ -/* ADJACENCY STRUCTURE, AND THE OTHER WITH MORE */ -/* THAN TWO NABORS (QXHEAD). ALSO COMPUTE DEG0, */ -/* NUMBER OF NODES IN THIS ELEMENT. */ -/* --------------------------------------------- */ - q2head = 0; - qxhead = 0; - deg0 = 0; - link = elmnt; -L400: - istrt = xadj[link]; - istop = xadj[link + 1] - 1; - i__1 = istop; - for (i = istrt; i <= i__1; ++i) { - enode = adjncy[i]; - link = -enode; - if (enode < 0) { - goto L400; - } else if (enode == 0) { - goto L800; - } else { - goto L500; - } - -L500: - if (qsize[enode] == 0) { - goto L700; - } - deg0 += qsize[enode]; - marker[enode] = mtag; -/* ---------------------------------- */ -/* IF ENODE REQUIRES A DEGREE UPDATE, */ -/* THEN DO THE FOLLOWING. */ -/* ---------------------------------- */ - if (dbakw[enode] != 0) { - goto L700; - } -/* --------------------------------------- -*/ -/* PLACE EITHER IN QXHEAD OR Q2HEAD LISTS. -*/ -/* --------------------------------------- -*/ - if (dforw[enode] == 2) { - goto L600; - } - llist[enode] = qxhead; - qxhead = enode; - goto L700; -L600: - llist[enode] = q2head; - q2head = enode; -L700: - ; - } -L800: -/* -------------------------------------------- */ -/* FOR EACH ENODE IN Q2 LIST, DO THE FOLLOWING. */ -/* -------------------------------------------- */ - enode = q2head; - iq2 = 1; -L900: - if (enode <= 0) { - goto L1500; - } - if (dbakw[enode] != 0) { - goto L2200; - } - ++(*tag); - deg = deg0; -/* ------------------------------------------ */ -/* IDENTIFY THE OTHER ADJACENT ELEMENT NABOR. */ -/* ------------------------------------------ */ - istrt = xadj[enode]; - nabor = adjncy[istrt]; - if (nabor == elmnt) { - nabor = adjncy[istrt + 1]; - } -/* ------------------------------------------------ */ -/* IF NABOR IS UNELIMINATED, INCREASE DEGREE COUNT. */ -/* ------------------------------------------------ */ - link = nabor; - if (dforw[nabor] < 0) { - goto L1000; - } - deg += qsize[nabor]; - goto L2100; -L1000: -/* -------------------------------------------- */ -/* OTHERWISE, FOR EACH NODE IN THE 2ND ELEMENT, */ -/* DO THE FOLLOWING. */ -/* -------------------------------------------- */ - istrt = xadj[link]; - istop = xadj[link + 1] - 1; - i__1 = istop; - for (i = istrt; i <= i__1; ++i) { - node = adjncy[i]; - link = -node; - if (node == enode) { - goto L1400; - } - if (node < 0) { - goto L1000; - } else if (node == 0) { - goto L2100; - } else { - goto L1100; - } - -L1100: - if (qsize[node] == 0) { - goto L1400; - } - if (marker[node] >= *tag) { - goto L1200; - } -/* ----------------------------------- --- */ -/* CASE WHEN NODE IS NOT YET CONSIDERED -. */ -/* ----------------------------------- --- */ - marker[node] = *tag; - deg += qsize[node]; - goto L1400; -L1200: -/* ---------------------------------------- - */ -/* CASE WHEN NODE IS INDISTINGUISHABLE FROM - */ -/* ENODE. MERGE THEM INTO A NEW SUPERNODE. - */ -/* ---------------------------------------- - */ - if (dbakw[node] != 0) { - goto L1400; - } - if (dforw[node] != 2) { - goto L1300; - } - qsize[enode] += qsize[node]; - qsize[node] = 0; - marker[node] = *maxint; - dforw[node] = -enode; - dbakw[node] = -(*maxint); - goto L1400; -L1300: -/* -------------------------------------- -*/ -/* CASE WHEN NODE IS OUTMATCHED BY ENODE. -*/ -/* -------------------------------------- -*/ - if (dbakw[node] == 0) { - dbakw[node] = -(*maxint); - } -L1400: - ; - } - goto L2100; -L1500: -/* ------------------------------------------------ */ -/* FOR EACH ENODE IN THE QX LIST, DO THE FOLLOWING. */ -/* ------------------------------------------------ */ - enode = qxhead; - iq2 = 0; -L1600: - if (enode <= 0) { - goto L2300; - } - if (dbakw[enode] != 0) { - goto L2200; - } - ++(*tag); - deg = deg0; -/* --------------------------------- */ -/* FOR EACH UNMARKED NABOR OF ENODE, */ -/* DO THE FOLLOWING. */ -/* --------------------------------- */ - istrt = xadj[enode]; - istop = xadj[enode + 1] - 1; - i__1 = istop; - for (i = istrt; i <= i__1; ++i) { - nabor = adjncy[i]; - if (nabor == 0) { - goto L2100; - } - if (marker[nabor] >= *tag) { - goto L2000; - } - marker[nabor] = *tag; - link = nabor; -/* ------------------------------ */ -/* IF UNELIMINATED, INCLUDE IT IN */ -/* DEG COUNT. */ -/* ------------------------------ */ - if (dforw[nabor] < 0) { - goto L1700; - } - deg += qsize[nabor]; - goto L2000; -L1700: -/* ------------------------------- -*/ -/* IF ELIMINATED, INCLUDE UNMARKED -*/ -/* NODES IN THIS ELEMENT INTO THE -*/ -/* DEGREE COUNT. */ -/* ------------------------------- -*/ - jstrt = xadj[link]; - jstop = xadj[link + 1] - 1; - i__2 = jstop; - for (j = jstrt; j <= i__2; ++j) { - node = adjncy[j]; - link = -node; - if (node < 0) { - goto L1700; - } else if (node == 0) { - goto L2000; - } else { - goto L1800; - } - -L1800: - if (marker[node] >= *tag) { - goto L1900; - } - marker[node] = *tag; - deg += qsize[node]; -L1900: - ; - } -L2000: - ; - } -L2100: -/* ------------------------------------------- */ -/* UPDATE EXTERNAL DEGREE OF ENODE IN DEGREE */ -/* STRUCTURE, AND MDEG (MIN DEG) IF NECESSARY. */ -/* ------------------------------------------- */ - deg = deg - qsize[enode] + 1; - fnode = dhead[deg]; - dforw[enode] = fnode; - dbakw[enode] = -deg; - if (fnode > 0) { - dbakw[fnode] = enode; - } - dhead[deg] = enode; - if (deg < *mdeg) { - *mdeg = deg; - } -L2200: -/* ---------------------------------- */ -/* GET NEXT ENODE IN CURRENT ELEMENT. */ -/* ---------------------------------- */ - enode = llist[enode]; - if (iq2 == 1) { - goto L900; - } - goto L1600; -L2300: -/* ----------------------------- */ -/* GET NEXT ELEMENT IN THE LIST. */ -/* ----------------------------- */ - *tag = mtag; - elmnt = llist[elmnt]; - goto L100; - -} /* mmdupd_ */ - -/* *************************************************************** */ -/* *************************************************************** */ -/* ***** MMDNUM ..... MULTI MINIMUM DEGREE NUMBERING ***** */ -/* *************************************************************** */ -/* *************************************************************** */ - -/* AUTHOR - JOSEPH W.H. LIU */ -/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */ - -/* PURPOSE - THIS ROUTINE PERFORMS THE FINAL STEP IN */ -/* PRODUCING THE PERMUTATION AND INVERSE PERMUTATION */ -/* VECTORS IN THE MULTIPLE ELIMINATION VERSION OF THE */ -/* MINIMUM DEGREE ORDERING ALGORITHM. */ - -/* INPUT PARAMETERS - */ -/* NEQNS - NUMBER OF EQUATIONS. */ -/* QSIZE - SIZE OF SUPERNODES AT ELIMINATION. */ - -/* UPDATED PARAMETERS - */ -/* INVP - INVERSE PERMUTATION VECTOR. ON INPUT, */ -/* IF QSIZE(NODE)=0, THEN NODE HAS BEEN MERGED */ -/* INTO THE NODE -INVP(NODE); OTHERWISE, */ -/* -INVP(NODE) IS ITS INVERSE LABELLING. */ - -/* OUTPUT PARAMETERS - */ -/* PERM - THE PERMUTATION VECTOR. */ - -/* *************************************************************** */ - -/* Subroutine */ int mmdnum_(int *neqns, shortint *perm, shortint *invp, - shortint *qsize) -{ - /* System generated locals */ - int i__1; - - /* Local variables */ - static int node, root, nextf, father, nqsize, num; - - -/* *************************************************************** */ - - -/* *************************************************************** */ - - /* Parameter adjustments */ - --qsize; - --invp; - --perm; - - /* Function Body */ - i__1 = *neqns; - for (node = 1; node <= i__1; ++node) { - nqsize = qsize[node]; - if (nqsize <= 0) { - perm[node] = invp[node]; - } - if (nqsize > 0) { - perm[node] = -invp[node]; - } -/* L100: */ - } -/* ------------------------------------------------------ */ -/* FOR EACH NODE WHICH HAS BEEN MERGED, DO THE FOLLOWING. */ -/* ------------------------------------------------------ */ - i__1 = *neqns; - for (node = 1; node <= i__1; ++node) { - if (perm[node] > 0) { - goto L500; - } -/* ----------------------------------------- */ -/* TRACE THE MERGED TREE UNTIL ONE WHICH HAS */ -/* NOT BEEN MERGED, CALL IT ROOT. */ -/* ----------------------------------------- */ - father = node; -L200: - if (perm[father] > 0) { - goto L300; - } - father = -perm[father]; - goto L200; -L300: -/* ----------------------- */ -/* NUMBER NODE AFTER ROOT. */ -/* ----------------------- */ - root = father; - num = perm[root] + 1; - invp[node] = -num; - perm[root] = num; -/* ------------------------ */ -/* SHORTEN THE MERGED TREE. */ -/* ------------------------ */ - father = node; -L400: - nextf = -perm[father]; - if (nextf <= 0) { - goto L500; - } - perm[father] = -root; - father = nextf; - goto L400; -L500: - ; - } -/* ---------------------- */ -/* READY TO COMPUTE PERM. */ -/* ---------------------- */ - i__1 = *neqns; - for (node = 1; node <= i__1; ++node) { - num = -invp[node]; - invp[node] = num; - perm[num] = node; -/* L600: */ - } - return 0; - -} /* mmdnum_ */ - diff --git a/intern/opennl/superlu/relax_snode.c b/intern/opennl/superlu/relax_snode.c deleted file mode 100644 index 4a9265e0fde..00000000000 --- a/intern/opennl/superlu/relax_snode.c +++ /dev/null @@ -1,74 +0,0 @@ -/** \file opennl/superlu/relax_snode.c - * \ingroup opennl - */ -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include "ssp_defs.h" - -void -relax_snode ( - const int n, - int *et, /* column elimination tree */ - const int relax_columns, /* max no of columns allowed in a - relaxed snode */ - int *descendants, /* no of descendants of each node - in the etree */ - int *relax_end /* last column in a supernode */ - ) -{ -/* - * Purpose - * ======= - * relax_snode() - Identify the initial relaxed supernodes, assuming that - * the matrix has been reordered according to the postorder of the etree. - * - */ - register int j, parent; - register int snode_start; /* beginning of a snode */ - - ifill (relax_end, n, EMPTY); - for (j = 0; j < n; j++) descendants[j] = 0; - - /* Compute the number of descendants of each node in the etree */ - for (j = 0; j < n; j++) { - parent = et[j]; - if ( parent != n ) /* not the dummy root */ - descendants[parent] += descendants[j] + 1; - } - - /* Identify the relaxed supernodes by postorder traversal of the etree. */ - for (j = 0; j < n; ) { - parent = et[j]; - snode_start = j; - while ( parent != n && descendants[parent] < relax_columns ) { - j = parent; - parent = et[j]; - } - /* Found a supernode with j being the last column. */ - relax_end[snode_start] = j; /* Last column is recorded */ - j++; - /* Search for a new leaf */ - while ( j < n && descendants[j] != 0 ) j++; - } - - /*printf("No of relaxed snodes: %d; relaxed columns: %d\n", - nsuper, no_relaxed_col); */ -} diff --git a/intern/opennl/superlu/scolumn_bmod.c b/intern/opennl/superlu/scolumn_bmod.c deleted file mode 100644 index 9854115b894..00000000000 --- a/intern/opennl/superlu/scolumn_bmod.c +++ /dev/null @@ -1,355 +0,0 @@ -/** \file opennl/superlu/scolumn_bmod.c - * \ingroup opennl - */ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include <stdio.h> -#include <stdlib.h> -#include "ssp_defs.h" - -/* - * Function prototypes - */ -void susolve(int, int, double*, double*); -void slsolve(int, int, double*, double*); -void smatvec(int, int, int, double*, double*, double*); - - - -/* Return value: 0 - successful return - * > 0 - number of bytes allocated when run out of space - */ -int -scolumn_bmod ( - const int jcol, /* in */ - const int nseg, /* in */ - double *dense, /* in */ - double *tempv, /* working array */ - int *segrep, /* in */ - int *repfnz, /* in */ - int fpanelc, /* in -- first column in the current panel */ - GlobalLU_t *Glu, /* modified */ - SuperLUStat_t *stat /* output */ - ) -{ -/* - * Purpose: - * ======== - * Performs numeric block updates (sup-col) in topological order. - * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. - * Special processing on the supernodal portion of L\U[*,j] - * - */ -#ifdef _CRAY - _fcd ftcs1 = _cptofcd("L", strlen("L")), - ftcs2 = _cptofcd("N", strlen("N")), - ftcs3 = _cptofcd("U", strlen("U")); -#endif - -#ifdef USE_VENDOR_BLAS - int incx = 1, incy = 1; - double alpha, beta; -#endif - - /* krep = representative of current k-th supernode - * fsupc = first supernodal column - * nsupc = no of columns in supernode - * nsupr = no of rows in supernode (used as leading dimension) - * luptr = location of supernodal LU-block in storage - * kfnz = first nonz in the k-th supernodal segment - * no_zeros = no of leading zeros in a supernodal U-segment - */ - double ukj, ukj1, ukj2; - int luptr, luptr1, luptr2; - int fsupc, nsupc, nsupr, segsze; - int nrow; /* No of rows in the matrix of matrix-vector */ - int jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno; - register int lptr, kfnz, isub, irow, i; - register int no_zeros, new_next; - int ufirst, nextlu; - int fst_col; /* First column within small LU update */ - int d_fsupc; /* Distance between the first column of the current - panel and the first column of the current snode. */ - int *xsup, *supno; - int *lsub, *xlsub; - double *lusup; - int *xlusup; - int nzlumax; - double *tempv1; - double zero = 0.0; -#ifdef USE_VENDOR_BLAS - double one = 1.0; - double none = -1.0; -#endif - int mem_error; - flops_t *ops = stat->ops; - - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - lusup = Glu->lusup; - xlusup = Glu->xlusup; - nzlumax = Glu->nzlumax; - jcolp1 = jcol + 1; - jsupno = supno[jcol]; - - /* - * For each nonz supernode segment of U[*,j] in topological order - */ - k = nseg - 1; - for (ksub = 0; ksub < nseg; ksub++) { - - krep = segrep[k]; - k--; - ksupno = supno[krep]; - if ( jsupno != ksupno ) { /* Outside the rectangular supernode */ - - fsupc = xsup[ksupno]; - fst_col = SUPERLU_MAX ( fsupc, fpanelc ); - - /* Distance from the current supernode to the current panel; - d_fsupc=0 if fsupc > fpanelc. */ - d_fsupc = fst_col - fsupc; - - luptr = xlusup[fst_col] + d_fsupc; - lptr = xlsub[fsupc] + d_fsupc; - - kfnz = repfnz[krep]; - kfnz = SUPERLU_MAX ( kfnz, fpanelc ); - - segsze = krep - kfnz + 1; - nsupc = krep - fst_col + 1; - nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */ - nrow = nsupr - d_fsupc - nsupc; - krep_ind = lptr + nsupc - 1; - - ops[TRSV] += segsze * (segsze - 1); - ops[GEMV] += 2 * nrow * segsze; - - - /* - * Case 1: Update U-segment of size 1 -- col-col update - */ - if ( segsze == 1 ) { - ukj = dense[lsub[krep_ind]]; - luptr += nsupr*(nsupc-1) + nsupc; - - for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { - irow = lsub[i]; - dense[irow] -= ukj*lusup[luptr]; - luptr++; - } - - } else if ( segsze <= 3 ) { - ukj = dense[lsub[krep_ind]]; - luptr += nsupr*(nsupc-1) + nsupc-1; - ukj1 = dense[lsub[krep_ind - 1]]; - luptr1 = luptr - nsupr; - - if ( segsze == 2 ) { /* Case 2: 2cols-col update */ - ukj -= ukj1 * lusup[luptr1]; - dense[lsub[krep_ind]] = ukj; - for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { - irow = lsub[i]; - luptr++; - luptr1++; - dense[irow] -= ( ukj*lusup[luptr] - + ukj1*lusup[luptr1] ); - } - } else { /* Case 3: 3cols-col update */ - ukj2 = dense[lsub[krep_ind - 2]]; - luptr2 = luptr1 - nsupr; - ukj1 -= ukj2 * lusup[luptr2-1]; - ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2]; - dense[lsub[krep_ind]] = ukj; - dense[lsub[krep_ind-1]] = ukj1; - for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { - irow = lsub[i]; - luptr++; - luptr1++; - luptr2++; - dense[irow] -= ( ukj*lusup[luptr] - + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] ); - } - } - - - - } else { - /* - * Case: sup-col update - * Perform a triangular solve and block update, - * then scatter the result of sup-col update to dense - */ - - no_zeros = kfnz - fst_col; - - /* Copy U[*,j] segment from dense[*] to tempv[*] */ - isub = lptr + no_zeros; - for (i = 0; i < segsze; i++) { - irow = lsub[isub]; - tempv[i] = dense[irow]; - ++isub; - } - - /* Dense triangular solve -- start effective triangle */ - luptr += nsupr * no_zeros + no_zeros; - -#ifdef USE_VENDOR_BLAS -#ifdef _CRAY - STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], - &nsupr, tempv, &incx ); -#else - strsv_( "L", "N", "U", &segsze, &lusup[luptr], - &nsupr, tempv, &incx ); -#endif - luptr += segsze; /* Dense matrix-vector */ - tempv1 = &tempv[segsze]; - alpha = one; - beta = zero; -#ifdef _CRAY - SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], - &nsupr, tempv, &incx, &beta, tempv1, &incy ); -#else - sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], - &nsupr, tempv, &incx, &beta, tempv1, &incy ); -#endif -#else - slsolve ( nsupr, segsze, &lusup[luptr], tempv ); - - luptr += segsze; /* Dense matrix-vector */ - tempv1 = &tempv[segsze]; - smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1); -#endif - - - /* Scatter tempv[] into SPA dense[] as a temporary storage */ - isub = lptr + no_zeros; - for (i = 0; i < segsze; i++) { - irow = lsub[isub]; - dense[irow] = tempv[i]; - tempv[i] = zero; - ++isub; - } - - /* Scatter tempv1[] into SPA dense[] */ - for (i = 0; i < nrow; i++) { - irow = lsub[isub]; - dense[irow] -= tempv1[i]; - tempv1[i] = zero; - ++isub; - } - } - - } /* if jsupno ... */ - - } /* for each segment... */ - - /* - * Process the supernodal portion of L\U[*,j] - */ - nextlu = xlusup[jcol]; - fsupc = xsup[jsupno]; - - /* Copy the SPA dense into L\U[*,j] */ - new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc]; - while ( new_next > nzlumax ) { - if ((mem_error = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))) - return (mem_error); - lusup = Glu->lusup; - lsub = Glu->lsub; - } - - for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) { - irow = lsub[isub]; - lusup[nextlu] = dense[irow]; - dense[irow] = zero; - ++nextlu; - } - - xlusup[jcolp1] = nextlu; /* Close L\U[*,jcol] */ - - /* For more updates within the panel (also within the current supernode), - * should start from the first column of the panel, or the first column - * of the supernode, whichever is bigger. There are 2 cases: - * 1) fsupc < fpanelc, then fst_col := fpanelc - * 2) fsupc >= fpanelc, then fst_col := fsupc - */ - fst_col = SUPERLU_MAX ( fsupc, fpanelc ); - - if ( fst_col < jcol ) { - - /* Distance between the current supernode and the current panel. - d_fsupc=0 if fsupc >= fpanelc. */ - d_fsupc = fst_col - fsupc; - - luptr = xlusup[fst_col] + d_fsupc; - nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */ - nsupc = jcol - fst_col; /* Excluding jcol */ - nrow = nsupr - d_fsupc - nsupc; - - /* Points to the beginning of jcol in snode L\U(jsupno) */ - ufirst = xlusup[jcol] + d_fsupc; - - ops[TRSV] += nsupc * (nsupc - 1); - ops[GEMV] += 2 * nrow * nsupc; - -#ifdef USE_VENDOR_BLAS -#ifdef _CRAY - STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], - &nsupr, &lusup[ufirst], &incx ); -#else - strsv_( "L", "N", "U", &nsupc, &lusup[luptr], - &nsupr, &lusup[ufirst], &incx ); -#endif - - alpha = none; beta = one; /* y := beta*y + alpha*A*x */ - -#ifdef _CRAY - SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, - &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); -#else - sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, - &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); -#endif -#else - slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] ); - - smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], - &lusup[ufirst], tempv ); - - /* Copy updates from tempv[*] into lusup[*] */ - isub = ufirst + nsupc; - for (i = 0; i < nrow; i++) { - lusup[isub] -= tempv[i]; - tempv[i] = 0.0; - ++isub; - } - -#endif - - - } /* if fst_col < jcol ... */ - - return 0; -} diff --git a/intern/opennl/superlu/scolumn_dfs.c b/intern/opennl/superlu/scolumn_dfs.c deleted file mode 100644 index 8f7da485a86..00000000000 --- a/intern/opennl/superlu/scolumn_dfs.c +++ /dev/null @@ -1,273 +0,0 @@ -/** \file opennl/superlu/scolumn_dfs.c - * \ingroup opennl - */ - - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include "ssp_defs.h" - -/* What type of supernodes we want */ -#define T2_SUPER - -int -scolumn_dfs( - const int m, /* in - number of rows in the matrix */ - const int jcol, /* in */ - int *perm_r, /* in */ - int *nseg, /* modified - with new segments appended */ - int *lsub_col, /* in - defines the RHS vector to start the dfs */ - int *segrep, /* modified - with new segments appended */ - int *repfnz, /* modified */ - int *xprune, /* modified */ - int *marker, /* modified */ - int *parent, /* working array */ - int *xplore, /* working array */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* - * Purpose - * ======= - * "column_dfs" performs a symbolic factorization on column jcol, and - * decide the supernode boundary. - * - * This routine does not use numeric values, but only use the RHS - * row indices to start the dfs. - * - * A supernode representative is the last column of a supernode. - * The nonzeros in U[*,j] are segments that end at supernodal - * representatives. The routine returns a list of such supernodal - * representatives in topological order of the dfs that generates them. - * The location of the first nonzero in each such supernodal segment - * (supernodal entry location) is also returned. - * - * Local parameters - * ================ - * nseg: no of segments in current U[*,j] - * jsuper: jsuper=EMPTY if column j does not belong to the same - * supernode as j-1. Otherwise, jsuper=nsuper. - * - * marker2: A-row --> A-row/col (0/1) - * repfnz: SuperA-col --> PA-row - * parent: SuperA-col --> SuperA-col - * xplore: SuperA-col --> index to L-structure - * - * Return value - * ============ - * 0 success; - * > 0 number of bytes allocated when run out of space. - * - */ - int jcolp1, jcolm1, jsuper, nsuper, nextl; - int k, krep, krow, kmark, kperm; - int *marker2; /* Used for small panel LU */ - int fsupc; /* First column of a snode */ - int myfnz; /* First nonz column of a U-segment */ - int chperm, chmark, chrep, kchild; - int xdfs, maxdfs, kpar, oldrep; - int jptr, jm1ptr; - int ito, ifrom, istop; /* Used to compress row subscripts */ - int mem_error; - int *xsup, *supno, *lsub, *xlsub; - int nzlmax; - static int first = 1, maxsuper; - - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - nzlmax = Glu->nzlmax; - - if ( first ) { - maxsuper = sp_ienv(3); - first = 0; - } - jcolp1 = jcol + 1; - jcolm1 = jcol - 1; - nsuper = supno[jcol]; - jsuper = nsuper; - nextl = xlsub[jcol]; - marker2 = &marker[2*m]; - - - /* For each nonzero in A[*,jcol] do dfs */ - for (k = 0; lsub_col[k] != EMPTY; k++) { - - krow = lsub_col[k]; - lsub_col[k] = EMPTY; - kmark = marker2[krow]; - - /* krow was visited before, go to the next nonz */ - if ( kmark == jcol ) continue; - - /* For each unmarked nbr krow of jcol - * krow is in L: place it in structure of L[*,jcol] - */ - marker2[krow] = jcol; - kperm = perm_r[krow]; - - if ( kperm == EMPTY ) { - lsub[nextl++] = krow; /* krow is indexed into A */ - if ( nextl >= nzlmax ) { - if ((mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu))) - return (mem_error); - lsub = Glu->lsub; - } - if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */ - } else { - /* krow is in U: if its supernode-rep krep - * has been explored, update repfnz[*] - */ - krep = xsup[supno[kperm]+1] - 1; - myfnz = repfnz[krep]; - - if ( myfnz != EMPTY ) { /* Visited before */ - if ( myfnz > kperm ) repfnz[krep] = kperm; - /* continue; */ - } - else { - /* Otherwise, perform dfs starting at krep */ - oldrep = EMPTY; - parent[krep] = oldrep; - repfnz[krep] = kperm; - xdfs = xlsub[krep]; - maxdfs = xprune[krep]; - - do { - /* - * For each unmarked kchild of krep - */ - while ( xdfs < maxdfs ) { - - kchild = lsub[xdfs]; - xdfs++; - chmark = marker2[kchild]; - - if ( chmark != jcol ) { /* Not reached yet */ - marker2[kchild] = jcol; - chperm = perm_r[kchild]; - - /* Case kchild is in L: place it in L[*,k] */ - if ( chperm == EMPTY ) { - lsub[nextl++] = kchild; - if ( nextl >= nzlmax ) { - if ((mem_error = - sLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu))) - return (mem_error); - lsub = Glu->lsub; - } - if ( chmark != jcolm1 ) jsuper = EMPTY; - } else { - /* Case kchild is in U: - * chrep = its supernode-rep. If its rep has - * been explored, update its repfnz[*] - */ - chrep = xsup[supno[chperm]+1] - 1; - myfnz = repfnz[chrep]; - if ( myfnz != EMPTY ) { /* Visited before */ - if ( myfnz > chperm ) - repfnz[chrep] = chperm; - } else { - /* Continue dfs at super-rep of kchild */ - xplore[krep] = xdfs; - oldrep = krep; - krep = chrep; /* Go deeper down G(L^t) */ - parent[krep] = oldrep; - repfnz[krep] = chperm; - xdfs = xlsub[krep]; - maxdfs = xprune[krep]; - } /* else */ - - } /* else */ - - } /* if */ - - } /* while */ - - /* krow has no more unexplored nbrs; - * place supernode-rep krep in postorder DFS. - * backtrack dfs to its parent - */ - segrep[*nseg] = krep; - ++(*nseg); - kpar = parent[krep]; /* Pop from stack, mimic recursion */ - if ( kpar == EMPTY ) break; /* dfs done */ - krep = kpar; - xdfs = xplore[krep]; - maxdfs = xprune[krep]; - - } while ( kpar != EMPTY ); /* Until empty stack */ - - } /* else */ - - } /* else */ - - } /* for each nonzero ... */ - - /* Check to see if j belongs in the same supernode as j-1 */ - if ( jcol == 0 ) { /* Do nothing for column 0 */ - nsuper = supno[0] = 0; - } else { - fsupc = xsup[nsuper]; - jptr = xlsub[jcol]; /* Not compressed yet */ - jm1ptr = xlsub[jcolm1]; - -#ifdef T2_SUPER - if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY; -#endif - /* Make sure the number of columns in a supernode doesn't - exceed threshold. */ - if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY; - - /* If jcol starts a new supernode, reclaim storage space in - * lsub from the previous supernode. Note we only store - * the subscript set of the first and last columns of - * a supernode. (first for num values, last for pruning) - */ - if ( jsuper == EMPTY ) { /* starts a new supernode */ - if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */ -#ifdef CHK_COMPRESS - printf(" Compress lsub[] at super %d-%d\n", fsupc, jcolm1); -#endif - ito = xlsub[fsupc+1]; - xlsub[jcolm1] = ito; - istop = ito + jptr - jm1ptr; - xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */ - xlsub[jcol] = istop; - for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito) - lsub[ito] = lsub[ifrom]; - nextl = ito; /* = istop + length(jcol) */ - } - nsuper++; - supno[jcol] = nsuper; - } /* if a new supernode */ - - } /* else: jcol > 0 */ - - /* Tidy up the pointers before exit */ - xsup[nsuper+1] = jcolp1; - supno[jcolp1] = nsuper; - xprune[jcol] = nextl; /* Initialize upper bound for pruning */ - xlsub[jcolp1] = nextl; - - return 0; -} diff --git a/intern/opennl/superlu/scopy_to_ucol.c b/intern/opennl/superlu/scopy_to_ucol.c deleted file mode 100644 index 4cf7d64a349..00000000000 --- a/intern/opennl/superlu/scopy_to_ucol.c +++ /dev/null @@ -1,108 +0,0 @@ -/** \file opennl/superlu/scopy_to_ucol.c - * \ingroup opennl - */ - - -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include "ssp_defs.h" -#include "util.h" - -int -scopy_to_ucol( - int jcol, /* in */ - int nseg, /* in */ - int *segrep, /* in */ - int *repfnz, /* in */ - int *perm_r, /* in */ - double *dense, /* modified - reset to zero on return */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* - * Gather from SPA dense[*] to global ucol[*]. - */ - int ksub, krep, ksupno; - int i, k, kfnz, segsze; - int fsupc, isub, irow; - int jsupno, nextu; - int new_next, mem_error; - int *xsup, *supno; - int *lsub, *xlsub; - double *ucol; - int *usub, *xusub; - int nzumax; - - double zero = 0.0; - - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - ucol = Glu->ucol; - usub = Glu->usub; - xusub = Glu->xusub; - nzumax = Glu->nzumax; - - jsupno = supno[jcol]; - nextu = xusub[jcol]; - k = nseg - 1; - for (ksub = 0; ksub < nseg; ksub++) { - krep = segrep[k--]; - ksupno = supno[krep]; - - if ( ksupno != jsupno ) { /* Should go into ucol[] */ - kfnz = repfnz[krep]; - if ( kfnz != EMPTY ) { /* Nonzero U-segment */ - - fsupc = xsup[ksupno]; - isub = xlsub[fsupc] + kfnz - fsupc; - segsze = krep - kfnz + 1; - - new_next = nextu + segsze; - while ( new_next > nzumax ) { - if ((mem_error = sLUMemXpand(jcol, nextu, UCOL, &nzumax, Glu))) - return (mem_error); - ucol = Glu->ucol; - if ((mem_error = sLUMemXpand(jcol, nextu, USUB, &nzumax, Glu))) - return (mem_error); - usub = Glu->usub; - lsub = Glu->lsub; - } - - for (i = 0; i < segsze; i++) { - irow = lsub[isub]; - usub[nextu] = perm_r[irow]; - ucol[nextu] = dense[irow]; - dense[irow] = zero; - nextu++; - isub++; - } - - } - - } - - } /* for each segment... */ - - xusub[jcol + 1] = nextu; /* Close U[*,jcol] */ - return 0; -} diff --git a/intern/opennl/superlu/sgssv.c b/intern/opennl/superlu/sgssv.c deleted file mode 100644 index b2a9848e597..00000000000 --- a/intern/opennl/superlu/sgssv.c +++ /dev/null @@ -1,224 +0,0 @@ -/** \file opennl/superlu/sgssv.c - * \ingroup opennl - */ - - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -#include "ssp_defs.h" - -void -sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, - SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, - SuperLUStat_t *stat, int *info ) -{ -/* - * Purpose - * ======= - * - * SGSSV solves the system of linear equations A*X=B, using the - * LU factorization from SGSTRF. It performs the following steps: - * - * 1. If A is stored column-wise (A->Stype = SLU_NC): - * - * 1.1. Permute the columns of A, forming A*Pc, where Pc - * is a permutation matrix. For more details of this step, - * see sp_preorder.c. - * - * 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined - * by Gaussian elimination with partial pivoting. - * L is unit lower triangular with offdiagonal entries - * bounded by 1 in magnitude, and U is upper triangular. - * - * 1.3. Solve the system of equations A*X=B using the factored - * form of A. - * - * 2. If A is stored row-wise (A->Stype = SLU_NR), apply the - * above algorithm to the transpose of A: - * - * 2.1. Permute columns of transpose(A) (rows of A), - * forming transpose(A)*Pc, where Pc is a permutation matrix. - * For more details of this step, see sp_preorder.c. - * - * 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr - * determined by Gaussian elimination with partial pivoting. - * L is unit lower triangular with offdiagonal entries - * bounded by 1 in magnitude, and U is upper triangular. - * - * 2.3. Solve the system of equations A*X=B using the factored - * form of A. - * - * See supermatrix.h for the definition of 'SuperMatrix' structure. - * - * Arguments - * ========= - * - * options (input) superlu_options_t* - * The structure defines the input parameters to control - * how the LU decomposition will be performed and how the - * system will be solved. - * - * A (input) SuperMatrix* - * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number - * of linear equations is A->nrow. Currently, the type of A can be: - * Stype = SLU_NC or SLU_NR; Dtype = SLU_S; Mtype = SLU_GE. - * In the future, more general A may be handled. - * - * perm_c (input/output) int* - * If A->Stype = SLU_NC, column permutation vector of size A->ncol - * which defines the permutation matrix Pc; perm_c[i] = j means - * column i of A is in position j in A*Pc. - * If A->Stype = SLU_NR, column permutation vector of size A->nrow - * which describes permutation of columns of transpose(A) - * (rows of A) as described above. - * - * If options->ColPerm = MY_PERMC or options->Fact = SamePattern or - * options->Fact = SamePattern_SameRowPerm, it is an input argument. - * On exit, perm_c may be overwritten by the product of the input - * perm_c and a permutation that postorders the elimination tree - * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree - * is already in postorder. - * Otherwise, it is an output argument. - * - * perm_r (input/output) int* - * If A->Stype = SLU_NC, row permutation vector of size A->nrow, - * which defines the permutation matrix Pr, and is determined - * by partial pivoting. perm_r[i] = j means row i of A is in - * position j in Pr*A. - * If A->Stype = SLU_NR, permutation vector of size A->ncol, which - * determines permutation of rows of transpose(A) - * (columns of A) as described above. - * - * If options->RowPerm = MY_PERMR or - * options->Fact = SamePattern_SameRowPerm, perm_r is an - * input argument. - * otherwise it is an output argument. - * - * L (output) SuperMatrix* - * The factor L from the factorization - * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or - * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). - * Uses compressed row subscripts storage for supernodes, i.e., - * L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU. - * - * U (output) SuperMatrix* - * The factor U from the factorization - * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or - * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). - * Uses column-wise storage scheme, i.e., U has types: - * Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU. - * - * B (input/output) SuperMatrix* - * B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE. - * On entry, the right hand side matrix. - * On exit, the solution matrix if info = 0; - * - * stat (output) SuperLUStat_t* - * Record the statistics on runtime and doubleing-point operation count. - * See util.h for the definition of 'SuperLUStat_t'. - * - * info (output) int* - * = 0: successful exit - * > 0: if info = i, and i is - * <= A->ncol: U(i,i) is exactly zero. The factorization has - * been completed, but the factor U is exactly singular, - * so the solution could not be computed. - * > A->ncol: number of bytes allocated when memory allocation - * failure occurred, plus A->ncol. - * - */ - DNformat *Bstore; - SuperMatrix *AA = NULL;/* A in SLU_NC format used by the factorization routine.*/ - SuperMatrix AC; /* Matrix postmultiplied by Pc */ - int lwork = 0, *etree, i; - - /* Set default values for some parameters */ - int panel_size; /* panel size */ - int relax; /* no of columns in a relaxed snodes */ - int permc_spec; - trans_t trans = NOTRANS; - double *utime; - double t; /* Temporary time */ - - /* Test the input parameters ... */ - *info = 0; - Bstore = B->Store; - if ( options->Fact != DOFACT ) *info = -1; - else if ( A->nrow != A->ncol || A->nrow < 0 || - (A->Stype != SLU_NC && A->Stype != SLU_NR) || - A->Dtype != SLU_S || A->Mtype != SLU_GE ) - *info = -2; - else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || - B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE ) - *info = -7; - if ( *info != 0 ) { - i = -(*info); - xerbla_("sgssv", &i); - return; - } - - utime = stat->utime; - - /* Convert A to SLU_NC format when necessary. */ - if ( A->Stype == SLU_NR ) { - NRformat *Astore = A->Store; - AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); - sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, - Astore->nzval, Astore->colind, Astore->rowptr, - SLU_NC, A->Dtype, A->Mtype); - trans = TRANS; - } else { - if ( A->Stype == SLU_NC ) AA = A; - } - - t = SuperLU_timer_(); - /* - * Get column permutation vector perm_c[], according to permc_spec: - * permc_spec = NATURAL: natural ordering - * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A - * permc_spec = MMD_ATA: minimum degree on structure of A'*A - * permc_spec = COLAMD: approximate minimum degree column ordering - * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] - */ - permc_spec = options->ColPerm; - if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) - get_perm_c(permc_spec, AA, perm_c); - utime[COLPERM] = SuperLU_timer_() - t; - - etree = intMalloc(A->ncol); - - t = SuperLU_timer_(); - sp_preorder(options, AA, perm_c, etree, &AC); - utime[ETREE] = SuperLU_timer_() - t; - - panel_size = sp_ienv(1); - relax = sp_ienv(2); - - /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", - relax, panel_size, sp_ienv(3), sp_ienv(4));*/ - t = SuperLU_timer_(); - /* Compute the LU factorization of A. */ - sgstrf(options, &AC, relax, panel_size, - etree, NULL, lwork, perm_c, perm_r, L, U, stat, info); - utime[FACT] = SuperLU_timer_() - t; - - t = SuperLU_timer_(); - if ( *info == 0 ) { - /* Solve the system A*X=B, overwriting B with X. */ - sgstrs (trans, L, U, perm_c, perm_r, B, stat, info); - } - utime[SOLVE] = SuperLU_timer_() - t; - - SUPERLU_FREE (etree); - Destroy_CompCol_Permuted(&AC); - if ( A->Stype == SLU_NR ) { - Destroy_SuperMatrix_Store(AA); - SUPERLU_FREE(AA); - } - -} diff --git a/intern/opennl/superlu/sgstrf.c b/intern/opennl/superlu/sgstrf.c deleted file mode 100644 index 78b1ba21865..00000000000 --- a/intern/opennl/superlu/sgstrf.c +++ /dev/null @@ -1,457 +0,0 @@ -/** \file opennl/superlu/sgstrf.c - * \ingroup opennl - */ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include "ssp_defs.h" - -void -sgstrf (superlu_options_t *options, SuperMatrix *A, - int relax, int panel_size, int *etree, void *work, int lwork, - int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, - SuperLUStat_t *stat, int *info) -{ -/* - * Purpose - * ======= - * - * SGSTRF computes an LU factorization of a general sparse m-by-n - * matrix A using partial pivoting with row interchanges. - * The factorization has the form - * Pr * A = L * U - * where Pr is a row permutation matrix, L is lower triangular with unit - * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper - * triangular (upper trapezoidal if A->nrow < A->ncol). - * - * See supermatrix.h for the definition of 'SuperMatrix' structure. - * - * Arguments - * ========= - * - * options (input) superlu_options_t* - * The structure defines the input parameters to control - * how the LU decomposition will be performed. - * - * A (input) SuperMatrix* - * Original matrix A, permuted by columns, of dimension - * (A->nrow, A->ncol). The type of A can be: - * Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE. - * - * drop_tol (input) double (NOT IMPLEMENTED) - * Drop tolerance parameter. At step j of the Gaussian elimination, - * if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij. - * 0 <= drop_tol <= 1. The default value of drop_tol is 0. - * - * relax (input) int - * To control degree of relaxing supernodes. If the number - * of nodes (columns) in a subtree of the elimination tree is less - * than relax, this subtree is considered as one supernode, - * regardless of the row structures of those columns. - * - * panel_size (input) int - * A panel consists of at most panel_size consecutive columns. - * - * etree (input) int*, dimension (A->ncol) - * Elimination tree of A'*A. - * Note: etree is a vector of parent pointers for a forest whose - * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. - * On input, the columns of A should be permuted so that the - * etree is in a certain postorder. - * - * work (input/output) void*, size (lwork) (in bytes) - * User-supplied work space and space for the output data structures. - * Not referenced if lwork = 0; - * - * lwork (input) int - * Specifies the size of work array in bytes. - * = 0: allocate space internally by system malloc; - * > 0: use user-supplied work array of length lwork in bytes, - * returns error if space runs out. - * = -1: the routine guesses the amount of space needed without - * performing the factorization, and returns it in - * *info; no other side effects. - * - * perm_c (input) int*, dimension (A->ncol) - * Column permutation vector, which defines the - * permutation matrix Pc; perm_c[i] = j means column i of A is - * in position j in A*Pc. - * When searching for diagonal, perm_c[*] is applied to the - * row subscripts of A, so that diagonal threshold pivoting - * can find the diagonal of A, rather than that of A*Pc. - * - * perm_r (input/output) int*, dimension (A->nrow) - * Row permutation vector which defines the permutation matrix Pr, - * perm_r[i] = j means row i of A is in position j in Pr*A. - * If options->Fact = SamePattern_SameRowPerm, the pivoting routine - * will try to use the input perm_r, unless a certain threshold - * criterion is violated. In that case, perm_r is overwritten by - * a new permutation determined by partial pivoting or diagonal - * threshold pivoting. - * Otherwise, perm_r is output argument; - * - * L (output) SuperMatrix* - * The factor L from the factorization Pr*A=L*U; use compressed row - * subscripts storage for supernodes, i.e., L has type: - * Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU. - * - * U (output) SuperMatrix* - * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise - * storage scheme, i.e., U has types: Stype = SLU_NC, - * Dtype = SLU_S, Mtype = SLU_TRU. - * - * stat (output) SuperLUStat_t* - * Record the statistics on runtime and doubleing-point operation count. - * See util.h for the definition of 'SuperLUStat_t'. - * - * info (output) int* - * = 0: successful exit - * < 0: if info = -i, the i-th argument had an illegal value - * > 0: if info = i, and i is - * <= A->ncol: U(i,i) is exactly zero. The factorization has - * been completed, but the factor U is exactly singular, - * and division by zero will occur if it is used to solve a - * system of equations. - * > A->ncol: number of bytes allocated when memory allocation - * failure occurred, plus A->ncol. If lwork = -1, it is - * the estimated amount of space needed, plus A->ncol. - * - * ====================================================================== - * - * Local Working Arrays: - * ====================== - * m = number of rows in the matrix - * n = number of columns in the matrix - * - * xprune[0:n-1]: xprune[*] points to locations in subscript - * vector lsub[*]. For column i, xprune[i] denotes the point where - * structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need - * to be traversed for symbolic factorization. - * - * marker[0:3*m-1]: marker[i] = j means that node i has been - * reached when working on column j. - * Storage: relative to original row subscripts - * NOTE: There are 3 of them: marker/marker1 are used for panel dfs, - * see spanel_dfs.c; marker2 is used for inner-factorization, - * see scolumn_dfs.c. - * - * parent[0:m-1]: parent vector used during dfs - * Storage: relative to new row subscripts - * - * xplore[0:m-1]: xplore[i] gives the location of the next (dfs) - * unexplored neighbor of i in lsub[*] - * - * segrep[0:nseg-1]: contains the list of supernodal representatives - * in topological order of the dfs. A supernode representative is the - * last column of a supernode. - * The maximum size of segrep[] is n. - * - * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a - * supernodal representative r, repfnz[r] is the location of the first - * nonzero in this segment. It is also used during the dfs: repfnz[r]>0 - * indicates the supernode r has been explored. - * NOTE: There are W of them, each used for one column of a panel. - * - * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below - * the panel diagonal. These are filled in during spanel_dfs(), and are - * used later in the inner LU factorization within the panel. - * panel_lsub[]/dense[] pair forms the SPA data structure. - * NOTE: There are W of them. - * - * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values; - * NOTE: there are W of them. - * - * tempv[0:*]: real temporary used for dense numeric kernels; - * The size of this array is defined by NUM_TEMPV() in ssp_defs.h. - * - */ - /* Local working arrays */ - NCPformat *Astore; - int *iperm_r = NULL; /* inverse of perm_r; - used when options->Fact == SamePattern_SameRowPerm */ - int *iperm_c; /* inverse of perm_c */ - int *iwork; - double *swork; - int *segrep, *repfnz, *parent, *xplore; - int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ - int *xprune; - int *marker; - double *dense, *tempv; - int *relax_end; - double *a; - int *asub; - int *xa_begin, *xa_end; - int *xsup, *supno; - int *xlsub, *xlusup, *xusub; - int nzlumax; - static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ - - /* Local scalars */ - fact_t fact = options->Fact; - double diag_pivot_thresh = options->DiagPivotThresh; - int pivrow; /* pivotal row number in the original matrix A */ - int nseg1; /* no of segments in U-column above panel row jcol */ - int nseg; /* no of segments in each U-column */ - register int jcol; - register int kcol; /* end column of a relaxed snode */ - register int icol; - register int i, k, jj, new_next, iinfo; - int m, n, min_mn, jsupno, fsupc, nextlu, nextu; - int w_def; /* upper bound on panel width */ - int usepr, iperm_r_allocated = 0; - int nnzL, nnzU; - int *panel_histo = stat->panel_histo; - flops_t *ops = stat->ops; - - iinfo = 0; - m = A->nrow; - n = A->ncol; - min_mn = SUPERLU_MIN(m, n); - Astore = A->Store; - a = Astore->nzval; - asub = Astore->rowind; - xa_begin = Astore->colbeg; - xa_end = Astore->colend; - - /* Allocate storage common to the factor routines */ - *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz, - panel_size, L, U, &Glu, &iwork, &swork); - if ( *info ) return; - - xsup = Glu.xsup; - supno = Glu.supno; - xlsub = Glu.xlsub; - xlusup = Glu.xlusup; - xusub = Glu.xusub; - - SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, - &repfnz, &panel_lsub, &xprune, &marker); - sSetRWork(m, panel_size, swork, &dense, &tempv); - - usepr = (fact == SamePattern_SameRowPerm); - if ( usepr ) { - /* Compute the inverse of perm_r */ - iperm_r = (int *) intMalloc(m); - for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; - iperm_r_allocated = 1; - } - iperm_c = (int *) intMalloc(n); - for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; - - /* Identify relaxed snodes */ - relax_end = (int *) intMalloc(n); - if ( options->SymmetricMode == YES ) { - heap_relax_snode(n, etree, relax, marker, relax_end); - } else { - relax_snode(n, etree, relax, marker, relax_end); - } - - ifill (perm_r, m, EMPTY); - ifill (marker, m * NO_MARKER, EMPTY); - supno[0] = -1; - xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; - w_def = panel_size; - - /* - * Work on one "panel" at a time. A panel is one of the following: - * (a) a relaxed supernode at the bottom of the etree, or - * (b) panel_size contiguous columns, defined by the user - */ - for (jcol = 0; jcol < min_mn; ) { - - if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ - kcol = relax_end[jcol]; /* end of the relaxed snode */ - panel_histo[kcol-jcol+1]++; - - /* -------------------------------------- - * Factorize the relaxed supernode(jcol:kcol) - * -------------------------------------- */ - /* Determine the union of the row structure of the snode */ - if ( (*info = ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end, - xprune, marker, &Glu)) != 0 ) { - if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); - SUPERLU_FREE (iperm_c); - SUPERLU_FREE (relax_end); - return; - } - - nextu = xusub[jcol]; - nextlu = xlusup[jcol]; - jsupno = supno[jcol]; - fsupc = xsup[jsupno]; - new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); - nzlumax = Glu.nzlumax; - while ( new_next > nzlumax ) { - if ( (*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) ) { - if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); - SUPERLU_FREE (iperm_c); - SUPERLU_FREE (relax_end); - return; - } - } - - for (icol = jcol; icol<= kcol; icol++) { - xusub[icol+1] = nextu; - - /* Scatter into SPA dense[*] */ - for (k = xa_begin[icol]; k < xa_end[icol]; k++) - dense[asub[k]] = a[k]; - - /* Numeric update within the snode */ - ssnode_bmod(icol, fsupc, dense, tempv, &Glu, stat); - - if ( (*info = spivotL(icol, diag_pivot_thresh, &usepr, perm_r, - iperm_r, iperm_c, &pivrow, &Glu, stat)) ) - if ( iinfo == 0 ) iinfo = *info; - -#ifdef DEBUG - sprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu); -#endif - - } - - jcol = icol; - - } else { /* Work on one panel of panel_size columns */ - - /* Adjust panel_size so that a panel won't overlap with the next - * relaxed snode. - */ - panel_size = w_def; - for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) - if ( relax_end[k] != EMPTY ) { - panel_size = k - jcol; - break; - } - if ( k == min_mn ) panel_size = min_mn - jcol; - panel_histo[panel_size]++; - - /* symbolic factor on a panel of columns */ - spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, - dense, panel_lsub, segrep, repfnz, xprune, - marker, parent, xplore, &Glu); - - /* numeric sup-panel updates in topological order */ - spanel_bmod(m, panel_size, jcol, nseg1, dense, - tempv, segrep, repfnz, &Glu, stat); - - /* Sparse LU within the panel, and below panel diagonal */ - for ( jj = jcol; jj < jcol + panel_size; jj++) { - k = (jj - jcol) * m; /* column index for w-wide arrays */ - - nseg = nseg1; /* Begin after all the panel segments */ - - if ((*info = scolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k], - segrep, &repfnz[k], xprune, marker, - parent, xplore, &Glu)) != 0) { - if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); - SUPERLU_FREE (iperm_c); - SUPERLU_FREE (relax_end); - return; - } - - /* Numeric updates */ - if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k], - tempv, &segrep[nseg1], &repfnz[k], - jcol, &Glu, stat)) != 0) { - if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); - SUPERLU_FREE (iperm_c); - SUPERLU_FREE (relax_end); - return; - } - - /* Copy the U-segments to ucol[*] */ - if ((*info = scopy_to_ucol(jj, nseg, segrep, &repfnz[k], - perm_r, &dense[k], &Glu)) != 0) { - if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); - SUPERLU_FREE (iperm_c); - SUPERLU_FREE (relax_end); - return; - } - - if ( (*info = spivotL(jj, diag_pivot_thresh, &usepr, perm_r, - iperm_r, iperm_c, &pivrow, &Glu, stat)) ) - if ( iinfo == 0 ) iinfo = *info; - - /* Prune columns (0:jj-1) using column jj */ - spruneL(jj, perm_r, pivrow, nseg, segrep, - &repfnz[k], xprune, &Glu); - - /* Reset repfnz[] for this column */ - resetrep_col (nseg, segrep, &repfnz[k]); - -#ifdef DEBUG - sprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu); -#endif - - } - - jcol += panel_size; /* Move to the next panel */ - - } /* else */ - - } /* for */ - - *info = iinfo; - - if ( m > n ) { - k = 0; - for (i = 0; i < m; ++i) - if ( perm_r[i] == EMPTY ) { - perm_r[i] = n + k; - ++k; - } - } - - countnz(min_mn, xprune, &nnzL, &nnzU, &Glu); - fixupL(min_mn, perm_r, &Glu); - - sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */ - - if ( fact == SamePattern_SameRowPerm ) { - /* L and U structures may have changed due to possibly different - pivoting, even though the storage is available. - There could also be memory expansions, so the array locations - may have changed, */ - ((SCformat *)L->Store)->nnz = nnzL; - ((SCformat *)L->Store)->nsuper = Glu.supno[n]; - ((SCformat *)L->Store)->nzval = Glu.lusup; - ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; - ((SCformat *)L->Store)->rowind = Glu.lsub; - ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; - ((NCformat *)U->Store)->nnz = nnzU; - ((NCformat *)U->Store)->nzval = Glu.ucol; - ((NCformat *)U->Store)->rowind = Glu.usub; - ((NCformat *)U->Store)->colptr = Glu.xusub; - } else { - sCreate_SuperNode_Matrix(L, A->nrow, A->ncol, nnzL, Glu.lusup, - Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, - Glu.xsup, SLU_SC, SLU_S, SLU_TRLU); - sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, - Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU); - } - - ops[FACT] += ops[TRSV] + ops[GEMV]; - - if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); - SUPERLU_FREE (iperm_c); - SUPERLU_FREE (relax_end); -} diff --git a/intern/opennl/superlu/sgstrs.c b/intern/opennl/superlu/sgstrs.c deleted file mode 100644 index 5387e319d99..00000000000 --- a/intern/opennl/superlu/sgstrs.c +++ /dev/null @@ -1,334 +0,0 @@ -/** \file opennl/superlu/sgstrs.c - * \ingroup opennl - */ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include "ssp_defs.h" - - -/* - * Function prototypes - */ -void susolve(int, int, double*, double*); -void slsolve(int, int, double*, double*); -void smatvec(int, int, int, double*, double*, double*); -void sprint_soln(int , double *); - -void -sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, - int *perm_c, int *perm_r, SuperMatrix *B, - SuperLUStat_t *stat, int *info) -{ -/* - * Purpose - * ======= - * - * SGSTRS solves a system of linear equations A*X=B or A'*X=B - * with A sparse and B dense, using the LU factorization computed by - * SGSTRF. - * - * See supermatrix.h for the definition of 'SuperMatrix' structure. - * - * Arguments - * ========= - * - * trans (input) trans_t - * Specifies the form of the system of equations: - * = NOTRANS: A * X = B (No transpose) - * = TRANS: A'* X = B (Transpose) - * = CONJ: A**H * X = B (Conjugate transpose) - * - * L (input) SuperMatrix* - * The factor L from the factorization Pr*A*Pc=L*U as computed by - * sgstrf(). Use compressed row subscripts storage for supernodes, - * i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU. - * - * U (input) SuperMatrix* - * The factor U from the factorization Pr*A*Pc=L*U as computed by - * sgstrf(). Use column-wise storage scheme, i.e., U has types: - * Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU. - * - * perm_c (input) int*, dimension (L->ncol) - * Column permutation vector, which defines the - * permutation matrix Pc; perm_c[i] = j means column i of A is - * in position j in A*Pc. - * - * perm_r (input) int*, dimension (L->nrow) - * Row permutation vector, which defines the permutation matrix Pr; - * perm_r[i] = j means row i of A is in position j in Pr*A. - * - * B (input/output) SuperMatrix* - * B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE. - * On entry, the right hand side matrix. - * On exit, the solution matrix if info = 0; - * - * stat (output) SuperLUStat_t* - * Record the statistics on runtime and doubleing-point operation count. - * See util.h for the definition of 'SuperLUStat_t'. - * - * info (output) int* - * = 0: successful exit - * < 0: if info = -i, the i-th argument had an illegal value - * - */ -#ifdef _CRAY - _fcd ftcs1, ftcs2, ftcs3, ftcs4; -#endif -#ifdef USE_VENDOR_BLAS - double alpha = 1.0, beta = 1.0; - double *work_col; -#endif - DNformat *Bstore; - double *Bmat; - SCformat *Lstore; - NCformat *Ustore; - double *Lval, *Uval; - int fsupc, nrow, nsupr, nsupc, luptr, istart, irow; - int i, j, k, iptr, jcol, n, ldb, nrhs; - double *work, *rhs_work, *soln; - flops_t solve_ops; - void sprint_soln(); - - /* Test input parameters ... */ - *info = 0; - Bstore = B->Store; - ldb = Bstore->lda; - nrhs = B->ncol; - if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1; - else if ( L->nrow != L->ncol || L->nrow < 0 || - L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU ) - *info = -2; - else if ( U->nrow != U->ncol || U->nrow < 0 || - U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU ) - *info = -3; - else if ( ldb < SUPERLU_MAX(0, L->nrow) || - B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE ) - *info = -6; - if ( *info ) { - i = -(*info); - xerbla_("sgstrs", &i); - return; - } - - n = L->nrow; - work = doubleCalloc(n * nrhs); - if ( !work ) ABORT("Malloc fails for local work[]."); - soln = doubleMalloc(n); - if ( !soln ) ABORT("Malloc fails for local soln[]."); - - Bmat = Bstore->nzval; - Lstore = L->Store; - Lval = Lstore->nzval; - Ustore = U->Store; - Uval = Ustore->nzval; - solve_ops = 0; - - if ( trans == NOTRANS ) { - /* Permute right hand sides to form Pr*B */ - for (i = 0; i < nrhs; i++) { - rhs_work = &Bmat[i*ldb]; - for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k]; - for (k = 0; k < n; k++) rhs_work[k] = soln[k]; - } - - /* Forward solve PLy=Pb. */ - for (k = 0; k <= Lstore->nsuper; k++) { - fsupc = L_FST_SUPC(k); - istart = L_SUB_START(fsupc); - nsupr = L_SUB_START(fsupc+1) - istart; - nsupc = L_FST_SUPC(k+1) - fsupc; - nrow = nsupr - nsupc; - - solve_ops += nsupc * (nsupc - 1) * nrhs; - solve_ops += 2 * nrow * nsupc * nrhs; - - if ( nsupc == 1 ) { - for (j = 0; j < nrhs; j++) { - rhs_work = &Bmat[j*ldb]; - luptr = L_NZ_START(fsupc); - for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){ - irow = L_SUB(iptr); - ++luptr; - rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr]; - } - } - } else { - luptr = L_NZ_START(fsupc); -#ifdef USE_VENDOR_BLAS -#ifdef _CRAY - ftcs1 = _cptofcd("L", strlen("L")); - ftcs2 = _cptofcd("N", strlen("N")); - ftcs3 = _cptofcd("U", strlen("U")); - STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha, - &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); - - SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, - &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, - &beta, &work[0], &n ); -#else - strsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha, - &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); - - sgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, - &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, - &beta, &work[0], &n ); -#endif - for (j = 0; j < nrhs; j++) { - rhs_work = &Bmat[j*ldb]; - work_col = &work[j*n]; - iptr = istart + nsupc; - for (i = 0; i < nrow; i++) { - irow = L_SUB(iptr); - rhs_work[irow] -= work_col[i]; /* Scatter */ - work_col[i] = 0.0; - iptr++; - } - } -#else - for (j = 0; j < nrhs; j++) { - rhs_work = &Bmat[j*ldb]; - slsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]); - smatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc], - &rhs_work[fsupc], &work[0] ); - - iptr = istart + nsupc; - for (i = 0; i < nrow; i++) { - irow = L_SUB(iptr); - rhs_work[irow] -= work[i]; - work[i] = 0.0; - iptr++; - } - } -#endif - } /* else ... */ - } /* for L-solve */ - -#ifdef DEBUG - printf("After L-solve: y=\n"); - sprint_soln(n, Bmat); -#endif - - /* - * Back solve Ux=y. - */ - for (k = Lstore->nsuper; k >= 0; k--) { - fsupc = L_FST_SUPC(k); - istart = L_SUB_START(fsupc); - nsupr = L_SUB_START(fsupc+1) - istart; - nsupc = L_FST_SUPC(k+1) - fsupc; - luptr = L_NZ_START(fsupc); - - solve_ops += nsupc * (nsupc + 1) * nrhs; - - if ( nsupc == 1 ) { - rhs_work = &Bmat[0]; - for (j = 0; j < nrhs; j++) { - rhs_work[fsupc] /= Lval[luptr]; - rhs_work += ldb; - } - } else { -#ifdef USE_VENDOR_BLAS -#ifdef _CRAY - ftcs1 = _cptofcd("L", strlen("L")); - ftcs2 = _cptofcd("U", strlen("U")); - ftcs3 = _cptofcd("N", strlen("N")); - STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha, - &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); -#else - strsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha, - &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); -#endif -#else - for (j = 0; j < nrhs; j++) - susolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] ); -#endif - } - - for (j = 0; j < nrhs; ++j) { - rhs_work = &Bmat[j*ldb]; - for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) { - solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); - for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){ - irow = U_SUB(i); - rhs_work[irow] -= rhs_work[jcol] * Uval[i]; - } - } - } - - } /* for U-solve */ - -#ifdef DEBUG - printf("After U-solve: x=\n"); - sprint_soln(n, Bmat); -#endif - - /* Compute the final solution X := Pc*X. */ - for (i = 0; i < nrhs; i++) { - rhs_work = &Bmat[i*ldb]; - for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]]; - for (k = 0; k < n; k++) rhs_work[k] = soln[k]; - } - - stat->ops[SOLVE] = solve_ops; - - } else { /* Solve A'*X=B or CONJ(A)*X=B */ - /* Permute right hand sides to form Pc'*B. */ - for (i = 0; i < nrhs; i++) { - rhs_work = &Bmat[i*ldb]; - for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k]; - for (k = 0; k < n; k++) rhs_work[k] = soln[k]; - } - - stat->ops[SOLVE] = 0; - for (k = 0; k < nrhs; ++k) { - - /* Multiply by inv(U'). */ - sp_strsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info); - - /* Multiply by inv(L'). */ - sp_strsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info); - - } - /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */ - for (i = 0; i < nrhs; i++) { - rhs_work = &Bmat[i*ldb]; - for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]]; - for (k = 0; k < n; k++) rhs_work[k] = soln[k]; - } - - } - - SUPERLU_FREE(work); - SUPERLU_FREE(soln); -} - -/* - * Diagnostic print of the solution vector - */ -void -sprint_soln(int n, double *soln) -{ - int i; - - for (i = 0; i < n; i++) - printf("\t%d: %.4f\n", i, soln[i]); -} diff --git a/intern/opennl/superlu/smemory.c b/intern/opennl/superlu/smemory.c deleted file mode 100644 index c3b28a90e62..00000000000 --- a/intern/opennl/superlu/smemory.c +++ /dev/null @@ -1,683 +0,0 @@ -/** \file smemory.c - * \ingroup opennl - */ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -#include "ssp_defs.h" - - -/* blender only: needed for int_ptr, no other BLI used here */ -#include "../../../source/blender/blenlib/BLI_sys_types.h" - -/* Constants */ -#define NO_MEMTYPE 4 /* 0: lusup; - 1: ucol; - 2: lsub; - 3: usub */ -#define GluIntArray(n) (5 * (n) + 5) - -/* Internal prototypes */ -void *sexpand (int *, MemType,int, int, GlobalLU_t *); -int sLUWorkInit (int, int, int, int **, double **, LU_space_t); -void copy_mem_double (int, void *, void *); -void sStackCompress (GlobalLU_t *); -void sSetupSpace (void *, int, LU_space_t *); -void *suser_malloc (int, int); -void suser_free (int, int); - -/* External prototypes (in memory.c - prec-indep) */ -extern void copy_mem_int (int, void *, void *); -extern void user_bcopy (char *, char *, int); - -/* Headers for 4 types of dynamatically managed memory */ -typedef struct e_node { - int size; /* length of the memory that has been used */ - void *mem; /* pointer to the new malloc'd store */ -} ExpHeader; - -typedef struct { - int size; - int used; - int top1; /* grow upward, relative to &array[0] */ - int top2; /* grow downward */ - void *array; -} LU_stack_t; - -/* Variables local to this file */ -static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */ -static LU_stack_t stack; -static int no_expand; - -/* Macros to manipulate stack */ -#define StackFull(x) ( x + stack.used >= stack.size ) -#define NotDoubleAlign(addr) ( (intptr_t)addr & 7 ) -#define DoubleAlign(addr) ( ((intptr_t)addr + 7) & ~7L ) -#define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \ - (w + 1) * m * sizeof(double) ) -#define Reduce(alpha) ((alpha + 1) / 2) /* i.e. (alpha-1)/2 + 1 */ - - - - -/* - * Setup the memory model to be used for factorization. - * lwork = 0: use system malloc; - * lwork > 0: use user-supplied work[] space. - */ -void sSetupSpace(void *work, int lwork, LU_space_t *MemModel) -{ - if ( lwork == 0 ) { - *MemModel = SYSTEM; /* malloc/free */ - } else if ( lwork > 0 ) { - *MemModel = USER; /* user provided space */ - stack.used = 0; - stack.top1 = 0; - stack.top2 = (lwork/4)*4; /* must be word addressable */ - stack.size = stack.top2; - stack.array = (void *) work; - } -} - - - -void *suser_malloc(int bytes, int which_end) -{ - void *buf; - - if ( StackFull(bytes) ) return (NULL); - - if ( which_end == HEAD ) { - buf = (char*) stack.array + stack.top1; - stack.top1 += bytes; - } else { - stack.top2 -= bytes; - buf = (char*) stack.array + stack.top2; - } - - stack.used += bytes; - return buf; -} - - -void suser_free(int bytes, int which_end) -{ - if ( which_end == HEAD ) { - stack.top1 -= bytes; - } else { - stack.top2 += bytes; - } - stack.used -= bytes; -} - - - -/* - * mem_usage consists of the following fields: - * - for_lu (double) - * The amount of space used in bytes for the L\U data structures. - * - total_needed (double) - * The amount of space needed in bytes to perform factorization. - * - expansions (int) - * Number of memory expansions during the LU factorization. - */ -int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage) -{ - SCformat *Lstore; - NCformat *Ustore; - register int n, iword, dword, panel_size = sp_ienv(1); - - Lstore = L->Store; - Ustore = U->Store; - n = L->ncol; - iword = sizeof(int); - dword = sizeof(double); - - /* For LU factors */ - mem_usage->for_lu = (double)( (4*n + 3) * iword + Lstore->nzval_colptr[n] * - dword + Lstore->rowind_colptr[n] * iword ); - mem_usage->for_lu += (double)( (n + 1) * iword + - Ustore->colptr[n] * (dword + iword) ); - - /* Working storage to support factorization */ - mem_usage->total_needed = mem_usage->for_lu + - (double)( (2 * panel_size + 4 + NO_MARKER) * n * iword + - (panel_size + 1) * n * dword ); - - mem_usage->expansions = --no_expand; - - return 0; -} /* sQuerySpace */ - -/* - * Allocate storage for the data structures common to all factor routines. - * For those unpredictable size, make a guess as FILL * nnz(A). - * Return value: - * If lwork = -1, return the estimated amount of space required, plus n; - * otherwise, return the amount of space actually allocated when - * memory allocation failure occurred. - */ -int -sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz, - int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu, - int **iwork, double **dwork) -{ - int info, iword, dword; - SCformat *Lstore; - NCformat *Ustore; - int *xsup, *supno; - int *lsub, *xlsub; - double *lusup; - int *xlusup; - double *ucol; - int *usub, *xusub; - int nzlmax, nzumax, nzlumax; - int FILL = sp_ienv(6); - - Glu->n = n; - no_expand = 0; - iword = sizeof(int); - dword = sizeof(double); - - if ( !expanders ) - expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader)); - if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders"); - - if ( fact != SamePattern_SameRowPerm ) { - /* Guess for L\U factors */ - nzumax = nzlumax = FILL * annz; - nzlmax = SUPERLU_MAX(1, FILL/4.) * annz; - - if ( lwork == -1 ) { - return ( GluIntArray(n) * iword + TempSpace(m, panel_size) - + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n ); - } else { - sSetupSpace(work, lwork, &Glu->MemModel); - } - -#ifdef DEBUG - printf("sLUMemInit() called: annz %d, MemModel %d\n", - annz, Glu->MemModel); -#endif - - /* Integer pointers for L\U factors */ - if ( Glu->MemModel == SYSTEM ) { - xsup = intMalloc(n+1); - supno = intMalloc(n+1); - xlsub = intMalloc(n+1); - xlusup = intMalloc(n+1); - xusub = intMalloc(n+1); - } else { - xsup = (int *)suser_malloc((n+1) * iword, HEAD); - supno = (int *)suser_malloc((n+1) * iword, HEAD); - xlsub = (int *)suser_malloc((n+1) * iword, HEAD); - xlusup = (int *)suser_malloc((n+1) * iword, HEAD); - xusub = (int *)suser_malloc((n+1) * iword, HEAD); - } - - lusup = (double *) sexpand( &nzlumax, LUSUP, 0, 0, Glu ); - ucol = (double *) sexpand( &nzumax, UCOL, 0, 0, Glu ); - lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu ); - usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu ); - - while ( !lusup || !ucol || !lsub || !usub ) { - if ( Glu->MemModel == SYSTEM ) { - SUPERLU_FREE(lusup); - SUPERLU_FREE(ucol); - SUPERLU_FREE(lsub); - SUPERLU_FREE(usub); - } else { - suser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD); - } - nzlumax /= 2; - nzumax /= 2; - nzlmax /= 2; - if ( nzlumax < annz ) { - printf("Not enough memory to perform factorization.\n"); - return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n); - } - lusup = (double *) sexpand( &nzlumax, LUSUP, 0, 0, Glu ); - ucol = (double *) sexpand( &nzumax, UCOL, 0, 0, Glu ); - lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu ); - usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu ); - } - - } else { - /* fact == SamePattern_SameRowPerm */ - Lstore = L->Store; - Ustore = U->Store; - xsup = Lstore->sup_to_col; - supno = Lstore->col_to_sup; - xlsub = Lstore->rowind_colptr; - xlusup = Lstore->nzval_colptr; - xusub = Ustore->colptr; - nzlmax = Glu->nzlmax; /* max from previous factorization */ - nzumax = Glu->nzumax; - nzlumax = Glu->nzlumax; - - if ( lwork == -1 ) { - return ( GluIntArray(n) * iword + TempSpace(m, panel_size) - + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n ); - } else if ( lwork == 0 ) { - Glu->MemModel = SYSTEM; - } else { - Glu->MemModel = USER; - stack.top2 = (lwork/4)*4; /* must be word-addressable */ - stack.size = stack.top2; - } - - lsub = expanders[LSUB].mem = Lstore->rowind; - lusup = expanders[LUSUP].mem = Lstore->nzval; - usub = expanders[USUB].mem = Ustore->rowind; - ucol = expanders[UCOL].mem = Ustore->nzval;; - expanders[LSUB].size = nzlmax; - expanders[LUSUP].size = nzlumax; - expanders[USUB].size = nzumax; - expanders[UCOL].size = nzumax; - } - - Glu->xsup = xsup; - Glu->supno = supno; - Glu->lsub = lsub; - Glu->xlsub = xlsub; - Glu->lusup = lusup; - Glu->xlusup = xlusup; - Glu->ucol = ucol; - Glu->usub = usub; - Glu->xusub = xusub; - Glu->nzlmax = nzlmax; - Glu->nzumax = nzumax; - Glu->nzlumax = nzlumax; - - info = sLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel); - if ( info ) - return ( info + smemory_usage(nzlmax, nzumax, nzlumax, n) + n); - - ++no_expand; - return 0; - -} /* sLUMemInit */ - -/* Allocate known working storage. Returns 0 if success, otherwise - returns the number of bytes allocated so far when failure occurred. */ -int -sLUWorkInit(int m, int n, int panel_size, int **iworkptr, - double **dworkptr, LU_space_t MemModel) -{ - int isize, dsize, extra; - double *old_ptr; - int maxsuper = sp_ienv(3), - rowblk = sp_ienv(4); - - isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int); - dsize = (m * panel_size + - NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(double); - - if ( MemModel == SYSTEM ) - *iworkptr = (int *) intCalloc(isize/sizeof(int)); - else - *iworkptr = (int *) suser_malloc(isize, TAIL); - if ( ! *iworkptr ) { - fprintf(stderr, "sLUWorkInit: malloc fails for local iworkptr[]\n"); - return (isize + n); - } - - if ( MemModel == SYSTEM ) - *dworkptr = (double *) SUPERLU_MALLOC(dsize); - else { - *dworkptr = (double *) suser_malloc(dsize, TAIL); - if ( NotDoubleAlign(*dworkptr) ) { - old_ptr = *dworkptr; - *dworkptr = (double*) DoubleAlign(*dworkptr); - *dworkptr = (double*) ((double*)*dworkptr - 1); - extra = (char*)old_ptr - (char*)*dworkptr; -#ifdef DEBUG - printf("sLUWorkInit: not aligned, extra %d\n", extra); -#endif - stack.top2 -= extra; - stack.used += extra; - } - } - if ( ! *dworkptr ) { - fprintf(stderr, "malloc fails for local dworkptr[]."); - return (isize + dsize + n); - } - - return 0; -} - - -/* - * Set up pointers for real working arrays. - */ -void -sSetRWork(int m, int panel_size, double *dworkptr, - double **dense, double **tempv) -{ - double zero = 0.0; - - int maxsuper = sp_ienv(3), - rowblk = sp_ienv(4); - *dense = dworkptr; - *tempv = *dense + panel_size*m; - sfill (*dense, m * panel_size, zero); - sfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero); -} - -/* - * Free the working storage used by factor routines. - */ -void sLUWorkFree(int *iwork, double *dwork, GlobalLU_t *Glu) -{ - if ( Glu->MemModel == SYSTEM ) { - SUPERLU_FREE (iwork); - SUPERLU_FREE (dwork); - } else { - stack.used -= (stack.size - stack.top2); - stack.top2 = stack.size; -/* sStackCompress(Glu); */ - } - - SUPERLU_FREE (expanders); - expanders = 0; -} - -/* Expand the data structures for L and U during the factorization. - * Return value: 0 - successful return - * > 0 - number of bytes allocated when run out of space - */ -int -sLUMemXpand(int jcol, - int next, /* number of elements currently in the factors */ - MemType mem_type, /* which type of memory to expand */ - int *maxlen, /* modified - maximum length of a data structure */ - GlobalLU_t *Glu /* modified - global LU data structures */ - ) -{ - void *new_mem; - -#ifdef DEBUG - printf("sLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n", - jcol, next, *maxlen, mem_type); -#endif - - if (mem_type == USUB) - new_mem = sexpand(maxlen, mem_type, next, 1, Glu); - else - new_mem = sexpand(maxlen, mem_type, next, 0, Glu); - - if ( !new_mem ) { - int nzlmax = Glu->nzlmax; - int nzumax = Glu->nzumax; - int nzlumax = Glu->nzlumax; - fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol); - return (smemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n); - } - - switch ( mem_type ) { - case LUSUP: - Glu->lusup = (double *) new_mem; - Glu->nzlumax = *maxlen; - break; - case UCOL: - Glu->ucol = (double *) new_mem; - Glu->nzumax = *maxlen; - break; - case LSUB: - Glu->lsub = (int *) new_mem; - Glu->nzlmax = *maxlen; - break; - case USUB: - Glu->usub = (int *) new_mem; - Glu->nzumax = *maxlen; - break; - } - - return 0; - -} - - - -void -copy_mem_double(int howmany, void *old, void *new) -{ - register int i; - double *dold = old; - double *dnew = new; - for (i = 0; i < howmany; i++) dnew[i] = dold[i]; -} - -/* - * Expand the existing storage to accommodate more fill-ins. - */ -void -*sexpand ( - int *prev_len, /* length used from previous call */ - MemType type, /* which part of the memory to expand */ - int len_to_copy, /* size of the memory to be copied to new store */ - int keep_prev, /* = 1: use prev_len; - = 0: compute new_len to expand */ - GlobalLU_t *Glu /* modified - global LU data structures */ - ) -{ - double EXPAND = 1.5; - double alpha; - void *new_mem, *old_mem; - int new_len, tries, lword, extra, bytes_to_copy; - - alpha = EXPAND; - - if ( no_expand == 0 || keep_prev ) /* First time allocate requested */ - new_len = *prev_len; - else { - new_len = alpha * *prev_len; - } - - if ( type == LSUB || type == USUB ) lword = sizeof(int); - else lword = sizeof(double); - - if ( Glu->MemModel == SYSTEM ) { - new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * (size_t)lword); -/* new_mem = (void *) calloc(new_len, lword); */ - if ( no_expand != 0 ) { - tries = 0; - if ( keep_prev ) { - if ( !new_mem ) return (NULL); - } else { - while ( !new_mem ) { - if ( ++tries > 10 ) return (NULL); - alpha = Reduce(alpha); - new_len = alpha * *prev_len; - new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * (size_t)lword); -/* new_mem = (void *) calloc(new_len, lword); */ - } - } - if ( type == LSUB || type == USUB ) { - copy_mem_int(len_to_copy, expanders[type].mem, new_mem); - } else { - copy_mem_double(len_to_copy, expanders[type].mem, new_mem); - } - SUPERLU_FREE (expanders[type].mem); - } - expanders[type].mem = (void *) new_mem; - - } else { /* MemModel == USER */ - if ( no_expand == 0 ) { - new_mem = suser_malloc((size_t)new_len * (size_t)lword, HEAD); - if ( NotDoubleAlign(new_mem) && - (type == LUSUP || type == UCOL) ) { - old_mem = new_mem; - new_mem = (void *)DoubleAlign(new_mem); - extra = (char*)new_mem - (char*)old_mem; -#ifdef DEBUG - printf("expand(): not aligned, extra %d\n", extra); -#endif - stack.top1 += extra; - stack.used += extra; - } - expanders[type].mem = (void *) new_mem; - } - else { - tries = 0; - extra = (new_len - *prev_len) * lword; - if ( keep_prev ) { - if ( StackFull(extra) ) return (NULL); - } else { - while ( StackFull(extra) ) { - if ( ++tries > 10 ) return (NULL); - alpha = Reduce(alpha); - new_len = alpha * *prev_len; - extra = (new_len - *prev_len) * lword; - } - } - - if ( type != USUB ) { - new_mem = (void*)((char*)expanders[type + 1].mem + extra); - bytes_to_copy = (char*)stack.array + stack.top1 - - (char*)expanders[type + 1].mem; - user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy); - - if ( type < USUB ) { - Glu->usub = expanders[USUB].mem = - (void*)((char*)expanders[USUB].mem + extra); - } - if ( type < LSUB ) { - Glu->lsub = expanders[LSUB].mem = - (void*)((char*)expanders[LSUB].mem + extra); - } - if ( type < UCOL ) { - Glu->ucol = expanders[UCOL].mem = - (void*)((char*)expanders[UCOL].mem + extra); - } - stack.top1 += extra; - stack.used += extra; - if ( type == UCOL ) { - stack.top1 += extra; /* Add same amount for USUB */ - stack.used += extra; - } - - } /* if ... */ - - } /* else ... */ - } - - expanders[type].size = new_len; - *prev_len = new_len; - if ( no_expand ) ++no_expand; - - return (void *) expanders[type].mem; - -} /* sexpand */ - - -/* - * Compress the work[] array to remove fragmentation. - */ -void -sStackCompress(GlobalLU_t *Glu) -{ - register int iword, dword, ndim; - char *last, *fragment; - int *ifrom, *ito; - double *dfrom, *dto; - int *xlsub, *lsub, *xusub, *usub, *xlusup; - double *ucol, *lusup; - - iword = sizeof(int); - dword = sizeof(double); - ndim = Glu->n; - - xlsub = Glu->xlsub; - lsub = Glu->lsub; - xusub = Glu->xusub; - usub = Glu->usub; - xlusup = Glu->xlusup; - ucol = Glu->ucol; - lusup = Glu->lusup; - - dfrom = ucol; - dto = (double *)((char*)lusup + xlusup[ndim] * dword); - copy_mem_double(xusub[ndim], dfrom, dto); - ucol = dto; - - ifrom = lsub; - ito = (int *) ((char*)ucol + xusub[ndim] * iword); - copy_mem_int(xlsub[ndim], ifrom, ito); - lsub = ito; - - ifrom = usub; - ito = (int *) ((char*)lsub + xlsub[ndim] * iword); - copy_mem_int(xusub[ndim], ifrom, ito); - usub = ito; - - last = (char*)usub + xusub[ndim] * iword; - fragment = (char*) (((char*)stack.array + stack.top1) - last); - stack.used -= (intptr_t) fragment; - stack.top1 -= (intptr_t) fragment; - - Glu->ucol = ucol; - Glu->lsub = lsub; - Glu->usub = usub; - -#ifdef DEBUG - printf("sStackCompress: fragment %d\n", (int)*fragment); - /* for (last = 0; last < ndim; ++last) - print_lu_col("After compress:", last, 0);*/ -#endif - -} - -/* - * Allocate storage for original matrix A - */ -void -sallocateA(int n, int nnz, double **a, int **asub, int **xa) -{ - *a = (double *) doubleMalloc(nnz); - *asub = (int *) intMalloc(nnz); - *xa = (int *) intMalloc(n+1); -} - - -double *doubleMalloc(int n) -{ - double *buf; - buf = (double *) SUPERLU_MALLOC(n * sizeof(double)); - if ( !buf ) { - ABORT("SUPERLU_MALLOC failed for buf in doubleMalloc()\n"); - } - return (buf); -} - -double *doubleCalloc(int n) -{ - double *buf; - register int i; - double zero = 0.0; - buf = (double *) SUPERLU_MALLOC(n * sizeof(double)); - if ( !buf ) { - ABORT("SUPERLU_MALLOC failed for buf in doubleCalloc()\n"); - } - for (i = 0; i < n; ++i) buf[i] = zero; - return (buf); -} - - -int smemory_usage(const int nzlmax, const int nzumax, - const int nzlumax, const int n) -{ - register int iword, dword; - - iword = sizeof(int); - dword = sizeof(double); - - return (10 * n * iword + - nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword); - -} diff --git a/intern/opennl/superlu/smyblas2.c b/intern/opennl/superlu/smyblas2.c deleted file mode 100644 index 11e3b4b4761..00000000000 --- a/intern/opennl/superlu/smyblas2.c +++ /dev/null @@ -1,235 +0,0 @@ -/** \file opennl/superlu/smyblas2.c - * \ingroup opennl - */ - - -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - */ -/* - * File name: smyblas2.c - * Purpose: - * Level 2 BLAS operations: solves and matvec, written in C. - * Note: - * This is only used when the system lacks an efficient BLAS library. - */ - -/* - * Solves a dense UNIT lower triangular system. The unit lower - * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). - * The solution will be returned in the rhs vector. - */ - -/* local prototypes*/ -void slsolve ( int, int, double *, double *); -void susolve ( int, int, double *, double *); -void smatvec ( int, int, int, double *, double *, double *); - - -void slsolve ( int ldm, int ncol, double *M, double *rhs ) -{ - int k; - double x0, x1, x2, x3, x4, x5, x6, x7; - double *M0; - register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7; - register int firstcol = 0; - - M0 = &M[0]; - - while ( firstcol < ncol - 7 ) { /* Do 8 columns */ - Mki0 = M0 + 1; - Mki1 = Mki0 + ldm + 1; - Mki2 = Mki1 + ldm + 1; - Mki3 = Mki2 + ldm + 1; - Mki4 = Mki3 + ldm + 1; - Mki5 = Mki4 + ldm + 1; - Mki6 = Mki5 + ldm + 1; - Mki7 = Mki6 + ldm + 1; - - x0 = rhs[firstcol]; - x1 = rhs[firstcol+1] - x0 * *Mki0++; - x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++; - x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++; - x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ - - x3 * *Mki3++; - x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ - - x3 * *Mki3++ - x4 * *Mki4++; - x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ - - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++; - x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++ - - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++ - - x6 * *Mki6++; - - rhs[++firstcol] = x1; - rhs[++firstcol] = x2; - rhs[++firstcol] = x3; - rhs[++firstcol] = x4; - rhs[++firstcol] = x5; - rhs[++firstcol] = x6; - rhs[++firstcol] = x7; - ++firstcol; - - for (k = firstcol; k < ncol; k++) - rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++ - - x2 * *Mki2++ - x3 * *Mki3++ - - x4 * *Mki4++ - x5 * *Mki5++ - - x6 * *Mki6++ - x7 * *Mki7++; - - M0 += 8 * ldm + 8; - } - - while ( firstcol < ncol - 3 ) { /* Do 4 columns */ - Mki0 = M0 + 1; - Mki1 = Mki0 + ldm + 1; - Mki2 = Mki1 + ldm + 1; - Mki3 = Mki2 + ldm + 1; - - x0 = rhs[firstcol]; - x1 = rhs[firstcol+1] - x0 * *Mki0++; - x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++; - x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++; - - rhs[++firstcol] = x1; - rhs[++firstcol] = x2; - rhs[++firstcol] = x3; - ++firstcol; - - for (k = firstcol; k < ncol; k++) - rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++ - - x2 * *Mki2++ - x3 * *Mki3++; - - M0 += 4 * ldm + 4; - } - - if ( firstcol < ncol - 1 ) { /* Do 2 columns */ - Mki0 = M0 + 1; - Mki1 = Mki0 + ldm + 1; - - x0 = rhs[firstcol]; - x1 = rhs[firstcol+1] - x0 * *Mki0++; - - rhs[++firstcol] = x1; - ++firstcol; - - for (k = firstcol; k < ncol; k++) - rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++; - - } - -} - -/* - * Solves a dense upper triangular system. The upper triangular matrix is - * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned - * in the rhs vector. - */ -void -susolve ( ldm, ncol, M, rhs ) -int ldm; /* in */ -int ncol; /* in */ -double *M; /* in */ -double *rhs; /* modified */ -{ - double xj; - int jcol, j, irow; - - jcol = ncol - 1; - - for (j = 0; j < ncol; j++) { - - xj = rhs[jcol] / M[jcol + jcol*ldm]; /* M(jcol, jcol) */ - rhs[jcol] = xj; - - for (irow = 0; irow < jcol; irow++) - rhs[irow] -= xj * M[irow + jcol*ldm]; /* M(irow, jcol) */ - - jcol--; - - } -} - - -/* - * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec. - * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[]. - */ -void smatvec ( ldm, nrow, ncol, M, vec, Mxvec ) - -int ldm; /* in -- leading dimension of M */ -int nrow; /* in */ -int ncol; /* in */ -double *M; /* in */ -double *vec; /* in */ -double *Mxvec; /* in/out */ - -{ - double vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7; - double *M0; - register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7; - register int firstcol = 0; - int k; - - M0 = &M[0]; - while ( firstcol < ncol - 7 ) { /* Do 8 columns */ - - Mki0 = M0; - Mki1 = Mki0 + ldm; - Mki2 = Mki1 + ldm; - Mki3 = Mki2 + ldm; - Mki4 = Mki3 + ldm; - Mki5 = Mki4 + ldm; - Mki6 = Mki5 + ldm; - Mki7 = Mki6 + ldm; - - vi0 = vec[firstcol++]; - vi1 = vec[firstcol++]; - vi2 = vec[firstcol++]; - vi3 = vec[firstcol++]; - vi4 = vec[firstcol++]; - vi5 = vec[firstcol++]; - vi6 = vec[firstcol++]; - vi7 = vec[firstcol++]; - - for (k = 0; k < nrow; k++) - Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++ - + vi2 * *Mki2++ + vi3 * *Mki3++ - + vi4 * *Mki4++ + vi5 * *Mki5++ - + vi6 * *Mki6++ + vi7 * *Mki7++; - - M0 += 8 * ldm; - } - - while ( firstcol < ncol - 3 ) { /* Do 4 columns */ - - Mki0 = M0; - Mki1 = Mki0 + ldm; - Mki2 = Mki1 + ldm; - Mki3 = Mki2 + ldm; - - vi0 = vec[firstcol++]; - vi1 = vec[firstcol++]; - vi2 = vec[firstcol++]; - vi3 = vec[firstcol++]; - for (k = 0; k < nrow; k++) - Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++ - + vi2 * *Mki2++ + vi3 * *Mki3++ ; - - M0 += 4 * ldm; - } - - while ( firstcol < ncol ) { /* Do 1 column */ - - Mki0 = M0; - vi0 = vec[firstcol++]; - for (k = 0; k < nrow; k++) - Mxvec[k] += vi0 * *Mki0++; - - M0 += ldm; - } - -} - diff --git a/intern/opennl/superlu/sp_coletree.c b/intern/opennl/superlu/sp_coletree.c deleted file mode 100644 index 7e7187ae8b0..00000000000 --- a/intern/opennl/superlu/sp_coletree.c +++ /dev/null @@ -1,335 +0,0 @@ -/** \file opennl/superlu/sp_coletree.c - * \ingroup opennl - */ - -/* Elimination tree computation and layout routines */ - -#include <stdio.h> -#include <stdlib.h> -#include "ssp_defs.h" - -/* - * Implementation of disjoint set union routines. - * Elements are integers in 0..n-1, and the - * names of the sets themselves are of type int. - * - * Calls are: - * initialize_disjoint_sets (n) initial call. - * s = make_set (i) returns a set containing only i. - * s = link (t, u) returns s = t union u, destroying t and u. - * s = find (i) return name of set containing i. - * finalize_disjoint_sets final call. - * - * This implementation uses path compression but not weighted union. - * See Tarjan's book for details. - * John Gilbert, CMI, 1987. - * - * Implemented path-halving by XSL 07/05/95. - */ - -static int *pp; /* parent array for sets */ - -static -int *mxCallocInt(int n) -{ - register int i; - int *buf; - - buf = (int *) SUPERLU_MALLOC( n * sizeof(int) ); - if ( !buf ) { - ABORT("SUPERLU_MALLOC fails for buf in mxCallocInt()"); - } - for (i = 0; i < n; i++) buf[i] = 0; - return (buf); -} - -static -void initialize_disjoint_sets ( - int n - ) -{ - pp = mxCallocInt(n); -} - - -static -int make_set ( - int i - ) -{ - pp[i] = i; - return i; -} - - -static -int link ( - int s, - int t - ) -{ - pp[s] = t; - return t; -} - - -/* PATH HALVING */ -static -int find (int i) -{ - register int p, gp; - - p = pp[i]; - gp = pp[p]; - while (gp != p) { - pp[i] = gp; - i = gp; - p = pp[i]; - gp = pp[p]; - } - return (p); -} - -#if 0 -/* PATH COMPRESSION */ -static -int find ( - int i - ) -{ - if (pp[i] != i) - pp[i] = find (pp[i]); - return pp[i]; -} -#endif - -static -void finalize_disjoint_sets ( - void - ) -{ - SUPERLU_FREE(pp); -} - - -/* - * Find the elimination tree for A'*A. - * This uses something similar to Liu's algorithm. - * It runs in time O(nz(A)*log n) and does not form A'*A. - * - * Input: - * Sparse matrix A. Numeric values are ignored, so any - * explicit zeros are treated as nonzero. - * Output: - * Integer array of parents representing the elimination - * tree of the symbolic product A'*A. Each vertex is a - * column of A, and nc means a root of the elimination forest. - * - * John R. Gilbert, Xerox, 10 Dec 1990 - * Based on code by JRG dated 1987, 1988, and 1990. - */ - -/* - * Nonsymmetric elimination tree - */ -int -sp_coletree( - int *acolst, int *acolend, /* column start and end past 1 */ - int *arow, /* row indices of A */ - int nr, int nc, /* dimension of A */ - int *parent /* parent in elim tree */ - ) -{ - int *root; /* root of subtee of etree */ - int *firstcol; /* first nonzero col in each row*/ - int rset, cset; - int row, col; - int rroot; - int p; - - root = mxCallocInt (nc); - initialize_disjoint_sets (nc); - - /* Compute firstcol[row] = first nonzero column in row */ - - firstcol = mxCallocInt (nr); - for (row = 0; row < nr; firstcol[row++] = nc); - for (col = 0; col < nc; col++) - for (p = acolst[col]; p < acolend[col]; p++) { - row = arow[p]; - firstcol[row] = SUPERLU_MIN(firstcol[row], col); - } - - /* Compute etree by Liu's algorithm for symmetric matrices, - except use (firstcol[r],c) in place of an edge (r,c) of A. - Thus each row clique in A'*A is replaced by a star - centered at its first vertex, which has the same fill. */ - - for (col = 0; col < nc; col++) { - cset = make_set (col); - root[cset] = col; - parent[col] = nc; /* Matlab */ - for (p = acolst[col]; p < acolend[col]; p++) { - row = firstcol[arow[p]]; - if (row >= col) continue; - rset = find (row); - rroot = root[rset]; - if (rroot != col) { - parent[rroot] = col; - cset = link (cset, rset); - root[cset] = col; - } - } - } - - SUPERLU_FREE (root); - SUPERLU_FREE (firstcol); - finalize_disjoint_sets (); - return 0; -} - -/* - * q = TreePostorder (n, p); - * - * Postorder a tree. - * Input: - * p is a vector of parent pointers for a forest whose - * vertices are the integers 0 to n-1; p[root]==n. - * Output: - * q is a vector indexed by 0..n-1 such that q[i] is the - * i-th vertex in a postorder numbering of the tree. - * - * ( 2/7/95 modified by X.Li: - * q is a vector indexed by 0:n-1 such that vertex i is the - * q[i]-th vertex in a postorder numbering of the tree. - * That is, this is the inverse of the previous q. ) - * - * In the child structure, lower-numbered children are represented - * first, so that a tree which is already numbered in postorder - * will not have its order changed. - * - * Written by John Gilbert, Xerox, 10 Dec 1990. - * Based on code written by John Gilbert at CMI in 1987. - */ - -static int *first_kid, *next_kid; /* Linked list of children. */ -static int *post, postnum; - -static -/* - * Depth-first search from vertex v. - */ -void etdfs ( - int v - ) -{ - int w; - - for (w = first_kid[v]; w != -1; w = next_kid[w]) { - etdfs (w); - } - /* post[postnum++] = v; in Matlab */ - post[v] = postnum++; /* Modified by X.Li on 2/14/95 */ -} - - -/* - * Post order a tree - */ -int *TreePostorder( - int n, - int *parent -) -{ - int v, dad; - - /* Allocate storage for working arrays and results */ - first_kid = mxCallocInt (n+1); - next_kid = mxCallocInt (n+1); - post = mxCallocInt (n+1); - - /* Set up structure describing children */ - for (v = 0; v <= n; first_kid[v++] = -1); - for (v = n-1; v >= 0; v--) { - dad = parent[v]; - next_kid[v] = first_kid[dad]; - first_kid[dad] = v; - } - - /* Depth-first search from dummy root vertex #n */ - postnum = 0; - etdfs (n); - - SUPERLU_FREE (first_kid); - SUPERLU_FREE (next_kid); - return post; -} - - -/* - * p = spsymetree (A); - * - * Find the elimination tree for symmetric matrix A. - * This uses Liu's algorithm, and runs in time O(nz*log n). - * - * Input: - * Square sparse matrix A. No check is made for symmetry; - * elements below and on the diagonal are ignored. - * Numeric values are ignored, so any explicit zeros are - * treated as nonzero. - * Output: - * Integer array of parents representing the etree, with n - * meaning a root of the elimination forest. - * Note: - * This routine uses only the upper triangle, while sparse - * Cholesky (as in spchol.c) uses only the lower. Matlab's - * dense Cholesky uses only the upper. This routine could - * be modified to use the lower triangle either by transposing - * the matrix or by traversing it by rows with auxiliary - * pointer and link arrays. - * - * John R. Gilbert, Xerox, 10 Dec 1990 - * Based on code by JRG dated 1987, 1988, and 1990. - * Modified by X.S. Li, November 1999. - */ - -/* - * Symmetric elimination tree - */ -int -sp_symetree( - int *acolst, int *acolend, /* column starts and ends past 1 */ - int *arow, /* row indices of A */ - int n, /* dimension of A */ - int *parent /* parent in elim tree */ - ) -{ - int *root; /* root of subtree of etree */ - int rset, cset; - int row, col; - int rroot; - int p; - - root = mxCallocInt (n); - initialize_disjoint_sets (n); - - for (col = 0; col < n; col++) { - cset = make_set (col); - root[cset] = col; - parent[col] = n; /* Matlab */ - for (p = acolst[col]; p < acolend[col]; p++) { - row = arow[p]; - if (row >= col) continue; - rset = find (row); - rroot = root[rset]; - if (rroot != col) { - parent[rroot] = col; - cset = link (cset, rset); - root[cset] = col; - } - } - } - SUPERLU_FREE (root); - finalize_disjoint_sets (); - return 0; -} /* SP_SYMETREE */ diff --git a/intern/opennl/superlu/sp_ienv.c b/intern/opennl/superlu/sp_ienv.c deleted file mode 100644 index e0f9693ae71..00000000000 --- a/intern/opennl/superlu/sp_ienv.c +++ /dev/null @@ -1,68 +0,0 @@ -/** \file opennl/superlu/sp_ienv.c - * \ingroup opennl - */ -/* - * File name: sp_ienv.c - * History: Modified from lapack routine ILAENV - */ - -#include "ssp_defs.h" -#include "util.h" - -int -sp_ienv(int ispec) -{ -/* - Purpose - ======= - - sp_ienv() is inquired to choose machine-dependent parameters for the - local environment. See ISPEC for a description of the parameters. - - This version provides a set of parameters which should give good, - but not optimal, performance on many of the currently available - computers. Users are encouraged to modify this subroutine to set - the tuning parameters for their particular machine using the option - and problem size information in the arguments. - - Arguments - ========= - - ISPEC (input) int - Specifies the parameter to be returned as the value of SP_IENV. - = 1: the panel size w; a panel consists of w consecutive - columns of matrix A in the process of Gaussian elimination. - The best value depends on machine's cache characters. - = 2: the relaxation parameter relax; if the number of - nodes (columns) in a subtree of the elimination tree is less - than relax, this subtree is considered as one supernode, - regardless of their row structures. - = 3: the maximum size for a supernode; - = 4: the minimum row dimension for 2-D blocking to be used; - = 5: the minimum column dimension for 2-D blocking to be used; - = 6: the estimated fills factor for L and U, compared with A; - - (SP_IENV) (output) int - >= 0: the value of the parameter specified by ISPEC - < 0: if SP_IENV = -k, the k-th argument had an illegal value. - - ===================================================================== -*/ - int i; - - switch (ispec) { - case 1: return (10); - case 2: return (5); - case 3: return (100); - case 4: return (200); - case 5: return (40); - case 6: return (20); - } - - /* Invalid value for ISPEC */ - i = 1; - xerbla_("sp_ienv", &i); - return 0; - -} /* sp_ienv_ */ - diff --git a/intern/opennl/superlu/sp_preorder.c b/intern/opennl/superlu/sp_preorder.c deleted file mode 100644 index 9504669726e..00000000000 --- a/intern/opennl/superlu/sp_preorder.c +++ /dev/null @@ -1,209 +0,0 @@ -/** \file opennl/superlu/sp_preorder.c - * \ingroup opennl - */ -#include "ssp_defs.h" - -int check_perm(char *, int , int *); - - -void -sp_preorder(superlu_options_t *options, SuperMatrix *A, int *perm_c, - int *etree, SuperMatrix *AC) -{ -/* - * Purpose - * ======= - * - * sp_preorder() permutes the columns of the original matrix. It performs - * the following steps: - * - * 1. Apply column permutation perm_c[] to A's column pointers to form AC; - * - * 2. If options->Fact = DOFACT, then - * (1) Compute column elimination tree etree[] of AC'AC; - * (2) Post order etree[] to get a postordered elimination tree etree[], - * and a postorder permutation post[]; - * (3) Apply post[] permutation to columns of AC; - * (4) Overwrite perm_c[] with the product perm_c * post. - * - * Arguments - * ========= - * - * options (input) superlu_options_t* - * Specifies whether or not the elimination tree will be re-used. - * If options->Fact == DOFACT, this means first time factor A, - * etree is computed, postered, and output. - * Otherwise, re-factor A, etree is input, unchanged on exit. - * - * A (input) SuperMatrix* - * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number - * of the linear equations is A->nrow. Currently, the type of A can be: - * Stype = NC or SLU_NCP; Mtype = SLU_GE. - * In the future, more general A may be handled. - * - * perm_c (input/output) int* - * Column permutation vector of size A->ncol, which defines the - * permutation matrix Pc; perm_c[i] = j means column i of A is - * in position j in A*Pc. - * If options->Fact == DOFACT, perm_c is both input and output. - * On output, it is changed according to a postorder of etree. - * Otherwise, perm_c is input. - * - * etree (input/output) int* - * Elimination tree of Pc'*A'*A*Pc, dimension A->ncol. - * If options->Fact == DOFACT, etree is an output argument, - * otherwise it is an input argument. - * Note: etree is a vector of parent pointers for a forest whose - * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. - * - * AC (output) SuperMatrix* - * The resulting matrix after applied the column permutation - * perm_c[] to matrix A. The type of AC can be: - * Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE. - * - */ - - NCformat *Astore; - NCPformat *ACstore; - int *iwork, *post; - register int n, i; - - n = A->ncol; - - /* Apply column permutation perm_c to A's column pointers so to - obtain NCP format in AC = A*Pc. */ - AC->Stype = SLU_NCP; - AC->Dtype = A->Dtype; - AC->Mtype = A->Mtype; - AC->nrow = A->nrow; - AC->ncol = A->ncol; - Astore = A->Store; - ACstore = AC->Store = (void *) SUPERLU_MALLOC( sizeof(NCPformat) ); - if ( !ACstore ) ABORT("SUPERLU_MALLOC fails for ACstore"); - ACstore->nnz = Astore->nnz; - ACstore->nzval = Astore->nzval; - ACstore->rowind = Astore->rowind; - ACstore->colbeg = (int*) SUPERLU_MALLOC(n*sizeof(int)); - if ( !(ACstore->colbeg) ) ABORT("SUPERLU_MALLOC fails for ACstore->colbeg"); - ACstore->colend = (int*) SUPERLU_MALLOC(n*sizeof(int)); - if ( !(ACstore->colend) ) ABORT("SUPERLU_MALLOC fails for ACstore->colend"); - -#ifdef DEBUG - print_int_vec("pre_order:", n, perm_c); - check_perm("Initial perm_c", n, perm_c); -#endif - - for (i = 0; i < n; i++) { - ACstore->colbeg[perm_c[i]] = Astore->colptr[i]; - ACstore->colend[perm_c[i]] = Astore->colptr[i+1]; - } - - if ( options->Fact == DOFACT ) { -#undef ETREE_ATplusA -#ifdef ETREE_ATplusA - /*-------------------------------------------- - COMPUTE THE ETREE OF Pc*(A'+A)*Pc'. - --------------------------------------------*/ - int *b_colptr, *b_rowind, bnz, j; - int *c_colbeg, *c_colend; - - /*printf("Use etree(A'+A)\n");*/ - - /* Form B = A + A'. */ - at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind, - &bnz, &b_colptr, &b_rowind); - - /* Form C = Pc*B*Pc'. */ - c_colbeg = (int*) SUPERLU_MALLOC(2*n*sizeof(int)); - c_colend = c_colbeg + n; - if (!c_colbeg ) ABORT("SUPERLU_MALLOC fails for c_colbeg/c_colend"); - for (i = 0; i < n; i++) { - c_colbeg[perm_c[i]] = b_colptr[i]; - c_colend[perm_c[i]] = b_colptr[i+1]; - } - for (j = 0; j < n; ++j) { - for (i = c_colbeg[j]; i < c_colend[j]; ++i) { - b_rowind[i] = perm_c[b_rowind[i]]; - } - } - - /* Compute etree of C. */ - sp_symetree(c_colbeg, c_colend, b_rowind, n, etree); - - SUPERLU_FREE(b_colptr); - if ( bnz ) SUPERLU_FREE(b_rowind); - SUPERLU_FREE(c_colbeg); - -#else - /*-------------------------------------------- - COMPUTE THE COLUMN ELIMINATION TREE. - --------------------------------------------*/ - sp_coletree(ACstore->colbeg, ACstore->colend, ACstore->rowind, - A->nrow, A->ncol, etree); -#endif -#ifdef DEBUG - print_int_vec("etree:", n, etree); -#endif - - /* In symmetric mode, do not do postorder here. */ - if ( options->SymmetricMode == NO ) { - /* Post order etree */ - post = (int *) TreePostorder(n, etree); - /* for (i = 0; i < n+1; ++i) inv_post[post[i]] = i; - iwork = post; */ - -#ifdef DEBUG - print_int_vec("post:", n+1, post); - check_perm("post", n, post); -#endif - iwork = (int*) SUPERLU_MALLOC((n+1)*sizeof(int)); - if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]"); - - /* Renumber etree in postorder */ - for (i = 0; i < n; ++i) iwork[post[i]] = post[etree[i]]; - for (i = 0; i < n; ++i) etree[i] = iwork[i]; - -#ifdef DEBUG - print_int_vec("postorder etree:", n, etree); -#endif - - /* Postmultiply A*Pc by post[] */ - for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colbeg[i]; - for (i = 0; i < n; ++i) ACstore->colbeg[i] = iwork[i]; - for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colend[i]; - for (i = 0; i < n; ++i) ACstore->colend[i] = iwork[i]; - - for (i = 0; i < n; ++i) - iwork[i] = post[perm_c[i]]; /* product of perm_c and post */ - for (i = 0; i < n; ++i) perm_c[i] = iwork[i]; - -#ifdef DEBUG - print_int_vec("Pc*post:", n, perm_c); - check_perm("final perm_c", n, perm_c); -#endif - SUPERLU_FREE (post); - SUPERLU_FREE (iwork); - } /* end postordering */ - - } /* if options->Fact == DOFACT ... */ - -} - -int check_perm(char *what, int n, int *perm) -{ - register int i; - int *marker; - marker = (int *) calloc(n, sizeof(int)); - - for (i = 0; i < n; ++i) { - if ( marker[perm[i]] == 1 || perm[i] >= n ) { - printf("%s: Not a valid PERM[%d] = %d\n", what, i, perm[i]); - ABORT("check_perm"); - } else { - marker[perm[i]] = 1; - } - } - - SUPERLU_FREE(marker); - return 0; -} diff --git a/intern/opennl/superlu/spanel_bmod.c b/intern/opennl/superlu/spanel_bmod.c deleted file mode 100644 index 5f150e640fd..00000000000 --- a/intern/opennl/superlu/spanel_bmod.c +++ /dev/null @@ -1,452 +0,0 @@ -/** \file opennl/superlu/spanel_bmod.c - * \ingroup opennl - */ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include <stdio.h> -#include <stdlib.h> -#include "ssp_defs.h" - -/* - * Function prototypes - */ -void slsolve(int, int, double *, double *); -void smatvec(int, int, int, double *, double *, double *); -extern void scheck_tempv(); - -void -spanel_bmod ( - const int m, /* in - number of rows in the matrix */ - const int w, /* in */ - const int jcol, /* in */ - const int nseg, /* in */ - double *dense, /* out, of size n by w */ - double *tempv, /* working array */ - int *segrep, /* in */ - int *repfnz, /* in, of size n by w */ - GlobalLU_t *Glu, /* modified */ - SuperLUStat_t *stat /* output */ - ) -{ -/* - * Purpose - * ======= - * - * Performs numeric block updates (sup-panel) in topological order. - * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. - * Special processing on the supernodal portion of L\U[*,j] - * - * Before entering this routine, the original nonzeros in the panel - * were already copied into the spa[m,w]. - * - * Updated/Output parameters- - * dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned - * collectively in the m-by-w vector dense[*]. - * - */ - -#ifdef USE_VENDOR_BLAS -#ifdef _CRAY - _fcd ftcs1 = _cptofcd("L", strlen("L")), - ftcs2 = _cptofcd("N", strlen("N")), - ftcs3 = _cptofcd("U", strlen("U")); -#endif - int incx = 1, incy = 1; - double alpha, beta; -#endif - - register int k, ksub; - int fsupc, nsupc, nsupr, nrow; - int krep, krep_ind; - double ukj, ukj1, ukj2; - int luptr, luptr1, luptr2; - int segsze; - int block_nrow; /* no of rows in a block row */ - register int lptr; /* Points to the row subscripts of a supernode */ - int kfnz, irow, no_zeros; - register int isub, isub1, i; - register int jj; /* Index through each column in the panel */ - int *xsup, *supno; - int *lsub, *xlsub; - double *lusup; - int *xlusup; - int *repfnz_col; /* repfnz[] for a column in the panel */ - double *dense_col; /* dense[] for a column in the panel */ - double *tempv1; /* Used in 1-D update */ - double *TriTmp, *MatvecTmp; /* used in 2-D update */ - double zero = 0.0; - register int ldaTmp; - register int r_ind, r_hi; - static int first = 1, maxsuper, rowblk, colblk; - flops_t *ops = stat->ops; - - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - lusup = Glu->lusup; - xlusup = Glu->xlusup; - - if ( first ) { - maxsuper = sp_ienv(3); - rowblk = sp_ienv(4); - colblk = sp_ienv(5); - first = 0; - } - ldaTmp = maxsuper + rowblk; - - /* - * For each nonz supernode segment of U[*,j] in topological order - */ - k = nseg - 1; - for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */ - - /* krep = representative of current k-th supernode - * fsupc = first supernodal column - * nsupc = no of columns in a supernode - * nsupr = no of rows in a supernode - */ - krep = segrep[k--]; - fsupc = xsup[supno[krep]]; - nsupc = krep - fsupc + 1; - nsupr = xlsub[fsupc+1] - xlsub[fsupc]; - nrow = nsupr - nsupc; - lptr = xlsub[fsupc]; - krep_ind = lptr + nsupc - 1; - - repfnz_col = repfnz; - dense_col = dense; - - if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */ - - TriTmp = tempv; - - /* Sequence through each column in panel -- triangular solves */ - for (jj = jcol; jj < jcol + w; jj++, - repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) { - - kfnz = repfnz_col[krep]; - if ( kfnz == EMPTY ) continue; /* Skip any zero segment */ - - segsze = krep - kfnz + 1; - luptr = xlusup[fsupc]; - - ops[TRSV] += segsze * (segsze - 1); - ops[GEMV] += 2 * nrow * segsze; - - /* Case 1: Update U-segment of size 1 -- col-col update */ - if ( segsze == 1 ) { - ukj = dense_col[lsub[krep_ind]]; - luptr += nsupr*(nsupc-1) + nsupc; - - for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) { - irow = lsub[i]; - dense_col[irow] -= ukj * lusup[luptr]; - ++luptr; - } - - } else if ( segsze <= 3 ) { - ukj = dense_col[lsub[krep_ind]]; - ukj1 = dense_col[lsub[krep_ind - 1]]; - luptr += nsupr*(nsupc-1) + nsupc-1; - luptr1 = luptr - nsupr; - - if ( segsze == 2 ) { - ukj -= ukj1 * lusup[luptr1]; - dense_col[lsub[krep_ind]] = ukj; - for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { - irow = lsub[i]; - luptr++; luptr1++; - dense_col[irow] -= (ukj*lusup[luptr] - + ukj1*lusup[luptr1]); - } - } else { - ukj2 = dense_col[lsub[krep_ind - 2]]; - luptr2 = luptr1 - nsupr; - ukj1 -= ukj2 * lusup[luptr2-1]; - ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2]; - dense_col[lsub[krep_ind]] = ukj; - dense_col[lsub[krep_ind-1]] = ukj1; - for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { - irow = lsub[i]; - luptr++; luptr1++; luptr2++; - dense_col[irow] -= ( ukj*lusup[luptr] - + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] ); - } - } - - } else { /* segsze >= 4 */ - - /* Copy U[*,j] segment from dense[*] to TriTmp[*], which - holds the result of triangular solves. */ - no_zeros = kfnz - fsupc; - isub = lptr + no_zeros; - for (i = 0; i < segsze; ++i) { - irow = lsub[isub]; - TriTmp[i] = dense_col[irow]; /* Gather */ - ++isub; - } - - /* start effective triangle */ - luptr += nsupr * no_zeros + no_zeros; - -#ifdef USE_VENDOR_BLAS -#ifdef _CRAY - STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], - &nsupr, TriTmp, &incx ); -#else - strsv_( "L", "N", "U", &segsze, &lusup[luptr], - &nsupr, TriTmp, &incx ); -#endif -#else - slsolve ( nsupr, segsze, &lusup[luptr], TriTmp ); -#endif - - - } /* else ... */ - - } /* for jj ... end tri-solves */ - - /* Block row updates; push all the way into dense[*] block */ - for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) { - - r_hi = SUPERLU_MIN(nrow, r_ind + rowblk); - block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind); - luptr = xlusup[fsupc] + nsupc + r_ind; - isub1 = lptr + nsupc + r_ind; - - repfnz_col = repfnz; - TriTmp = tempv; - dense_col = dense; - - /* Sequence through each column in panel -- matrix-vector */ - for (jj = jcol; jj < jcol + w; jj++, - repfnz_col += m, dense_col += m, TriTmp += ldaTmp) { - - kfnz = repfnz_col[krep]; - if ( kfnz == EMPTY ) continue; /* Skip any zero segment */ - - segsze = krep - kfnz + 1; - if ( segsze <= 3 ) continue; /* skip unrolled cases */ - - /* Perform a block update, and scatter the result of - matrix-vector to dense[]. */ - no_zeros = kfnz - fsupc; - luptr1 = luptr + nsupr * no_zeros; - MatvecTmp = &TriTmp[maxsuper]; - -#ifdef USE_VENDOR_BLAS - alpha = one; - beta = zero; -#ifdef _CRAY - SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], - &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy); -#else - sgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], - &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy); -#endif -#else - smatvec(nsupr, block_nrow, segsze, &lusup[luptr1], - TriTmp, MatvecTmp); -#endif - - /* Scatter MatvecTmp[*] into SPA dense[*] temporarily - * such that MatvecTmp[*] can be re-used for the - * the next blok row update. dense[] will be copied into - * global store after the whole panel has been finished. - */ - isub = isub1; - for (i = 0; i < block_nrow; i++) { - irow = lsub[isub]; - dense_col[irow] -= MatvecTmp[i]; - MatvecTmp[i] = zero; - ++isub; - } - - } /* for jj ... */ - - } /* for each block row ... */ - - /* Scatter the triangular solves into SPA dense[*] */ - repfnz_col = repfnz; - TriTmp = tempv; - dense_col = dense; - - for (jj = jcol; jj < jcol + w; jj++, - repfnz_col += m, dense_col += m, TriTmp += ldaTmp) { - kfnz = repfnz_col[krep]; - if ( kfnz == EMPTY ) continue; /* Skip any zero segment */ - - segsze = krep - kfnz + 1; - if ( segsze <= 3 ) continue; /* skip unrolled cases */ - - no_zeros = kfnz - fsupc; - isub = lptr + no_zeros; - for (i = 0; i < segsze; i++) { - irow = lsub[isub]; - dense_col[irow] = TriTmp[i]; - TriTmp[i] = zero; - ++isub; - } - - } /* for jj ... */ - - } else { /* 1-D block modification */ - - - /* Sequence through each column in the panel */ - for (jj = jcol; jj < jcol + w; jj++, - repfnz_col += m, dense_col += m) { - - kfnz = repfnz_col[krep]; - if ( kfnz == EMPTY ) continue; /* Skip any zero segment */ - - segsze = krep - kfnz + 1; - luptr = xlusup[fsupc]; - - ops[TRSV] += segsze * (segsze - 1); - ops[GEMV] += 2 * nrow * segsze; - - /* Case 1: Update U-segment of size 1 -- col-col update */ - if ( segsze == 1 ) { - ukj = dense_col[lsub[krep_ind]]; - luptr += nsupr*(nsupc-1) + nsupc; - - for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) { - irow = lsub[i]; - dense_col[irow] -= ukj * lusup[luptr]; - ++luptr; - } - - } else if ( segsze <= 3 ) { - ukj = dense_col[lsub[krep_ind]]; - luptr += nsupr*(nsupc-1) + nsupc-1; - ukj1 = dense_col[lsub[krep_ind - 1]]; - luptr1 = luptr - nsupr; - - if ( segsze == 2 ) { - ukj -= ukj1 * lusup[luptr1]; - dense_col[lsub[krep_ind]] = ukj; - for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { - irow = lsub[i]; - ++luptr; ++luptr1; - dense_col[irow] -= (ukj*lusup[luptr] - + ukj1*lusup[luptr1]); - } - } else { - ukj2 = dense_col[lsub[krep_ind - 2]]; - luptr2 = luptr1 - nsupr; - ukj1 -= ukj2 * lusup[luptr2-1]; - ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2]; - dense_col[lsub[krep_ind]] = ukj; - dense_col[lsub[krep_ind-1]] = ukj1; - for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) { - irow = lsub[i]; - ++luptr; ++luptr1; ++luptr2; - dense_col[irow] -= ( ukj*lusup[luptr] - + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] ); - } - } - - } else { /* segsze >= 4 */ - /* - * Perform a triangular solve and block update, - * then scatter the result of sup-col update to dense[]. - */ - no_zeros = kfnz - fsupc; - - /* Copy U[*,j] segment from dense[*] to tempv[*]: - * The result of triangular solve is in tempv[*]; - * The result of matrix vector update is in dense_col[*] - */ - isub = lptr + no_zeros; - for (i = 0; i < segsze; ++i) { - irow = lsub[isub]; - tempv[i] = dense_col[irow]; /* Gather */ - ++isub; - } - - /* start effective triangle */ - luptr += nsupr * no_zeros + no_zeros; - -#ifdef USE_VENDOR_BLAS -#ifdef _CRAY - STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], - &nsupr, tempv, &incx ); -#else - strsv_( "L", "N", "U", &segsze, &lusup[luptr], - &nsupr, tempv, &incx ); -#endif - - luptr += segsze; /* Dense matrix-vector */ - tempv1 = &tempv[segsze]; - alpha = one; - beta = zero; -#ifdef _CRAY - SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], - &nsupr, tempv, &incx, &beta, tempv1, &incy ); -#else - sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], - &nsupr, tempv, &incx, &beta, tempv1, &incy ); -#endif -#else - slsolve ( nsupr, segsze, &lusup[luptr], tempv ); - - luptr += segsze; /* Dense matrix-vector */ - tempv1 = &tempv[segsze]; - smatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1); -#endif - - /* Scatter tempv[*] into SPA dense[*] temporarily, such - * that tempv[*] can be used for the triangular solve of - * the next column of the panel. They will be copied into - * ucol[*] after the whole panel has been finished. - */ - isub = lptr + no_zeros; - for (i = 0; i < segsze; i++) { - irow = lsub[isub]; - dense_col[irow] = tempv[i]; - tempv[i] = zero; - isub++; - } - - /* Scatter the update from tempv1[*] into SPA dense[*] */ - /* Start dense rectangular L */ - for (i = 0; i < nrow; i++) { - irow = lsub[isub]; - dense_col[irow] -= tempv1[i]; - tempv1[i] = zero; - ++isub; - } - - } /* else segsze>=4 ... */ - - } /* for each column in the panel... */ - - } /* else 1-D update ... */ - - } /* for each updating supernode ... */ - -} - - - diff --git a/intern/opennl/superlu/spanel_dfs.c b/intern/opennl/superlu/spanel_dfs.c deleted file mode 100644 index 80e6814dde9..00000000000 --- a/intern/opennl/superlu/spanel_dfs.c +++ /dev/null @@ -1,252 +0,0 @@ -/** \file opennl/superlu/spanel_dfs.c - * \ingroup opennl - */ - - -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include "ssp_defs.h" -#include "util.h" - -void -spanel_dfs ( - const int m, /* in - number of rows in the matrix */ - const int w, /* in */ - const int jcol, /* in */ - SuperMatrix *A, /* in - original matrix */ - int *perm_r, /* in */ - int *nseg, /* out */ - double *dense, /* out */ - int *panel_lsub, /* out */ - int *segrep, /* out */ - int *repfnz, /* out */ - int *xprune, /* out */ - int *marker, /* out */ - int *parent, /* working array */ - int *xplore, /* working array */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* - * Purpose - * ======= - * - * Performs a symbolic factorization on a panel of columns [jcol, jcol+w). - * - * A supernode representative is the last column of a supernode. - * The nonzeros in U[*,j] are segments that end at supernodal - * representatives. - * - * The routine returns one list of the supernodal representatives - * in topological order of the dfs that generates them. This list is - * a superset of the topological order of each individual column within - * the panel. - * The location of the first nonzero in each supernodal segment - * (supernodal entry location) is also returned. Each column has a - * separate list for this purpose. - * - * Two marker arrays are used for dfs: - * marker[i] == jj, if i was visited during dfs of current column jj; - * marker1[i] >= jcol, if i was visited by earlier columns in this panel; - * - * marker: A-row --> A-row/col (0/1) - * repfnz: SuperA-col --> PA-row - * parent: SuperA-col --> SuperA-col - * xplore: SuperA-col --> index to L-structure - * - */ - NCPformat *Astore; - double *a; - int *asub; - int *xa_begin, *xa_end; - int krep, chperm, chmark, chrep, oldrep, kchild, myfnz; - int k, krow, kmark, kperm; - int xdfs, maxdfs, kpar; - int jj; /* index through each column in the panel */ - int *marker1; /* marker1[jj] >= jcol if vertex jj was visited - by a previous column within this panel. */ - int *repfnz_col; /* start of each column in the panel */ - double *dense_col; /* start of each column in the panel */ - int nextl_col; /* next available position in panel_lsub[*,jj] */ - int *xsup, *supno; - int *lsub, *xlsub; - - /* Initialize pointers */ - Astore = A->Store; - a = Astore->nzval; - asub = Astore->rowind; - xa_begin = Astore->colbeg; - xa_end = Astore->colend; - marker1 = marker + m; - repfnz_col = repfnz; - dense_col = dense; - *nseg = 0; - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - - /* For each column in the panel */ - for (jj = jcol; jj < jcol + w; jj++) { - nextl_col = (jj - jcol) * m; - -#ifdef CHK_DFS - printf("\npanel col %d: ", jj); -#endif - - /* For each nonz in A[*,jj] do dfs */ - for (k = xa_begin[jj]; k < xa_end[jj]; k++) { - krow = asub[k]; - dense_col[krow] = a[k]; - kmark = marker[krow]; - if ( kmark == jj ) - continue; /* krow visited before, go to the next nonzero */ - - /* For each unmarked nbr krow of jj - * krow is in L: place it in structure of L[*,jj] - */ - marker[krow] = jj; - kperm = perm_r[krow]; - - if ( kperm == EMPTY ) { - panel_lsub[nextl_col++] = krow; /* krow is indexed into A */ - } - /* - * krow is in U: if its supernode-rep krep - * has been explored, update repfnz[*] - */ - else { - - krep = xsup[supno[kperm]+1] - 1; - myfnz = repfnz_col[krep]; - -#ifdef CHK_DFS - printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm); -#endif - if ( myfnz != EMPTY ) { /* Representative visited before */ - if ( myfnz > kperm ) repfnz_col[krep] = kperm; - /* continue; */ - } - else { - /* Otherwise, perform dfs starting at krep */ - oldrep = EMPTY; - parent[krep] = oldrep; - repfnz_col[krep] = kperm; - xdfs = xlsub[krep]; - maxdfs = xprune[krep]; - -#ifdef CHK_DFS - printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs); - for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); - printf("\n"); -#endif - do { - /* - * For each unmarked kchild of krep - */ - while ( xdfs < maxdfs ) { - - kchild = lsub[xdfs]; - xdfs++; - chmark = marker[kchild]; - - if ( chmark != jj ) { /* Not reached yet */ - marker[kchild] = jj; - chperm = perm_r[kchild]; - - /* Case kchild is in L: place it in L[*,j] */ - if ( chperm == EMPTY ) { - panel_lsub[nextl_col++] = kchild; - } - /* Case kchild is in U: - * chrep = its supernode-rep. If its rep has - * been explored, update its repfnz[*] - */ - else { - - chrep = xsup[supno[chperm]+1] - 1; - myfnz = repfnz_col[chrep]; -#ifdef CHK_DFS - printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm); -#endif - if ( myfnz != EMPTY ) { /* Visited before */ - if ( myfnz > chperm ) - repfnz_col[chrep] = chperm; - } - else { - /* Cont. dfs at snode-rep of kchild */ - xplore[krep] = xdfs; - oldrep = krep; - krep = chrep; /* Go deeper down G(L) */ - parent[krep] = oldrep; - repfnz_col[krep] = chperm; - xdfs = xlsub[krep]; - maxdfs = xprune[krep]; -#ifdef CHK_DFS - printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs); - for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); - printf("\n"); -#endif - } /* else */ - - } /* else */ - - } /* if... */ - - } /* while xdfs < maxdfs */ - - /* krow has no more unexplored nbrs: - * Place snode-rep krep in postorder DFS, if this - * segment is seen for the first time. (Note that - * "repfnz[krep]" may change later.) - * Backtrack dfs to its parent. - */ - if ( marker1[krep] < jcol ) { - segrep[*nseg] = krep; - ++(*nseg); - marker1[krep] = jj; - } - - kpar = parent[krep]; /* Pop stack, mimic recursion */ - if ( kpar == EMPTY ) break; /* dfs done */ - krep = kpar; - xdfs = xplore[krep]; - maxdfs = xprune[krep]; - -#ifdef CHK_DFS - printf(" pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs); - for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]); - printf("\n"); -#endif - } while ( kpar != EMPTY ); /* do-while - until empty stack */ - - } /* else */ - - } /* else */ - - } /* for each nonz in A[*,jj] */ - - repfnz_col += m; /* Move to next column */ - dense_col += m; - - } /* for jj ... */ - -} diff --git a/intern/opennl/superlu/spivotL.c b/intern/opennl/superlu/spivotL.c deleted file mode 100644 index 1a0302d0101..00000000000 --- a/intern/opennl/superlu/spivotL.c +++ /dev/null @@ -1,176 +0,0 @@ -/** \file opennl/superlu/spivotL.c - * \ingroup opennl - */ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include <math.h> -#include <stdlib.h> -#include "ssp_defs.h" - -#undef DEBUG - -int -spivotL( - const int jcol, /* in */ - const double u, /* in - diagonal pivoting threshold */ - int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */ - int *perm_r, /* may be modified */ - int *iperm_r, /* in - inverse of perm_r */ - int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */ - int *pivrow, /* out */ - GlobalLU_t *Glu, /* modified - global LU data structures */ - SuperLUStat_t *stat /* output */ - ) -{ -/* - * Purpose - * ======= - * Performs the numerical pivoting on the current column of L, - * and the CDIV operation. - * - * Pivot policy: - * (1) Compute thresh = u * max_(i>=j) abs(A_ij); - * (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN - * pivot row = k; - * ELSE IF abs(A_jj) >= thresh THEN - * pivot row = j; - * ELSE - * pivot row = m; - * - * Note: If you absolutely want to use a given pivot order, then set u=0.0. - * - * Return value: 0 success; - * i > 0 U(i,i) is exactly zero. - * - */ - int fsupc; /* first column in the supernode */ - int nsupc; /* no of columns in the supernode */ - int nsupr; /* no of rows in the supernode */ - int lptr; /* points to the starting subscript of the supernode */ - int pivptr, old_pivptr, diag, diagind; - double pivmax, rtemp, thresh; - double temp; - double *lu_sup_ptr; - double *lu_col_ptr; - int *lsub_ptr; - int isub, icol, k, itemp; - int *lsub, *xlsub; - double *lusup; - int *xlusup; - flops_t *ops = stat->ops; - - /* Initialize pointers */ - lsub = Glu->lsub; - xlsub = Glu->xlsub; - lusup = Glu->lusup; - xlusup = Glu->xlusup; - fsupc = (Glu->xsup)[(Glu->supno)[jcol]]; - nsupc = jcol - fsupc; /* excluding jcol; nsupc >= 0 */ - lptr = xlsub[fsupc]; - nsupr = xlsub[fsupc+1] - lptr; - lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */ - lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */ - lsub_ptr = &lsub[lptr]; /* start of row indices of the supernode */ - -#ifdef DEBUG -if ( jcol == MIN_COL ) { - printf("Before cdiv: col %d\n", jcol); - for (k = nsupc; k < nsupr; k++) - printf(" lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]); -} -#endif - - /* Determine the largest abs numerical value for partial pivoting; - Also search for user-specified pivot, and diagonal element. */ - if ( *usepr ) *pivrow = iperm_r[jcol]; - diagind = iperm_c[jcol]; - pivmax = 0.0; - pivptr = nsupc; - diag = EMPTY; - old_pivptr = nsupc; - for (isub = nsupc; isub < nsupr; ++isub) { - rtemp = fabs (lu_col_ptr[isub]); - if ( rtemp > pivmax ) { - pivmax = rtemp; - pivptr = isub; - } - if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub; - if ( lsub_ptr[isub] == diagind ) diag = isub; - } - - /* Test for singularity */ - if ( pivmax == 0.0 ) { - *pivrow = lsub_ptr[pivptr]; - perm_r[*pivrow] = jcol; - *usepr = 0; - return (jcol+1); - } - - thresh = u * pivmax; - - /* Choose appropriate pivotal element by our policy. */ - if ( *usepr ) { - rtemp = fabs (lu_col_ptr[old_pivptr]); - if ( rtemp != 0.0 && rtemp >= thresh ) - pivptr = old_pivptr; - else - *usepr = 0; - } - if ( *usepr == 0 ) { - /* Use diagonal pivot? */ - if ( diag >= 0 ) { /* diagonal exists */ - rtemp = fabs (lu_col_ptr[diag]); - if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag; - } - *pivrow = lsub_ptr[pivptr]; - } - - /* Record pivot row */ - perm_r[*pivrow] = jcol; - - /* Interchange row subscripts */ - if ( pivptr != nsupc ) { - itemp = lsub_ptr[pivptr]; - lsub_ptr[pivptr] = lsub_ptr[nsupc]; - lsub_ptr[nsupc] = itemp; - - /* Interchange numerical values as well, for the whole snode, such - * that L is indexed the same way as A. - */ - for (icol = 0; icol <= nsupc; icol++) { - itemp = pivptr + icol * nsupr; - temp = lu_sup_ptr[itemp]; - lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr]; - lu_sup_ptr[nsupc + icol*nsupr] = temp; - } - } /* if */ - - /* cdiv operation */ - ops[FACT] += nsupr - nsupc; - - temp = 1.0 / lu_col_ptr[nsupc]; - for (k = nsupc+1; k < nsupr; k++) - lu_col_ptr[k] *= temp; - - return 0; -} - diff --git a/intern/opennl/superlu/spruneL.c b/intern/opennl/superlu/spruneL.c deleted file mode 100644 index 3cf29658596..00000000000 --- a/intern/opennl/superlu/spruneL.c +++ /dev/null @@ -1,152 +0,0 @@ -/** \file opennl/superlu/spruneL.c - * \ingroup opennl - */ - - -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include "ssp_defs.h" -#include "util.h" - -void -spruneL( - const int jcol, /* in */ - const int *perm_r, /* in */ - const int pivrow, /* in */ - const int nseg, /* in */ - const int *segrep, /* in */ - const int *repfnz, /* in */ - int *xprune, /* out */ - GlobalLU_t *Glu /* modified - global LU data structures */ - ) -{ -/* - * Purpose - * ======= - * Prunes the L-structure of supernodes whose L-structure - * contains the current pivot row "pivrow" - * - */ - double utemp; - int jsupno, irep, irep1, kmin, kmax, krow, movnum; - int i, ktemp, minloc, maxloc; - int do_prune; /* logical variable */ - int *xsup, *supno; - int *lsub, *xlsub; - double *lusup; - int *xlusup; - - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - lusup = Glu->lusup; - xlusup = Glu->xlusup; - - /* - * For each supernode-rep irep in U[*,j] - */ - jsupno = supno[jcol]; - for (i = 0; i < nseg; i++) { - - irep = segrep[i]; - irep1 = irep + 1; - do_prune = FALSE; - - /* Don't prune with a zero U-segment */ - if ( repfnz[irep] == EMPTY ) - continue; - - /* If a snode overlaps with the next panel, then the U-segment - * is fragmented into two parts -- irep and irep1. We should let - * pruning occur at the rep-column in irep1's snode. - */ - if ( supno[irep] == supno[irep1] ) /* Don't prune */ - continue; - - /* - * If it has not been pruned & it has a nonz in row L[pivrow,i] - */ - if ( supno[irep] != jsupno ) { - if ( xprune[irep] >= xlsub[irep1] ) { - kmin = xlsub[irep]; - kmax = xlsub[irep1] - 1; - for (krow = kmin; krow <= kmax; krow++) - if ( lsub[krow] == pivrow ) { - do_prune = TRUE; - break; - } - } - - if ( do_prune ) { - - /* Do a quicksort-type partition - * movnum=TRUE means that the num values have to be exchanged. - */ - movnum = FALSE; - if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */ - movnum = TRUE; - - while ( kmin <= kmax ) { - - if ( perm_r[lsub[kmax]] == EMPTY ) - kmax--; - else if ( perm_r[lsub[kmin]] != EMPTY ) - kmin++; - else { /* kmin below pivrow, and kmax above pivrow: - * interchange the two subscripts - */ - ktemp = lsub[kmin]; - lsub[kmin] = lsub[kmax]; - lsub[kmax] = ktemp; - - /* If the supernode has only one column, then we - * only keep one set of subscripts. For any subscript - * interchange performed, similar interchange must be - * done on the numerical values. - */ - if ( movnum ) { - minloc = xlusup[irep] + (kmin - xlsub[irep]); - maxloc = xlusup[irep] + (kmax - xlsub[irep]); - utemp = lusup[minloc]; - lusup[minloc] = lusup[maxloc]; - lusup[maxloc] = utemp; - } - - kmin++; - kmax--; - - } - - } /* while */ - - xprune[irep] = kmin; /* Pruning */ - -#ifdef CHK_PRUNE - printf(" After spruneL(),using col %d: xprune[%d] = %d\n", - jcol, irep, kmin); -#endif - } /* if do_prune */ - - } /* if */ - - } /* for each U-segment... */ -} diff --git a/intern/opennl/superlu/ssnode_bmod.c b/intern/opennl/superlu/ssnode_bmod.c deleted file mode 100644 index 9533373f212..00000000000 --- a/intern/opennl/superlu/ssnode_bmod.c +++ /dev/null @@ -1,120 +0,0 @@ -/** \file opennl/superlu/ssnode_bmod.c - * \ingroup opennl - */ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include "ssp_defs.h" - -void slsolve(int, int, double*, double*); -void smatvec(int, int, int, double*, double*, double*); - -/* - * Performs numeric block updates within the relaxed snode. - */ -int -ssnode_bmod ( - const int jcol, /* in */ - const int fsupc, /* in */ - double *dense, /* in */ - double *tempv, /* working array */ - GlobalLU_t *Glu, /* modified */ - SuperLUStat_t *stat /* output */ - ) -{ -#ifdef USE_VENDOR_BLAS -#ifdef _CRAY - _fcd ftcs1 = _cptofcd("L", strlen("L")), - ftcs2 = _cptofcd("N", strlen("N")), - ftcs3 = _cptofcd("U", strlen("U")); -#endif - int incx = 1, incy = 1; - double alpha = -1.0, beta = 1.0; -#endif - - int luptr, nsupc, nsupr, nrow; - int isub, irow, i, iptr; - register int ufirst, nextlu; - int *lsub, *xlsub; - double *lusup; - int *xlusup; - flops_t *ops = stat->ops; - - lsub = Glu->lsub; - xlsub = Glu->xlsub; - lusup = Glu->lusup; - xlusup = Glu->xlusup; - - nextlu = xlusup[jcol]; - - /* - * Process the supernodal portion of L\U[*,j] - */ - for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) { - irow = lsub[isub]; - lusup[nextlu] = dense[irow]; - dense[irow] = 0; - ++nextlu; - } - - xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */ - - if ( fsupc < jcol ) { - - luptr = xlusup[fsupc]; - nsupr = xlsub[fsupc+1] - xlsub[fsupc]; - nsupc = jcol - fsupc; /* Excluding jcol */ - ufirst = xlusup[jcol]; /* Points to the beginning of column - jcol in supernode L\U(jsupno). */ - nrow = nsupr - nsupc; - - ops[TRSV] += nsupc * (nsupc - 1); - ops[GEMV] += 2 * nrow * nsupc; - -#ifdef USE_VENDOR_BLAS -#ifdef _CRAY - STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, - &lusup[ufirst], &incx ); - SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, - &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); -#else - strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, - &lusup[ufirst], &incx ); - sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, - &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); -#endif -#else - slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] ); - smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], - &lusup[ufirst], &tempv[0] ); - - /* Scatter tempv[*] into lusup[*] */ - iptr = ufirst + nsupc; - for (i = 0; i < nrow; i++) { - lusup[iptr++] -= tempv[i]; - tempv[i] = 0.0; - } -#endif - - } - - return 0; -} diff --git a/intern/opennl/superlu/ssnode_dfs.c b/intern/opennl/superlu/ssnode_dfs.c deleted file mode 100644 index 0dfc8d86d82..00000000000 --- a/intern/opennl/superlu/ssnode_dfs.c +++ /dev/null @@ -1,109 +0,0 @@ -/** \file opennl/superlu/ssnode_dfs.c - * \ingroup opennl - */ - - -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include "ssp_defs.h" -#include "util.h" - -int -ssnode_dfs ( - const int jcol, /* in - start of the supernode */ - const int kcol, /* in - end of the supernode */ - const int *asub, /* in */ - const int *xa_begin, /* in */ - const int *xa_end, /* in */ - int *xprune, /* out */ - int *marker, /* modified */ - GlobalLU_t *Glu /* modified */ - ) -{ -/* Purpose - * ======= - * ssnode_dfs() - Determine the union of the row structures of those - * columns within the relaxed snode. - * Note: The relaxed snodes are leaves of the supernodal etree, therefore, - * the portion outside the rectangular supernode must be zero. - * - * Return value - * ============ - * 0 success; - * >0 number of bytes allocated when run out of memory. - * - */ - register int i, k, ifrom, ito, nextl, new_next; - int nsuper, krow, kmark, mem_error; - int *xsup, *supno; - int *lsub, *xlsub; - int nzlmax; - - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - nzlmax = Glu->nzlmax; - - nsuper = ++supno[jcol]; /* Next available supernode number */ - nextl = xlsub[jcol]; - - for (i = jcol; i <= kcol; i++) { - /* For each nonzero in A[*,i] */ - for (k = xa_begin[i]; k < xa_end[i]; k++) { - krow = asub[k]; - kmark = marker[krow]; - if ( kmark != kcol ) { /* First time visit krow */ - marker[krow] = kcol; - lsub[nextl++] = krow; - if ( nextl >= nzlmax ) { - if ( (mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)) ) - return (mem_error); - lsub = Glu->lsub; - } - } - } - supno[i] = nsuper; - } - - /* Supernode > 1, then make a copy of the subscripts for pruning */ - if ( jcol < kcol ) { - new_next = nextl + (nextl - xlsub[jcol]); - while ( new_next > nzlmax ) { - if ( (mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)) ) - return (mem_error); - lsub = Glu->lsub; - } - ito = nextl; - for (ifrom = xlsub[jcol]; ifrom < nextl; ) - lsub[ito++] = lsub[ifrom++]; - for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl; - nextl = ito; - } - - xsup[nsuper+1] = kcol + 1; - supno[kcol+1] = nsuper; - xprune[kcol] = nextl; - xlsub[kcol+1] = nextl; - - return 0; -} - diff --git a/intern/opennl/superlu/ssp_blas2.c b/intern/opennl/superlu/ssp_blas2.c deleted file mode 100644 index 9215d48dc09..00000000000 --- a/intern/opennl/superlu/ssp_blas2.c +++ /dev/null @@ -1,475 +0,0 @@ -/** \file opennl/superlu/ssp_blas2.c - * \ingroup opennl - */ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -/* - * File name: ssp_blas2.c - * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations. - */ - -#include "ssp_defs.h" - -/* - * Function prototypes - */ -void susolve(int, int, double*, double*); -void slsolve(int, int, double*, double*); -void smatvec(int, int, int, double*, double*, double*); -int strsv_(char*, char*, char*, int*, double*, int*, double*, int*); - -int -sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L, - SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info) -{ -/* - * Purpose - * ======= - * - * sp_strsv() solves one of the systems of equations - * A*x = b, or A'*x = b, - * where b and x are n element vectors and A is a sparse unit , or - * non-unit, upper or lower triangular matrix. - * No test for singularity or near-singularity is included in this - * routine. Such tests must be performed before calling this routine. - * - * Parameters - * ========== - * - * uplo - (input) char* - * On entry, uplo specifies whether the matrix is an upper or - * lower triangular matrix as follows: - * uplo = 'U' or 'u' A is an upper triangular matrix. - * uplo = 'L' or 'l' A is a lower triangular matrix. - * - * trans - (input) char* - * On entry, trans specifies the equations to be solved as - * follows: - * trans = 'N' or 'n' A*x = b. - * trans = 'T' or 't' A'*x = b. - * trans = 'C' or 'c' A'*x = b. - * - * diag - (input) char* - * On entry, diag specifies whether or not A is unit - * triangular as follows: - * diag = 'U' or 'u' A is assumed to be unit triangular. - * diag = 'N' or 'n' A is not assumed to be unit - * triangular. - * - * L - (input) SuperMatrix* - * The factor L from the factorization Pr*A*Pc=L*U. Use - * compressed row subscripts storage for supernodes, - * i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU. - * - * U - (input) SuperMatrix* - * The factor U from the factorization Pr*A*Pc=L*U. - * U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU. - * - * x - (input/output) double* - * Before entry, the incremented array X must contain the n - * element right-hand side vector b. On exit, X is overwritten - * with the solution vector x. - * - * info - (output) int* - * If *info = -i, the i-th argument had an illegal value. - * - */ -#ifdef _CRAY - _fcd ftcs1 = _cptofcd("L", strlen("L")), - ftcs2 = _cptofcd("N", strlen("N")), - ftcs3 = _cptofcd("U", strlen("U")); -#endif - SCformat *Lstore; - NCformat *Ustore; - double *Lval, *Uval; - int incx = 1; - int nrow; - int fsupc, nsupr, nsupc, luptr, istart, irow; - int i, k, iptr, jcol; - double *work; - flops_t solve_ops; - - /* Test the input parameters */ - *info = 0; - if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1; - else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && - !lsame_(trans, "C")) *info = -2; - else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3; - else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4; - else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5; - if ( *info ) { - i = -(*info); - xerbla_("sp_strsv", &i); - return 0; - } - - Lstore = L->Store; - Lval = Lstore->nzval; - Ustore = U->Store; - Uval = Ustore->nzval; - solve_ops = 0; - - if ( !(work = doubleCalloc(L->nrow)) ) - ABORT("Malloc fails for work in sp_strsv()."); - - if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */ - - if ( lsame_(uplo, "L") ) { - /* Form x := inv(L)*x */ - if ( L->nrow == 0 ) { - SUPERLU_FREE(work); - return 0; /* Quick return */ - } - - for (k = 0; k <= Lstore->nsuper; k++) { - fsupc = L_FST_SUPC(k); - istart = L_SUB_START(fsupc); - nsupr = L_SUB_START(fsupc+1) - istart; - nsupc = L_FST_SUPC(k+1) - fsupc; - luptr = L_NZ_START(fsupc); - nrow = nsupr - nsupc; - - solve_ops += nsupc * (nsupc - 1); - solve_ops += 2 * nrow * nsupc; - - if ( nsupc == 1 ) { - for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) { - irow = L_SUB(iptr); - ++luptr; - x[irow] -= x[fsupc] * Lval[luptr]; - } - } else { -#ifdef USE_VENDOR_BLAS -#ifdef _CRAY - STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); - - SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], - &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy); -#else - strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); - - sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], - &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy); -#endif -#else - slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]); - - smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc], - &x[fsupc], &work[0] ); -#endif - - iptr = istart + nsupc; - for (i = 0; i < nrow; ++i, ++iptr) { - irow = L_SUB(iptr); - x[irow] -= work[i]; /* Scatter */ - work[i] = 0.0; - - } - } - } /* for k ... */ - - } else { - /* Form x := inv(U)*x */ - - if ( U->nrow == 0 ) return 0; /* Quick return */ - - for (k = Lstore->nsuper; k >= 0; k--) { - fsupc = L_FST_SUPC(k); - nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); - nsupc = L_FST_SUPC(k+1) - fsupc; - luptr = L_NZ_START(fsupc); - - solve_ops += nsupc * (nsupc + 1); - - if ( nsupc == 1 ) { - x[fsupc] /= Lval[luptr]; - for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) { - irow = U_SUB(i); - x[irow] -= x[fsupc] * Uval[i]; - } - } else { -#ifdef USE_VENDOR_BLAS -#ifdef _CRAY - STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); -#else - strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); -#endif -#else - susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] ); -#endif - - for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { - solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); - for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); - i++) { - irow = U_SUB(i); - x[irow] -= x[jcol] * Uval[i]; - } - } - } - } /* for k ... */ - - } - } else { /* Form x := inv(A')*x */ - - if ( lsame_(uplo, "L") ) { - /* Form x := inv(L')*x */ - if ( L->nrow == 0 ) return 0; /* Quick return */ - - for (k = Lstore->nsuper; k >= 0; --k) { - fsupc = L_FST_SUPC(k); - istart = L_SUB_START(fsupc); - nsupr = L_SUB_START(fsupc+1) - istart; - nsupc = L_FST_SUPC(k+1) - fsupc; - luptr = L_NZ_START(fsupc); - - solve_ops += 2 * (nsupr - nsupc) * nsupc; - - for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { - iptr = istart + nsupc; - for (i = L_NZ_START(jcol) + nsupc; - i < L_NZ_START(jcol+1); i++) { - irow = L_SUB(iptr); - x[jcol] -= x[irow] * Lval[i]; - iptr++; - } - } - - if ( nsupc > 1 ) { - solve_ops += nsupc * (nsupc - 1); -#ifdef _CRAY - ftcs1 = _cptofcd("L", strlen("L")); - ftcs2 = _cptofcd("T", strlen("T")); - ftcs3 = _cptofcd("U", strlen("U")); - STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); -#else - strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); -#endif - } - } - } else { - /* Form x := inv(U')*x */ - if ( U->nrow == 0 ) return 0; /* Quick return */ - - for (k = 0; k <= Lstore->nsuper; k++) { - fsupc = L_FST_SUPC(k); - nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc); - nsupc = L_FST_SUPC(k+1) - fsupc; - luptr = L_NZ_START(fsupc); - - for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) { - solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); - for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) { - irow = U_SUB(i); - x[jcol] -= x[irow] * Uval[i]; - } - } - - solve_ops += nsupc * (nsupc + 1); - - if ( nsupc == 1 ) { - x[fsupc] /= Lval[luptr]; - } else { -#ifdef _CRAY - ftcs1 = _cptofcd("U", strlen("U")); - ftcs2 = _cptofcd("T", strlen("T")); - ftcs3 = _cptofcd("N", strlen("N")); - STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); -#else - strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr, - &x[fsupc], &incx); -#endif - } - } /* for k ... */ - } - } - - stat->ops[SOLVE] += solve_ops; - SUPERLU_FREE(work); - return 0; -} - - - - -int -sp_sgemv(char *trans, double alpha, SuperMatrix *A, double *x, - int incx, double beta, double *y, int incy) -{ -/* Purpose - ======= - - sp_sgemv() performs one of the matrix-vector operations - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, - where alpha and beta are scalars, x and y are vectors and A is a - sparse A->nrow by A->ncol matrix. - - Parameters - ========== - - TRANS - (input) char* - On entry, TRANS specifies the operation to be performed as - follows: - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. - - ALPHA - (input) double - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Matrix A with a sparse format, of dimension (A->nrow, A->ncol). - Currently, the type of A can be: - Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. - In the future, more general A can be handled. - - X - (input) double*, array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. - - INCX - (input) int - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - - BETA - (input) double - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - - Y - (output) double*, array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - (input) int - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - - ==== Sparse Level 2 Blas routine. -*/ - - /* Local variables */ - NCformat *Astore; - double *Aval; - int info; - double temp; - int lenx, leny, i, j, irow; - int iy, jx, jy, kx, ky; - int notran; - - notran = lsame_(trans, "N"); - Astore = A->Store; - Aval = Astore->nzval; - - /* Test the input parameters */ - info = 0; - if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1; - else if ( A->nrow < 0 || A->ncol < 0 ) info = 3; - else if (incx == 0) info = 5; - else if (incy == 0) info = 8; - if (info != 0) { - xerbla_("sp_sgemv ", &info); - return 0; - } - - /* Quick return if possible. */ - if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.)) - return 0; - - /* Set LENX and LENY, the lengths of the vectors x and y, and set - up the start points in X and Y. */ - if (lsame_(trans, "N")) { - lenx = A->ncol; - leny = A->nrow; - } else { - lenx = A->nrow; - leny = A->ncol; - } - if (incx > 0) kx = 0; - else kx = - (lenx - 1) * incx; - if (incy > 0) ky = 0; - else ky = - (leny - 1) * incy; - - /* Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. */ - /* First form y := beta*y. */ - if (beta != 1.) { - if (incy == 1) { - if (beta == 0.) - for (i = 0; i < leny; ++i) y[i] = 0.; - else - for (i = 0; i < leny; ++i) y[i] = beta * y[i]; - } else { - iy = ky; - if (beta == 0.) - for (i = 0; i < leny; ++i) { - y[iy] = 0.; - iy += incy; - } - else - for (i = 0; i < leny; ++i) { - y[iy] = beta * y[iy]; - iy += incy; - } - } - } - - if (alpha == 0.) return 0; - - if ( notran ) { - /* Form y := alpha*A*x + y. */ - jx = kx; - if (incy == 1) { - for (j = 0; j < A->ncol; ++j) { - if (x[jx] != 0.) { - temp = alpha * x[jx]; - for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { - irow = Astore->rowind[i]; - y[irow] += temp * Aval[i]; - } - } - jx += incx; - } - } else { - ABORT("Not implemented."); - } - } else { - /* Form y := alpha*A'*x + y. */ - jy = ky; - if (incx == 1) { - for (j = 0; j < A->ncol; ++j) { - temp = 0.; - for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { - irow = Astore->rowind[i]; - temp += Aval[i] * x[irow]; - } - y[jy] += alpha * temp; - jy += incy; - } - } else { - ABORT("Not implemented."); - } - } - return 0; -} /* sp_sgemv */ - - - diff --git a/intern/opennl/superlu/ssp_blas3.c b/intern/opennl/superlu/ssp_blas3.c deleted file mode 100644 index aeb51b0c1ca..00000000000 --- a/intern/opennl/superlu/ssp_blas3.c +++ /dev/null @@ -1,124 +0,0 @@ -/** \file opennl/superlu/ssp_blas3.c - * \ingroup opennl - */ - - -/* - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - */ -/* - * File name: sp_blas3.c - * Purpose: Sparse BLAS3, using some dense BLAS3 operations. - */ - -#include "ssp_defs.h" -#include "util.h" - -int -sp_sgemm(char *transa, int n, - double alpha, SuperMatrix *A, double *b, int ldb, - double beta, double *c, int ldc) -{ -/* Purpose - ======= - - sp_s performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), - - alpha and beta are scalars, and A, B and C are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - - Parameters - ========== - - TRANSA - (input) char* - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - TRANSA = 'N' or 'n', op( A ) = A. - TRANSA = 'T' or 't', op( A ) = A'. - TRANSA = 'C' or 'c', op( A ) = conjg( A' ). - Unchanged on exit. - - TRANSB - (input) char* - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - TRANSB = 'N' or 'n', op( B ) = B. - TRANSB = 'T' or 't', op( B ) = B'. - TRANSB = 'C' or 'c', op( B ) = conjg( B' ). - Unchanged on exit. - - M - (input) int - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - (input) int - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - (input) int - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - (input) double - On entry, ALPHA specifies the scalar alpha. - - A - (input) SuperMatrix* - Matrix A with a sparse format, of dimension (A->nrow, A->ncol). - Currently, the type of A can be: - Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. - In the future, more general A can be handled. - - B - FLOAT PRECISION array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. - Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - (input) int - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least max( 1, n ). - Unchanged on exit. - - BETA - (input) double - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - - C - FLOAT PRECISION array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*B + beta*C ). - - LDC - (input) int - On entry, LDC specifies the first dimension of C as declared - in the calling (sub)program. LDC must be at least max(1,m). - Unchanged on exit. - - ==== Sparse Level 3 Blas routine. -*/ - int incx = 1, incy = 1; - int j; - - for (j = 0; j < n; ++j) { - sp_sgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy); - } - return 0; -} diff --git a/intern/opennl/superlu/ssp_defs.h b/intern/opennl/superlu/ssp_defs.h deleted file mode 100644 index 5110fc5ad69..00000000000 --- a/intern/opennl/superlu/ssp_defs.h +++ /dev/null @@ -1,240 +0,0 @@ -/** \file opennl/superlu/ssp_defs.h - * \ingroup opennl - */ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -#ifndef __SUPERLU_sSP_DEFS /* allow multiple inclusions */ -#define __SUPERLU_sSP_DEFS - -/* - * File name: ssp_defs.h - * Purpose: Sparse matrix types and function prototypes - * History: - */ - -#ifdef _CRAY -#include <fortran.h> -#include <string.h> -#endif - -/* Define my integer type int_t */ -typedef int int_t; /* default */ - -#include "Cnames.h" -#include "supermatrix.h" -#include "util.h" - - -/* - * Global data structures used in LU factorization - - * - * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper]. - * (xsup,supno): supno[i] is the supernode no to which i belongs; - * xsup(s) points to the beginning of the s-th supernode. - * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) - * xsup 0 1 2 4 7 12 - * Note: dfs will be performed on supernode rep. relative to the new - * row pivoting ordering - * - * (xlsub,lsub): lsub[*] contains the compressed subscript of - * rectangular supernodes; xlsub[j] points to the starting - * location of the j-th column in lsub[*]. Note that xlsub - * is indexed by column. - * Storage: original row subscripts - * - * During the course of sparse LU factorization, we also use - * (xlsub,lsub) for the purpose of symmetric pruning. For each - * supernode {s,s+1,...,t=s+r} with first column s and last - * column t, the subscript set - * lsub[j], j=xlsub[s], .., xlsub[s+1]-1 - * is the structure of column s (i.e. structure of this supernode). - * It is used for the storage of numerical values. - * Furthermore, - * lsub[j], j=xlsub[t], .., xlsub[t+1]-1 - * is the structure of the last column t of this supernode. - * It is for the purpose of symmetric pruning. Therefore, the - * structural subscripts can be rearranged without making physical - * interchanges among the numerical values. - * - * However, if the supernode has only one column, then we - * only keep one set of subscripts. For any subscript interchange - * performed, similar interchange must be done on the numerical - * values. - * - * The last column structures (for pruning) will be removed - * after the numercial LU factorization phase. - * - * (xlusup,lusup): lusup[*] contains the numerical values of the - * rectangular supernodes; xlusup[j] points to the starting - * location of the j-th column in storage vector lusup[*] - * Note: xlusup is indexed by column. - * Each rectangular supernode is stored by column-major - * scheme, consistent with Fortran 2-dim array storage. - * - * (xusub,ucol,usub): ucol[*] stores the numerical values of - * U-columns outside the rectangular supernodes. The row - * subscript of nonzero ucol[k] is stored in usub[k]. - * xusub[i] points to the starting location of column i in ucol. - * Storage: new row subscripts; that is subscripts of PA. - */ -typedef struct { - int *xsup; /* supernode and column mapping */ - int *supno; - int *lsub; /* compressed L subscripts */ - int *xlsub; - double *lusup; /* L supernodes */ - int *xlusup; - double *ucol; /* U columns */ - int *usub; - int *xusub; - int nzlmax; /* current max size of lsub */ - int nzumax; /* " " " ucol */ - int nzlumax; /* " " " lusup */ - int n; /* number of columns in the matrix */ - LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */ -} GlobalLU_t; - -typedef struct { - double for_lu; - double total_needed; - int expansions; -} mem_usage_t; - -#ifdef __cplusplus -extern "C" { -#endif - -/* Driver routines */ -extern void -sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *, - SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *); -extern void -sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, - char *, double *, double *, SuperMatrix *, SuperMatrix *, - void *, int, SuperMatrix *, SuperMatrix *, - double *, double *, double *, double *, - mem_usage_t *, SuperLUStat_t *, int *); - -/* Supernodal LU factor related */ -extern void -sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, double *, - int *, int *, Stype_t, Dtype_t, Mtype_t); -extern void -sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, double *, - int *, int *, Stype_t, Dtype_t, Mtype_t); -extern void -sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *); -extern void -sCreate_Dense_Matrix(SuperMatrix *, int, int, double *, int, - Stype_t, Dtype_t, Mtype_t); -extern void -sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, double *, - int *, int *, int *, int *, int *, - Stype_t, Dtype_t, Mtype_t); -extern void -sCopy_Dense_Matrix(int, int, double *, int, double *, int); - -extern void countnz (const int, int *, int *, int *, GlobalLU_t *); -extern void fixupL (const int, const int *, GlobalLU_t *); - -extern void sallocateA (int, int, double **, int **, int **); -extern void sgstrf (superlu_options_t*, SuperMatrix*, - int, int, int*, void *, int, int *, int *, - SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *); -extern int ssnode_dfs (const int, const int, const int *, const int *, - const int *, int *, int *, GlobalLU_t *); -extern int ssnode_bmod (const int, const int, double *, - double *, GlobalLU_t *, SuperLUStat_t*); -extern void spanel_dfs (const int, const int, const int, SuperMatrix *, - int *, int *, double *, int *, int *, int *, - int *, int *, int *, int *, GlobalLU_t *); -extern void spanel_bmod (const int, const int, const int, const int, - double *, double *, int *, int *, - GlobalLU_t *, SuperLUStat_t*); -extern int scolumn_dfs (const int, const int, int *, int *, int *, int *, - int *, int *, int *, int *, int *, GlobalLU_t *); -extern int scolumn_bmod (const int, const int, double *, - double *, int *, int *, int, - GlobalLU_t *, SuperLUStat_t*); -extern int scopy_to_ucol (int, int, int *, int *, int *, - double *, GlobalLU_t *); -extern int spivotL (const int, const double, int *, int *, - int *, int *, int *, GlobalLU_t *, SuperLUStat_t*); -extern void spruneL (const int, const int *, const int, const int, - const int *, const int *, int *, GlobalLU_t *); -extern void sreadmt (int *, int *, int *, double **, int **, int **); -extern void sGenXtrue (int, int, double *, int); -extern void sFillRHS (trans_t, int, double *, int, SuperMatrix *, - SuperMatrix *); -extern void sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *, - SuperMatrix *, SuperLUStat_t*, int *); - - -/* Driver related */ - -extern void sgsequ (SuperMatrix *, double *, double *, double *, - double *, double *, int *); -extern void slaqgs (SuperMatrix *, double *, double *, double, - double, double, char *); -extern void sgscon (char *, SuperMatrix *, SuperMatrix *, - double, double *, SuperLUStat_t*, int *); -extern double sPivotGrowth(int, SuperMatrix *, int *, - SuperMatrix *, SuperMatrix *); -extern void sgsrfs (trans_t, SuperMatrix *, SuperMatrix *, - SuperMatrix *, int *, int *, char *, double *, - double *, SuperMatrix *, SuperMatrix *, - double *, double *, SuperLUStat_t*, int *); - -extern int sp_strsv (char *, char *, char *, SuperMatrix *, - SuperMatrix *, double *, SuperLUStat_t*, int *); -extern int sp_sgemv (char *, double, SuperMatrix *, double *, - int, double, double *, int); - -extern int sp_sgemm (char *, int, double, - SuperMatrix *, double *, int, double, - double *, int); - -/* Memory-related */ -extern int sLUMemInit (fact_t, void *, int, int, int, int, int, - SuperMatrix *, SuperMatrix *, - GlobalLU_t *, int **, double **); -extern void sSetRWork (int, int, double *, double **, double **); -extern void sLUWorkFree (int *, double *, GlobalLU_t *); -extern int sLUMemXpand (int, int, MemType, int *, GlobalLU_t *); - -extern double *doubleMalloc(int); -extern double *doubleCalloc(int); -extern int smemory_usage(const int, const int, const int, const int); -extern int sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *); - -/* Auxiliary routines */ -extern void sreadhb(int *, int *, int *, double **, int **, int **); -extern void sCompRow_to_CompCol(int, int, int, double*, int*, int*, - double **, int **, int **); -extern void sfill (double *, int, double); -extern void sinf_norm_error (int, SuperMatrix *, double *); -extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *, - double, double, double *, double *, char *); - -/* Routines for debugging */ -extern void sPrint_CompCol_Matrix(char *, SuperMatrix *); -extern void sPrint_SuperNode_Matrix(char *, SuperMatrix *); -extern void sPrint_Dense_Matrix(char *, SuperMatrix *); -extern void print_lu_col(char *, int, int, int *, GlobalLU_t *); -extern void check_tempv(int, double *); -extern int print_int_vec(char *what, int n, int *vec); - -extern int sp_symetree(int *acolst, int *acolend, int *arow, int n, int *parent); -extern void sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu); // added to build with debug for blender - campbell -#ifdef __cplusplus - } -#endif - -#endif /* __SUPERLU_sSP_DEFS */ - diff --git a/intern/opennl/superlu/strsv.c b/intern/opennl/superlu/strsv.c deleted file mode 100644 index a34f5fb38a1..00000000000 --- a/intern/opennl/superlu/strsv.c +++ /dev/null @@ -1,323 +0,0 @@ -/** \file opennl/superlu/strsv.c - * \ingroup opennl - */ -int strsv_(char *, char *, char *, int *, double *, int *, double *, int *); - - -/* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, int *n, - double *a, int *lda, double *x, int *incx) -{ - - - /* Local variables */ - static int info; - static double temp; - static int i, j; - extern int lsame_(char *, char *); - static int ix, jx, kx; - extern /* Subroutine */ int xerbla_(char *, int *); - static int nounit; - - -/* Purpose - ======= - - STRSV solves one of the systems of equations - - A*x = b, or A'*x = b, - - where b and x are n element vectors and A is an n by n unit, or - non-unit, upper or lower triangular matrix. - - No test for singularity or near-singularity is included in this - routine. Such tests must be performed before calling this routine. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the equations to be solved as - follows: - - TRANS = 'N' or 'n' A*x = b. - - TRANS = 'T' or 't' A'*x = b. - - TRANS = 'C' or 'c' A'*x = b. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit - triangular as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - - triangular matrix and the strictly lower triangular part of - - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - - triangular matrix and the strictly upper triangular part of - - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element right-hand side vector b. On exit, X is overwritten - - with the solution vector x. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - - Test the input parameters. - - - Parameter adjustments - Function Body */ -#define X(I) x[(I)-1] - -#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] - - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, "T") && - ! lsame_(trans, "C")) { - info = 2; - } else if (! lsame_(diag, "U") && ! lsame_(diag, "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*lda < ((1 > *n)? 1: *n)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } - if (info != 0) { - xerbla_("STRSV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - - nounit = lsame_(diag, "N"); - -/* Set up the start point in X if the increment is not unity. This - will be ( N - 1 )*INCX too small for descending loops. */ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - -/* Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. */ - - if (lsame_(trans, "N")) { - -/* Form x := inv( A )*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - if (X(j) != 0.f) { - if (nounit) { - X(j) /= A(j,j); - } - temp = X(j); - for (i = j - 1; i >= 1; --i) { - X(i) -= temp * A(i,j); -/* L10: */ - } - } -/* L20: */ - } - } else { - jx = kx + (*n - 1) * *incx; - for (j = *n; j >= 1; --j) { - if (X(jx) != 0.f) { - if (nounit) { - X(jx) /= A(j,j); - } - temp = X(jx); - ix = jx; - for (i = j - 1; i >= 1; --i) { - ix -= *incx; - X(ix) -= temp * A(i,j); -/* L30: */ - } - } - jx -= *incx; -/* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = 1; j <= *n; ++j) { - if (X(j) != 0.f) { - if (nounit) { - X(j) /= A(j,j); - } - temp = X(j); - for (i = j + 1; i <= *n; ++i) { - X(i) -= temp * A(i,j); -/* L50: */ - } - } -/* L60: */ - } - } else { - jx = kx; - for (j = 1; j <= *n; ++j) { - if (X(jx) != 0.f) { - if (nounit) { - X(jx) /= A(j,j); - } - temp = X(jx); - ix = jx; - for (i = j + 1; i <= *n; ++i) { - ix += *incx; - X(ix) -= temp * A(i,j); -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } - } else { - -/* Form x := inv( A' )*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - for (j = 1; j <= *n; ++j) { - temp = X(j); - for (i = 1; i <= j-1; ++i) { - temp -= A(i,j) * X(i); -/* L90: */ - } - if (nounit) { - temp /= A(j,j); - } - X(j) = temp; -/* L100: */ - } - } else { - jx = kx; - for (j = 1; j <= *n; ++j) { - temp = X(jx); - ix = kx; - for (i = 1; i <= j-1; ++i) { - temp -= A(i,j) * X(ix); - ix += *incx; -/* L110: */ - } - if (nounit) { - temp /= A(j,j); - } - X(jx) = temp; - jx += *incx; -/* L120: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - temp = X(j); - for (i = *n; i >= j+1; --i) { - temp -= A(i,j) * X(i); -/* L130: */ - } - if (nounit) { - temp /= A(j,j); - } - X(j) = temp; -/* L140: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - temp = X(jx); - ix = kx; - for (i = *n; i >= j+1; --i) { - temp -= A(i,j) * X(ix); - ix -= *incx; -/* L150: */ - } - if (nounit) { - temp /= A(j,j); - } - X(jx) = temp; - jx -= *incx; -/* L160: */ - } - } - } - } - - return 0; - -/* End of STRSV . */ - -} /* strsv_ */ - diff --git a/intern/opennl/superlu/superlu_timer.c b/intern/opennl/superlu/superlu_timer.c deleted file mode 100644 index abcafe8fa1f..00000000000 --- a/intern/opennl/superlu/superlu_timer.c +++ /dev/null @@ -1,61 +0,0 @@ -/** \file opennl/superlu/superlu_timer.c - * \ingroup opennl - */ -/* - * Purpose - * ======= - * Returns the time in seconds used by the process. - * - * Note: the timer function call is machine dependent. Use conditional - * compilation to choose the appropriate function. - * - */ - -/* We want this flag, safer than putting in build system */ -#define NO_TIMER - -double SuperLU_timer_ (); - -#ifdef SUN -/* - * It uses the system call gethrtime(3C), which is accurate to - * nanoseconds. -*/ -#include <sys/time.h> - -double SuperLU_timer_() { - return ( (double)gethrtime() / 1e9 ); -} - -#else - -#ifndef NO_TIMER -#include <sys/types.h> -#include <sys/times.h> -#include <time.h> -#include <sys/time.h> -#endif - -#ifndef CLK_TCK -#define CLK_TCK 60 -#endif -double SuperLU_timer_(void); - -double SuperLU_timer_(void) -{ -#ifdef NO_TIMER - /* no sys/times.h on WIN32 */ - double tmp; - tmp = 0.0; -#else - struct tms use; - double tmp; - times(&use); - tmp = use.tms_utime; - tmp += use.tms_stime; -#endif - return (double)(tmp) / CLK_TCK; -} - -#endif - diff --git a/intern/opennl/superlu/supermatrix.h b/intern/opennl/superlu/supermatrix.h deleted file mode 100644 index 74dfde4df7c..00000000000 --- a/intern/opennl/superlu/supermatrix.h +++ /dev/null @@ -1,143 +0,0 @@ -/** \file opennl/superlu/supermatrix.h - * \ingroup opennl - */ -#ifndef __SUPERLU_SUPERMATRIX /* allow multiple inclusions */ -#define __SUPERLU_SUPERMATRIX - -/******************************************** - * The matrix types are defined as follows. * - ********************************************/ -typedef enum { - SLU_NC, /* column-wise, no supernode */ - SLU_NR, /* row-wize, no supernode */ - SLU_SC, /* column-wise, supernode */ - SLU_SR, /* row-wise, supernode */ - SLU_NCP, /* column-wise, column-permuted, no supernode - (The consecutive columns of nonzeros, after permutation, - may not be stored contiguously.) */ - SLU_DN /* Fortran style column-wise storage for dense matrix */ -} Stype_t; - -typedef enum { - SLU_S, /* single */ - SLU_D, /* double */ - SLU_C, /* single complex */ - SLU_Z /* double complex */ -} Dtype_t; - -typedef enum { - SLU_GE, /* general */ - SLU_TRLU, /* lower triangular, unit diagonal */ - SLU_TRUU, /* upper triangular, unit diagonal */ - SLU_TRL, /* lower triangular */ - SLU_TRU, /* upper triangular */ - SLU_SYL, /* symmetric, store lower half */ - SLU_SYU, /* symmetric, store upper half */ - SLU_HEL, /* Hermitian, store lower half */ - SLU_HEU /* Hermitian, store upper half */ -} Mtype_t; - -typedef struct { - Stype_t Stype; /* Storage type: interprets the storage structure - pointed to by *Store. */ - Dtype_t Dtype; /* Data type. */ - Mtype_t Mtype; /* Matrix type: describes the mathematical property of - the matrix. */ - int_t nrow; /* number of rows */ - int_t ncol; /* number of columns */ - void *Store; /* pointer to the actual storage of the matrix */ -} SuperMatrix; - -/*********************************************** - * The storage schemes are defined as follows. * - ***********************************************/ - -/* Stype == NC (Also known as Harwell-Boeing sparse matrix format) */ -typedef struct { - int_t nnz; /* number of nonzeros in the matrix */ - void *nzval; /* pointer to array of nonzero values, packed by column */ - int_t *rowind; /* pointer to array of row indices of the nonzeros */ - int_t *colptr; /* pointer to array of beginning of columns in nzval[] - and rowind[] */ - /* Note: - Zero-based indexing is used; - colptr[] has ncol+1 entries, the last one pointing - beyond the last column, so that colptr[ncol] = nnz. */ -} NCformat; - -/* Stype == NR (Also known as row compressed storage (RCS). */ -typedef struct { - int_t nnz; /* number of nonzeros in the matrix */ - void *nzval; /* pointer to array of nonzero values, packed by row */ - int_t *colind; /* pointer to array of column indices of the nonzeros */ - int_t *rowptr; /* pointer to array of beginning of rows in nzval[] - and colind[] */ - /* Note: - Zero-based indexing is used; - nzval[] and colind[] are of the same length, nnz; - rowptr[] has nrow+1 entries, the last one pointing - beyond the last column, so that rowptr[nrow] = nnz. */ -} NRformat; - -/* Stype == SC */ -typedef struct { - int_t nnz; /* number of nonzeros in the matrix */ - int_t nsuper; /* number of supernodes, minus 1 */ - void *nzval; /* pointer to array of nonzero values, packed by column */ - int_t *nzval_colptr;/* pointer to array of beginning of columns in nzval[] */ - int_t *rowind; /* pointer to array of compressed row indices of - rectangular supernodes */ - int_t *rowind_colptr;/* pointer to array of beginning of columns in rowind[] */ - int_t *col_to_sup; /* col_to_sup[j] is the supernode number to which column - j belongs; mapping from column to supernode number. */ - int_t *sup_to_col; /* sup_to_col[s] points to the start of the s-th - supernode; mapping from supernode number to column. - e.g.: col_to_sup: 0 1 2 2 3 3 3 4 4 4 4 4 4 (ncol=12) - sup_to_col: 0 1 2 4 7 12 (nsuper=4) */ - /* Note: - Zero-based indexing is used; - nzval_colptr[], rowind_colptr[], col_to_sup and - sup_to_col[] have ncol+1 entries, the last one - pointing beyond the last column. - For col_to_sup[], only the first ncol entries are - defined. For sup_to_col[], only the first nsuper+2 - entries are defined. */ -} SCformat; - -/* Stype == NCP */ -typedef struct { - int_t nnz; /* number of nonzeros in the matrix */ - void *nzval; /* pointer to array of nonzero values, packed by column */ - int_t *rowind;/* pointer to array of row indices of the nonzeros */ - /* Note: nzval[]/rowind[] always have the same length */ - int_t *colbeg;/* colbeg[j] points to the beginning of column j in nzval[] - and rowind[] */ - int_t *colend;/* colend[j] points to one past the last element of column - j in nzval[] and rowind[] */ - /* Note: - Zero-based indexing is used; - The consecutive columns of the nonzeros may not be - contiguous in storage, because the matrix has been - postmultiplied by a column permutation matrix. */ -} NCPformat; - -/* Stype == DN */ -typedef struct { - int_t lda; /* leading dimension */ - void *nzval; /* array of size lda*ncol to represent a dense matrix */ -} DNformat; - - - -/********************************************************* - * Macros used for easy access of sparse matrix entries. * - *********************************************************/ -#define L_SUB_START(col) ( Lstore->rowind_colptr[col] ) -#define L_SUB(ptr) ( Lstore->rowind[ptr] ) -#define L_NZ_START(col) ( Lstore->nzval_colptr[col] ) -#define L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] ) -#define U_NZ_START(col) ( Ustore->colptr[col] ) -#define U_SUB(ptr) ( Ustore->rowind[ptr] ) - - -#endif /* __SUPERLU_SUPERMATRIX */ diff --git a/intern/opennl/superlu/sutil.c b/intern/opennl/superlu/sutil.c deleted file mode 100644 index 52728e47f56..00000000000 --- a/intern/opennl/superlu/sutil.c +++ /dev/null @@ -1,485 +0,0 @@ -/** \file opennl/superlu/sutil.c - * \ingroup opennl - */ - -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include <math.h> -#include "ssp_defs.h" - -/* prototypes */ -void sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu); -void scheck_tempv(int n, double *tempv); -void sPrintPerf(SuperMatrix *, SuperMatrix *, mem_usage_t *,double , double , - double *, double *, char *, SuperLUStat_t *); -int print_double_vec(char *what, int n, double *vec); -/* ********** */ - -void -sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, - double *nzval, int *rowind, int *colptr, - Stype_t stype, Dtype_t dtype, Mtype_t mtype) -{ - NCformat *Astore; - - A->Stype = stype; - A->Dtype = dtype; - A->Mtype = mtype; - A->nrow = m; - A->ncol = n; - A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) ); - if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store"); - Astore = A->Store; - Astore->nnz = nnz; - Astore->nzval = nzval; - Astore->rowind = rowind; - Astore->colptr = colptr; -} - -void -sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, - double *nzval, int *colind, int *rowptr, - Stype_t stype, Dtype_t dtype, Mtype_t mtype) -{ - NRformat *Astore; - - A->Stype = stype; - A->Dtype = dtype; - A->Mtype = mtype; - A->nrow = m; - A->ncol = n; - A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) ); - if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store"); - Astore = A->Store; - Astore->nnz = nnz; - Astore->nzval = nzval; - Astore->colind = colind; - Astore->rowptr = rowptr; -} - -/* Copy matrix A into matrix B. */ -void -sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B) -{ - NCformat *Astore, *Bstore; - int ncol, nnz, i; - - B->Stype = A->Stype; - B->Dtype = A->Dtype; - B->Mtype = A->Mtype; - B->nrow = A->nrow;; - B->ncol = ncol = A->ncol; - Astore = (NCformat *) A->Store; - Bstore = (NCformat *) B->Store; - Bstore->nnz = nnz = Astore->nnz; - for (i = 0; i < nnz; ++i) - ((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i]; - for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i]; - for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i]; -} - - -void -sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, double *x, int ldx, - Stype_t stype, Dtype_t dtype, Mtype_t mtype) -{ - DNformat *Xstore; - - X->Stype = stype; - X->Dtype = dtype; - X->Mtype = mtype; - X->nrow = m; - X->ncol = n; - X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) ); - if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store"); - Xstore = (DNformat *) X->Store; - Xstore->lda = ldx; - Xstore->nzval = (double *) x; -} - -void -sCopy_Dense_Matrix(int M, int N, double *X, int ldx, - double *Y, int ldy) -{ -/* - * - * Purpose - * ======= - * - * Copies a two-dimensional matrix X to another matrix Y. - */ - int i, j; - - for (j = 0; j < N; ++j) - for (i = 0; i < M; ++i) - Y[i + j*ldy] = X[i + j*ldx]; -} - -void -sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, - double *nzval, int *nzval_colptr, int *rowind, - int *rowind_colptr, int *col_to_sup, int *sup_to_col, - Stype_t stype, Dtype_t dtype, Mtype_t mtype) -{ - SCformat *Lstore; - - L->Stype = stype; - L->Dtype = dtype; - L->Mtype = mtype; - L->nrow = m; - L->ncol = n; - L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) ); - if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store"); - Lstore = L->Store; - Lstore->nnz = nnz; - Lstore->nsuper = col_to_sup[n]; - Lstore->nzval = nzval; - Lstore->nzval_colptr = nzval_colptr; - Lstore->rowind = rowind; - Lstore->rowind_colptr = rowind_colptr; - Lstore->col_to_sup = col_to_sup; - Lstore->sup_to_col = sup_to_col; - -} - - -/* - * Convert a row compressed storage into a column compressed storage. - */ -void -sCompRow_to_CompCol(int m, int n, int nnz, - double *a, int *colind, int *rowptr, - double **at, int **rowind, int **colptr) -{ - register int i, j, col, relpos; - int *marker; - - /* Allocate storage for another copy of the matrix. */ - *at = (double *) doubleMalloc(nnz); - *rowind = (int *) intMalloc(nnz); - *colptr = (int *) intMalloc(n+1); - marker = (int *) intCalloc(n); - - /* Get counts of each column of A, and set up column pointers */ - for (i = 0; i < m; ++i) - for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]]; - (*colptr)[0] = 0; - for (j = 0; j < n; ++j) { - (*colptr)[j+1] = (*colptr)[j] + marker[j]; - marker[j] = (*colptr)[j]; - } - - /* Transfer the matrix into the compressed column storage. */ - for (i = 0; i < m; ++i) { - for (j = rowptr[i]; j < rowptr[i+1]; ++j) { - col = colind[j]; - relpos = marker[col]; - (*rowind)[relpos] = i; - (*at)[relpos] = a[j]; - ++marker[col]; - } - } - - SUPERLU_FREE(marker); -} - - -void -sPrint_CompCol_Matrix(char *what, SuperMatrix *A) -{ - NCformat *Astore; - register int i,n; - double *dp; - - printf("\nCompCol matrix %s:\n", what); - printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); - n = A->ncol; - Astore = (NCformat *) A->Store; - dp = (double *) Astore->nzval; - printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz); - printf("nzval: "); - for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]); - printf("\nrowind: "); - for (i = 0; i < Astore->colptr[n]; ++i) printf("%d ", Astore->rowind[i]); - printf("\ncolptr: "); - for (i = 0; i <= n; ++i) printf("%d ", Astore->colptr[i]); - printf("\n"); - fflush(stdout); -} - -void -sPrint_SuperNode_Matrix(char *what, SuperMatrix *A) -{ - SCformat *Astore; - register int i, j, k, c, d, n, nsup; - double *dp; - int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr; - - printf("\nSuperNode matrix %s:\n", what); - printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); - n = A->ncol; - Astore = (SCformat *) A->Store; - dp = (double *) Astore->nzval; - col_to_sup = Astore->col_to_sup; - sup_to_col = Astore->sup_to_col; - rowind_colptr = Astore->rowind_colptr; - rowind = Astore->rowind; - printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", - A->nrow,A->ncol,Astore->nnz,Astore->nsuper); - printf("nzval:\n"); - for (k = 0; k <= Astore->nsuper; ++k) { - c = sup_to_col[k]; - nsup = sup_to_col[k+1] - c; - for (j = c; j < c + nsup; ++j) { - d = Astore->nzval_colptr[j]; - for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) { - printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]); - } - } - } -#if 0 - for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f ", dp[i]); -#endif - printf("\nnzval_colptr: "); - for (i = 0; i <= n; ++i) printf("%d ", Astore->nzval_colptr[i]); - printf("\nrowind: "); - for (i = 0; i < Astore->rowind_colptr[n]; ++i) - printf("%d ", Astore->rowind[i]); - printf("\nrowind_colptr: "); - for (i = 0; i <= n; ++i) printf("%d ", Astore->rowind_colptr[i]); - printf("\ncol_to_sup: "); - for (i = 0; i < n; ++i) printf("%d ", col_to_sup[i]); - printf("\nsup_to_col: "); - for (i = 0; i <= Astore->nsuper+1; ++i) - printf("%d ", sup_to_col[i]); - printf("\n"); - fflush(stdout); -} - -void -sPrint_Dense_Matrix(char *what, SuperMatrix *A) -{ - DNformat *Astore; - register int i; - double *dp; - - printf("\nDense matrix %s:\n", what); - printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype); - Astore = (DNformat *) A->Store; - dp = (double *) Astore->nzval; - printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda); - printf("\nnzval: "); - for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]); - printf("\n"); - fflush(stdout); -} - -/* - * Diagnostic print of column "jcol" in the U/L factor. - */ -void -sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu) -{ - int i, k, fsupc; - int *xsup, *supno; - int *xlsub, *lsub; - double *lusup; - int *xlusup; - double *ucol; - int *usub, *xusub; - - xsup = Glu->xsup; - supno = Glu->supno; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - lusup = Glu->lusup; - xlusup = Glu->xlusup; - ucol = Glu->ucol; - usub = Glu->usub; - xusub = Glu->xusub; - - printf("%s", msg); - printf("col %d: pivrow %d, supno %d, xprune %d\n", - jcol, pivrow, supno[jcol], xprune[jcol]); - - printf("\tU-col:\n"); - for (i = xusub[jcol]; i < xusub[jcol+1]; i++) - printf("\t%d%10.4f\n", usub[i], ucol[i]); - printf("\tL-col in rectangular snode:\n"); - fsupc = xsup[supno[jcol]]; /* first col of the snode */ - i = xlsub[fsupc]; - k = xlusup[jcol]; - while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) { - printf("\t%d\t%10.4f\n", lsub[i], lusup[k]); - i++; k++; - } - fflush(stdout); -} - - -/* - * Check whether tempv[] == 0. This should be true before and after - * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". - */ -void scheck_tempv(int n, double *tempv) -{ - int i; - - for (i = 0; i < n; i++) { - if (tempv[i] != 0.0) - { - fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]); - ABORT("scheck_tempv"); - } - } -} - - -void -sGenXtrue(int n, int nrhs, double *x, int ldx) -{ - int i, j; - for (j = 0; j < nrhs; ++j) - for (i = 0; i < n; ++i) { - x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/ - } -} - -/* - * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's - */ -void -sFillRHS(trans_t trans, int nrhs, double *x, int ldx, - SuperMatrix *A, SuperMatrix *B) -{ - DNformat *Bstore; - double *rhs; - double one = 1.0; - double zero = 0.0; - int ldc; - char transc[1]; - - Bstore = B->Store; - rhs = Bstore->nzval; - ldc = Bstore->lda; - - if ( trans == NOTRANS ) *(unsigned char *)transc = 'N'; - else *(unsigned char *)transc = 'T'; - - sp_sgemm(transc, nrhs, one, A, - x, ldx, zero, rhs, ldc); - -} - -/* - * Fills a double precision array with a given value. - */ -void -sfill(double *a, int alen, double dval) -{ - register int i; - for (i = 0; i < alen; i++) a[i] = dval; -} - - - -/* - * Check the inf-norm of the error vector - */ -void sinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue) -{ - DNformat *Xstore; - double err, xnorm; - double *Xmat, *soln_work; - int i, j; - - Xstore = X->Store; - Xmat = Xstore->nzval; - - for (j = 0; j < nrhs; j++) { - soln_work = &Xmat[j*Xstore->lda]; - err = xnorm = 0.0; - for (i = 0; i < X->nrow; i++) { - err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i])); - xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i])); - } - err = err / xnorm; - printf("||X - Xtrue||/||X|| = %e\n", err); - } -} - - - -/* Print performance of the code. */ -void -sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage, - double rpg, double rcond, double *ferr, - double *berr, char *equed, SuperLUStat_t *stat) -{ - SCformat *Lstore; - NCformat *Ustore; - double *utime; - flops_t *ops; - - utime = stat->utime; - ops = stat->ops; - - if ( utime[FACT] != 0. ) - printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT], - ops[FACT]*1e-6/utime[FACT]); - printf("Identify relaxed snodes = %8.2f\n", utime[RELAX]); - if ( utime[SOLVE] != 0. ) - printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE], - ops[SOLVE]*1e-6/utime[SOLVE]); - - Lstore = (SCformat *) L->Store; - Ustore = (NCformat *) U->Store; - printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz); - printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz); - printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); - - printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", - mem_usage->for_lu/1e6, mem_usage->total_needed/1e6, - mem_usage->expansions); - - printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n"); - printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n", - utime[FACT], ops[FACT]*1e-6/utime[FACT], - utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE], - utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]); - - printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n"); - printf("NUM:\t%e\t%e\t%e\t%e\t%s\n", - rpg, rcond, ferr[0], berr[0], equed); - -} - - - - -int print_double_vec(char *what, int n, double *vec) -{ - int i; - printf("%s: n %d\n", what, n); - for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]); - return 0; -} - diff --git a/intern/opennl/superlu/util.c b/intern/opennl/superlu/util.c deleted file mode 100644 index 96f404d886b..00000000000 --- a/intern/opennl/superlu/util.c +++ /dev/null @@ -1,400 +0,0 @@ -/** \file opennl/superlu/util.c - * \ingroup opennl - */ -/* - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - */ -/* - Copyright (c) 1994 by Xerox Corporation. All rights reserved. - - THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - - Permission is hereby granted to use or copy this program for any - purpose, provided the above notices are retained on all copies. - Permission to modify the code and to distribute modified code is - granted, provided the above notices are retained, and a notice that - the code was modified is included with the above copyright notice. -*/ - -#include <math.h> -#include "ssp_defs.h" -#include "util.h" - -/* prototypes */ -flops_t LUFactFlops(SuperLUStat_t *stat); -flops_t LUSolveFlops(SuperLUStat_t *stat); -double SpaSize(int n, int np, double sum_npw); -double DenseSize(int n, double sum_nw); - -/* - * Global statistics variale - */ - -void superlu_abort_and_exit(char* msg) -{ - fprintf(stderr, "%s", msg); - exit (-1); -} - -/* - * Set the default values for the options argument. - */ -void set_default_options(superlu_options_t *options) -{ - options->Fact = DOFACT; - options->Equil = YES; - options->ColPerm = COLAMD; - options->DiagPivotThresh = 1.0; - options->Trans = NOTRANS; - options->IterRefine = NOREFINE; - options->SymmetricMode = NO; - options->PivotGrowth = NO; - options->ConditionNumber = NO; - options->PrintStat = YES; -} - -/* Deallocate the structure pointing to the actual storage of the matrix. */ -void -Destroy_SuperMatrix_Store(SuperMatrix *A) -{ - SUPERLU_FREE ( A->Store ); -} - -void -Destroy_CompCol_Matrix(SuperMatrix *A) -{ - SUPERLU_FREE( ((NCformat *)A->Store)->rowind ); - SUPERLU_FREE( ((NCformat *)A->Store)->colptr ); - SUPERLU_FREE( ((NCformat *)A->Store)->nzval ); - SUPERLU_FREE( A->Store ); -} - -void -Destroy_CompRow_Matrix(SuperMatrix *A) -{ - SUPERLU_FREE( ((NRformat *)A->Store)->colind ); - SUPERLU_FREE( ((NRformat *)A->Store)->rowptr ); - SUPERLU_FREE( ((NRformat *)A->Store)->nzval ); - SUPERLU_FREE( A->Store ); -} - -void -Destroy_SuperNode_Matrix(SuperMatrix *A) -{ - SUPERLU_FREE ( ((SCformat *)A->Store)->rowind ); - SUPERLU_FREE ( ((SCformat *)A->Store)->rowind_colptr ); - SUPERLU_FREE ( ((SCformat *)A->Store)->nzval ); - SUPERLU_FREE ( ((SCformat *)A->Store)->nzval_colptr ); - SUPERLU_FREE ( ((SCformat *)A->Store)->col_to_sup ); - SUPERLU_FREE ( ((SCformat *)A->Store)->sup_to_col ); - SUPERLU_FREE ( A->Store ); -} - -/* A is of type Stype==NCP */ -void -Destroy_CompCol_Permuted(SuperMatrix *A) -{ - SUPERLU_FREE ( ((NCPformat *)A->Store)->colbeg ); - SUPERLU_FREE ( ((NCPformat *)A->Store)->colend ); - SUPERLU_FREE ( A->Store ); -} - -/* A is of type Stype==DN */ -void -Destroy_Dense_Matrix(SuperMatrix *A) -{ - DNformat* Astore = A->Store; - SUPERLU_FREE (Astore->nzval); - SUPERLU_FREE ( A->Store ); -} - -/* - * Reset repfnz[] for the current column - */ -void -resetrep_col (const int nseg, const int *segrep, int *repfnz) -{ - int i, irep; - - for (i = 0; i < nseg; i++) { - irep = segrep[i]; - repfnz[irep] = EMPTY; - } -} - - -/* - * Count the total number of nonzeros in factors L and U, and in the - * symmetrically reduced L. - */ -void -countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu) -{ - int nsuper, fsupc, i, j; - int nnzL0, jlen, irep; - int *xsup, *xlsub; - - xsup = Glu->xsup; - xlsub = Glu->xlsub; - *nnzL = 0; - *nnzU = (Glu->xusub)[n]; - nnzL0 = 0; - nsuper = (Glu->supno)[n]; - - if ( n <= 0 ) return; - - /* - * For each supernode - */ - for (i = 0; i <= nsuper; i++) { - fsupc = xsup[i]; - jlen = xlsub[fsupc+1] - xlsub[fsupc]; - - for (j = fsupc; j < xsup[i+1]; j++) { - *nnzL += jlen; - *nnzU += j - fsupc + 1; - jlen--; - } - irep = xsup[i+1] - 1; - nnzL0 += xprune[irep] - xlsub[irep]; - } - - /* printf("\tNo of nonzeros in symm-reduced L = %d\n", nnzL0);*/ -} - - - -/* - * Fix up the data storage lsub for L-subscripts. It removes the subscript - * sets for structural pruning, and applies permuation to the remaining - * subscripts. - */ -void -fixupL(const int n, const int *perm_r, GlobalLU_t *Glu) -{ - register int nsuper, fsupc, nextl, i, j, k, jstrt; - int *xsup, *lsub, *xlsub; - - if ( n <= 1 ) return; - - xsup = Glu->xsup; - lsub = Glu->lsub; - xlsub = Glu->xlsub; - nextl = 0; - nsuper = (Glu->supno)[n]; - - /* - * For each supernode ... - */ - for (i = 0; i <= nsuper; i++) { - fsupc = xsup[i]; - jstrt = xlsub[fsupc]; - xlsub[fsupc] = nextl; - for (j = jstrt; j < xlsub[fsupc+1]; j++) { - lsub[nextl] = perm_r[lsub[j]]; /* Now indexed into P*A */ - nextl++; - } - for (k = fsupc+1; k < xsup[i+1]; k++) - xlsub[k] = nextl; /* Other columns in supernode i */ - - } - - xlsub[n] = nextl; -} - - -/* - * Diagnostic print of segment info after panel_dfs(). - */ -void print_panel_seg(int n, int w, int jcol, int nseg, - int *segrep, int *repfnz) -{ - int j, k; - - for (j = jcol; j < jcol+w; j++) { - printf("\tcol %d:\n", j); - for (k = 0; k < nseg; k++) - printf("\t\tseg %d, segrep %d, repfnz %d\n", k, - segrep[k], repfnz[(j-jcol)*n + segrep[k]]); - } - -} - - -void -StatInit(SuperLUStat_t *stat) -{ - register int i, w, panel_size, relax; - - panel_size = sp_ienv(1); - relax = sp_ienv(2); - w = SUPERLU_MAX(panel_size, relax); - stat->panel_histo = intCalloc(w+1); - stat->utime = (double *) SUPERLU_MALLOC(NPHASES * sizeof(double)); - if (!stat->utime) ABORT("SUPERLU_MALLOC fails for stat->utime"); - stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t)); - if (!stat->ops) ABORT("SUPERLU_MALLOC fails for stat->ops"); - for (i = 0; i < NPHASES; ++i) { - stat->utime[i] = 0.; - stat->ops[i] = 0.; - } -} - - -void -StatPrint(SuperLUStat_t *stat) -{ - double *utime; - flops_t *ops; - - utime = stat->utime; - ops = stat->ops; - printf("Factor time = %8.2f\n", utime[FACT]); - if ( utime[FACT] != 0.0 ) - printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT], - ops[FACT]*1e-6/utime[FACT]); - - printf("Solve time = %8.2f\n", utime[SOLVE]); - if ( utime[SOLVE] != 0.0 ) - printf("Solve flops = %e\tMflops = %8.2f\n", ops[SOLVE], - ops[SOLVE]*1e-6/utime[SOLVE]); - -} - - -void -StatFree(SuperLUStat_t *stat) -{ - SUPERLU_FREE(stat->panel_histo); - SUPERLU_FREE(stat->utime); - SUPERLU_FREE(stat->ops); -} - - -flops_t -LUFactFlops(SuperLUStat_t *stat) -{ - return (stat->ops[FACT]); -} - -flops_t -LUSolveFlops(SuperLUStat_t *stat) -{ - return (stat->ops[SOLVE]); -} - - - - - -/* - * Fills an integer array with a given value. - */ -void ifill(int *a, int alen, int ival) -{ - register int i; - for (i = 0; i < alen; i++) a[i] = ival; -} - - - -/* - * Get the statistics of the supernodes - */ -#define NBUCKS 10 -static int max_sup_size; - -void super_stats(int nsuper, int *xsup) -{ - register int nsup1 = 0; - int i, isize, whichb, bl, bh; - int bucket[NBUCKS]; - - max_sup_size = 0; - - for (i = 0; i <= nsuper; i++) { - isize = xsup[i+1] - xsup[i]; - if ( isize == 1 ) nsup1++; - if ( max_sup_size < isize ) max_sup_size = isize; - } - - printf(" Supernode statistics:\n\tno of super = %d\n", nsuper+1); - printf("\tmax supernode size = %d\n", max_sup_size); - printf("\tno of size 1 supernodes = %d\n", nsup1); - - /* Histogram of the supernode sizes */ - ifill (bucket, NBUCKS, 0); - - for (i = 0; i <= nsuper; i++) { - isize = xsup[i+1] - xsup[i]; - whichb = (double) isize / max_sup_size * NBUCKS; - if (whichb >= NBUCKS) whichb = NBUCKS - 1; - bucket[whichb]++; - } - - printf("\tHistogram of supernode sizes:\n"); - for (i = 0; i < NBUCKS; i++) { - bl = (double) i * max_sup_size / NBUCKS; - bh = (double) (i+1) * max_sup_size / NBUCKS; - printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]); - } - -} - - -double SpaSize(int n, int np, double sum_npw) -{ - return (sum_npw*8 + np*8 + n*4)/1024.; -} - -double DenseSize(int n, double sum_nw) -{ - return (sum_nw*8 + n*8)/1024.;; -} - - - -/* - * Check whether repfnz[] == EMPTY after reset. - */ -void check_repfnz(int n, int w, int jcol, int *repfnz) -{ - int jj, k; - - for (jj = jcol; jj < jcol+w; jj++) - for (k = 0; k < n; k++) - if ( repfnz[(jj-jcol)*n + k] != EMPTY ) { - fprintf(stderr, "col %d, repfnz_col[%d] = %d\n", jj, - k, repfnz[(jj-jcol)*n + k]); - ABORT("check_repfnz"); - } -} - - -/* Print a summary of the testing results. */ -void -PrintSumm(char *type, int nfail, int nrun, int nerrs) -{ - if ( nfail > 0 ) - printf("%3s driver: %d out of %d tests failed to pass the threshold\n", - type, nfail, nrun); - else - printf("All tests for %3s driver passed the threshold (%6d tests run)\n", type, nrun); - - if ( nerrs > 0 ) - printf("%6d error messages recorded\n", nerrs); -} - - -int print_int_vec(char *what, int n, int *vec) -{ - int i; - printf("%s\n", what); - for (i = 0; i < n; ++i) printf("%d\t%d\n", i, vec[i]); - return 0; -} diff --git a/intern/opennl/superlu/util.h b/intern/opennl/superlu/util.h deleted file mode 100644 index da9a8dbe4e3..00000000000 --- a/intern/opennl/superlu/util.h +++ /dev/null @@ -1,271 +0,0 @@ -/** \file opennl/superlu/util.h - * \ingroup opennl - */ -#ifndef __SUPERLU_UTIL /* allow multiple inclusions */ -#define __SUPERLU_UTIL - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -/* -#ifndef __STDC__ -#include <malloc.h> -#endif -*/ -#include <assert.h> - -/*********************************************************************** - * Macros - ***********************************************************************/ -#define FIRSTCOL_OF_SNODE(i) (xsup[i]) -/* No of marker arrays used in the symbolic factorization, - each of size n */ -#define NO_MARKER 3 -#define NUM_TEMPV(m,w,t,b) ( SUPERLU_MAX(m, (t + b)*w) ) - -#ifndef USER_ABORT -#define USER_ABORT(msg) \ - { fprintf(stderr, "%s", msg); exit (-1); } -#endif - -#define ABORT(err_msg) \ - { char msg[256];\ - sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\ - USER_ABORT(msg); } - - -#ifndef USER_MALLOC -#if 1 -#define USER_MALLOC(size) superlu_malloc(size) -#else -/* The following may check out some uninitialized data */ -#define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size) -#endif -#endif - -#define SUPERLU_MALLOC(size) USER_MALLOC(size) - -#ifndef USER_FREE -#define USER_FREE(addr) superlu_free(addr) -#endif - -#define SUPERLU_FREE(addr) USER_FREE(addr) - -#define CHECK_MALLOC(where) { \ - extern int superlu_malloc_total; \ - printf("%s: malloc_total %d Bytes\n", \ - where, superlu_malloc_total); \ -} - -#define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) ) -#define SUPERLU_MIN(x, y) ( (x) < (y) ? (x) : (y) ) - -/*********************************************************************** - * Constants - ***********************************************************************/ -#define EMPTY (-1) -/*#define NO (-1)*/ -#define FALSE 0 -#define TRUE 1 - -/*********************************************************************** - * Enumerate types - ***********************************************************************/ -typedef enum {NO, YES} yes_no_t; -typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t; -typedef enum {NOROWPERM, LargeDiag, MY_PERMR} rowperm_t; -typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC}colperm_t; -typedef enum {NOTRANS, TRANS, CONJ} trans_t; -typedef enum {NOEQUIL, ROW, COL, BOTH} DiagScale_t; -typedef enum {NOREFINE, SINGLE=1, SLU_DOUBLE, EXTRA} IterRefine_t; -typedef enum {LUSUP, UCOL, LSUB, USUB} MemType; -typedef enum {HEAD, TAIL} stack_end_t; -typedef enum {SYSTEM, USER} LU_space_t; - -/* - * The following enumerate type is used by the statistics variable - * to keep track of flop count and time spent at various stages. - * - * Note that not all of the fields are disjoint. - */ -typedef enum { - COLPERM, /* find a column ordering that minimizes fills */ - RELAX, /* find artificial supernodes */ - ETREE, /* compute column etree */ - EQUIL, /* equilibrate the original matrix */ - FACT, /* perform LU factorization */ - RCOND, /* estimate reciprocal condition number */ - SOLVE, /* forward and back solves */ - REFINE, /* perform iterative refinement */ - SLU_FLOAT, /* time spent in doubleing-point operations */ - TRSV, /* fraction of FACT spent in xTRSV */ - GEMV, /* fraction of FACT spent in xGEMV */ - FERR, /* estimate error bounds after iterative refinement */ - NPHASES /* total number of phases */ -} PhaseType; - - -/*********************************************************************** - * Type definitions - ***********************************************************************/ -typedef double flops_t; -typedef unsigned char Logical; - -/* - *-- This contains the options used to control the solve process. - * - * Fact (fact_t) - * Specifies whether or not the factored form of the matrix - * A is supplied on entry, and if not, how the matrix A should - * be factorizaed. - * = DOFACT: The matrix A will be factorized from scratch, and the - * factors will be stored in L and U. - * = SamePattern: The matrix A will be factorized assuming - * that a factorization of a matrix with the same sparsity - * pattern was performed prior to this one. Therefore, this - * factorization will reuse column permutation vector - * ScalePermstruct->perm_c and the column elimination tree - * LUstruct->etree. - * = SamePattern_SameRowPerm: The matrix A will be factorized - * assuming that a factorization of a matrix with the same - * sparsity pattern and similar numerical values was performed - * prior to this one. Therefore, this factorization will reuse - * both row and column scaling factors R and C, and the - * both row and column permutation vectors perm_r and perm_c, - * distributed data structure set up from the previous symbolic - * factorization. - * = FACTORED: On entry, L, U, perm_r and perm_c contain the - * factored form of A. If DiagScale is not NOEQUIL, the matrix - * A has been equilibrated with scaling factors R and C. - * - * Equil (yes_no_t) - * Specifies whether to equilibrate the system (scale A's row and - * columns to have unit norm). - * - * ColPerm (colperm_t) - * Specifies what type of column permutation to use to reduce fill. - * = NATURAL: use the natural ordering - * = MMD_ATA: use minimum degree ordering on structure of A'*A - * = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A - * = COLAMD: use approximate minimum degree column ordering - * = MY_PERMC: use the ordering specified in ScalePermstruct->perm_c[] - * - * Trans (trans_t) - * Specifies the form of the system of equations: - * = NOTRANS: A * X = B (No transpose) - * = TRANS: A**T * X = B (Transpose) - * = CONJ: A**H * X = B (Transpose) - * - * IterRefine (IterRefine_t) - * Specifies whether to perform iterative refinement. - * = NO: no iterative refinement - * = WorkingPrec: perform iterative refinement in working precision - * = ExtraPrec: perform iterative refinement in extra precision - * - * PrintStat (yes_no_t) - * Specifies whether to print the solver's statistics. - * - * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU) - * Specifies the threshold used for a diagonal entry to be an - * acceptable pivot. - * - * PivotGrowth (yes_no_t) - * Specifies whether to compute the reciprocal pivot growth. - * - * ConditionNumber (ues_no_t) - * Specifies whether to compute the reciprocal condition number. - * - * RowPerm (rowperm_t) (only for SuperLU_DIST) - * Specifies whether to permute rows of the original matrix. - * = NO: not to permute the rows - * = LargeDiag: make the diagonal large relative to the off-diagonal - * = MY_PERMR: use the permutation given in ScalePermstruct->perm_r[] - * - * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST) - * Specifies whether to replace the tiny diagonals by - * sqrt(epsilon)*||A|| during LU factorization. - * - * SolveInitialized (yes_no_t) (only for SuperLU_DIST) - * Specifies whether the initialization has been performed to the - * triangular solve. - * - * RefineInitialized (yes_no_t) (only for SuperLU_DIST) - * Specifies whether the initialization has been performed to the - * sparse matrix-vector multiplication routine needed in iterative - * refinement. - */ -typedef struct { - fact_t Fact; - yes_no_t Equil; - colperm_t ColPerm; - trans_t Trans; - IterRefine_t IterRefine; - yes_no_t PrintStat; - yes_no_t SymmetricMode; - double DiagPivotThresh; - yes_no_t PivotGrowth; - yes_no_t ConditionNumber; - rowperm_t RowPerm; - yes_no_t ReplaceTinyPivot; - yes_no_t SolveInitialized; - yes_no_t RefineInitialized; -} superlu_options_t; - -typedef struct { - int *panel_histo; /* histogram of panel size distribution */ - double *utime; /* running time at various phases */ - flops_t *ops; /* operation count at various phases */ - int TinyPivots; /* number of tiny pivots */ - int RefineSteps; /* number of iterative refinement steps */ -} SuperLUStat_t; - - -/*********************************************************************** - * Prototypes - ***********************************************************************/ -#ifdef __cplusplus -extern "C" { -#endif - -extern void Destroy_SuperMatrix_Store(SuperMatrix *); -extern void Destroy_CompCol_Matrix(SuperMatrix *); -extern void Destroy_CompRow_Matrix(SuperMatrix *); -extern void Destroy_SuperNode_Matrix(SuperMatrix *); -extern void Destroy_CompCol_Permuted(SuperMatrix *); -extern void Destroy_Dense_Matrix(SuperMatrix *); -extern void get_perm_c(int, SuperMatrix *, int *); -extern void set_default_options(superlu_options_t *options); -extern void sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*, - SuperMatrix*); -extern void superlu_abort_and_exit(char*); -extern void *superlu_malloc (size_t); -extern int *intMalloc (int); -extern int *intCalloc (int); -extern void superlu_free (void*); -extern void SetIWork (int, int, int, int *, int **, int **, int **, - int **, int **, int **, int **); -extern int sp_coletree (int *, int *, int *, int, int, int *); -extern void relax_snode (const int, int *, const int, int *, int *); -extern void heap_relax_snode (const int, int *, const int, int *, int *); -extern void resetrep_col (const int, const int *, int *); -extern int spcoletree (int *, int *, int *, int, int, int *); -extern int *TreePostorder (int, int *); -extern double SuperLU_timer_ (void); -extern int sp_ienv (int); -extern int lsame_ (char *, char *); -extern int xerbla_ (char *, int *); -extern void ifill (int *, int, int); -extern void snode_profile (int, int *); -extern void super_stats (int, int *); -extern void PrintSumm (char *, int, int, int); -extern void StatInit(SuperLUStat_t *); -extern void StatPrint (SuperLUStat_t *); -extern void StatFree(SuperLUStat_t *); -extern void print_panel_seg(int, int, int, int, int *, int *); -extern void check_repfnz(int, int, int, int *); - -#ifdef __cplusplus - } -#endif - -#endif /* __SUPERLU_UTIL */ diff --git a/intern/opennl/superlu/xerbla.c b/intern/opennl/superlu/xerbla.c deleted file mode 100644 index 31baaecf3b0..00000000000 --- a/intern/opennl/superlu/xerbla.c +++ /dev/null @@ -1,47 +0,0 @@ -/** \file opennl/superlu/xerbla.c - * \ingroup opennl - */ - -#include <stdio.h> -int xerbla_(char *, int *); - -/* Subroutine */ int xerbla_(char *srname, int *info) -{ -/* -- LAPACK auxiliary routine (version 2.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - XERBLA is an error handler for the LAPACK routines. - It is called by an LAPACK routine if an input parameter has an - invalid value. A message is printed and execution stops. - - Installers may consider modifying the STOP statement in order to - call system-specific exception-handling facilities. - - Arguments - ========= - - SRNAME (input) CHARACTER*6 - The name of the routine which called XERBLA. - - INFO (input) INT - The position of the invalid parameter in the parameter list - - of the calling routine. - - ===================================================================== -*/ - - printf("** On entry to %6s, parameter number %2d had an illegal value\n", - srname, *info); - -/* End of XERBLA */ - - return 0; -} /* xerbla_ */ - |