Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@pandora.be>2004-07-13 15:42:13 +0400
committerBrecht Van Lommel <brechtvanlommel@pandora.be>2004-07-13 15:42:13 +0400
commit4f1c674ee02e07a6986a54a312d7fb9e2547e950 (patch)
tree5c177fa0eb7a22fd81c259d57f7963489e666a7b /intern
parent4a13a5720db5e54984de2cf3e58a1108f52d1833 (diff)
Added SuperLU 3.0:
http://crd.lbl.gov/~xiaoye/SuperLU/ This is a library to solve sparse matrix systems (type A*x=B). It is able to solve large systems very FAST. Only the necessary parts of the library are included to limit file size and compilation time. This means the example files, fortran interface, test files, matlab interface, cblas library, complex number part and build system have been left out. All (gcc) warnings have been fixed too. This library will be used for LSCM UV unwrapping. With this library, LSCM unwrapping can be calculated in a split second, making the unwrapping proces much more interactive. Added OpenNL (Open Numerical Libary): http://www.loria.fr/~levy/OpenNL/ OpenNL is a library to easily construct and solve sparse linear systems. We use a stripped down version, as an interface to SuperLU. This library was kindly given to use by Bruno Levy.
Diffstat (limited to 'intern')
-rw-r--r--intern/Makefile2
-rw-r--r--intern/SConscript3
-rw-r--r--intern/opennl/Makefile67
-rw-r--r--intern/opennl/SConscript43
-rw-r--r--intern/opennl/doc/OpenNL_License.txt341
-rw-r--r--intern/opennl/doc/OpenNL_Readme.txt13
-rw-r--r--intern/opennl/doc/SuperLU_License.txt31
-rw-r--r--intern/opennl/doc/SuperLU_Readme.txt52
-rw-r--r--intern/opennl/extern/ONL_opennl.h163
-rw-r--r--intern/opennl/intern/Makefile43
-rw-r--r--intern/opennl/intern/opennl.c1151
-rw-r--r--intern/opennl/superlu/Cnames.h281
-rw-r--r--intern/opennl/superlu/Makefile40
-rw-r--r--intern/opennl/superlu/colamd.c2583
-rw-r--r--intern/opennl/superlu/colamd.h67
-rw-r--r--intern/opennl/superlu/get_perm_c.c453
-rw-r--r--intern/opennl/superlu/heap_relax_snode.c116
-rw-r--r--intern/opennl/superlu/lsame.c70
-rw-r--r--intern/opennl/superlu/memory.c207
-rw-r--r--intern/opennl/superlu/mmd.c1012
-rw-r--r--intern/opennl/superlu/relax_snode.c71
-rw-r--r--intern/opennl/superlu/scolumn_bmod.c353
-rw-r--r--intern/opennl/superlu/scolumn_dfs.c270
-rw-r--r--intern/opennl/superlu/scopy_to_ucol.c105
-rw-r--r--intern/opennl/superlu/sgssv.c221
-rw-r--r--intern/opennl/superlu/sgstrf.c433
-rw-r--r--intern/opennl/superlu/sgstrs.c331
-rw-r--r--intern/opennl/superlu/smemory.c676
-rw-r--r--intern/opennl/superlu/smyblas2.c225
-rw-r--r--intern/opennl/superlu/sp_coletree.c332
-rw-r--r--intern/opennl/superlu/sp_ienv.c65
-rw-r--r--intern/opennl/superlu/sp_preorder.c203
-rw-r--r--intern/opennl/superlu/spanel_bmod.c449
-rw-r--r--intern/opennl/superlu/spanel_dfs.c249
-rw-r--r--intern/opennl/superlu/spivotL.c173
-rw-r--r--intern/opennl/superlu/spruneL.c149
-rw-r--r--intern/opennl/superlu/ssnode_bmod.c117
-rw-r--r--intern/opennl/superlu/ssnode_dfs.c106
-rw-r--r--intern/opennl/superlu/ssp_blas2.c469
-rw-r--r--intern/opennl/superlu/ssp_blas3.c121
-rw-r--r--intern/opennl/superlu/ssp_defs.h234
-rw-r--r--intern/opennl/superlu/strsv.c331
-rw-r--r--intern/opennl/superlu/superlu_timer.c55
-rw-r--r--intern/opennl/superlu/supermatrix.h140
-rw-r--r--intern/opennl/superlu/sutil.c478
-rw-r--r--intern/opennl/superlu/util.c391
-rw-r--r--intern/opennl/superlu/util.h267
-rw-r--r--intern/opennl/superlu/xerbla.c43
48 files changed, 13793 insertions, 2 deletions
diff --git a/intern/Makefile b/intern/Makefile
index 08ab03150fe..af64e44cdf4 100644
--- a/intern/Makefile
+++ b/intern/Makefile
@@ -35,7 +35,7 @@ SOURCEDIR = intern
# include nan_subdirs.mk
ALLDIRS = string ghost guardedalloc bmfont moto container memutil
-ALLDIRS += decimation iksolver bsp SoundSystem
+ALLDIRS += decimation iksolver bsp SoundSystem opennl
all::
@for i in $(ALLDIRS); do \
diff --git a/intern/SConscript b/intern/SConscript
index 833a0316634..afbcd24b8be 100644
--- a/intern/SConscript
+++ b/intern/SConscript
@@ -8,7 +8,8 @@ SConscript(['SoundSystem/SConscript',
'container/SConscript',
'memutil/SConscript/',
'decimation/SConscript',
- 'iksolver/SConscript'])
+ 'iksolver/SConscript',
+ 'opennl/SConscript'])
NEW_CSG='false'
diff --git a/intern/opennl/Makefile b/intern/opennl/Makefile
new file mode 100644
index 00000000000..8aa0d4f590b
--- /dev/null
+++ b/intern/opennl/Makefile
@@ -0,0 +1,67 @@
+#
+# $Id$
+#
+# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version. The Blender
+# Foundation also sells licenses for use in proprietary software under
+# the Blender License. See http://www.blender.org/BL/ for information
+# about this.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): Hans Lambermont
+#
+# ***** END GPL/BL DUAL LICENSE BLOCK *****
+# opennl main makefile.
+#
+
+include nan_definitions.mk
+
+LIBNAME = opennl
+LIBNAME_SLU = superlu
+SOURCEDIR = intern/$(LIBNAME)
+SOURCEDIR_SLU = intern/$(LIBNAME_SLU)
+DIR = $(OCGDIR)/$(SOURCEDIR)
+DIR_SLU = $(OCGDIR)/$(SOURCEDIR_SLU)
+DIRS = intern superlu
+
+include nan_subdirs.mk
+
+install: all debug
+ @[ -d $(NAN_OPENNL) ] || mkdir $(NAN_OPENNL)
+ @[ -d $(NAN_OPENNL)/include ] || mkdir $(NAN_OPENNL)/include
+ @[ -d $(NAN_OPENNL)/lib ] || mkdir $(NAN_OPENNL)/lib
+ @[ -d $(NAN_OPENNL)/lib/debug ] || mkdir $(NAN_OPENNL)/lib/debug
+ @../tools/cpifdiff.sh $(DIR)/libopennl.a $(NAN_OPENNL)/lib/
+ @../tools/cpifdiff.sh $(DIR)/debug/libopennl.a $(NAN_OPENNL)/lib/debug/
+ifeq ($(OS),darwin)
+ ranlib $(NAN_OPENNL)/lib/libopennl.a
+ ranlib $(NAN_OPENNL)/lib/debug/libopennl.a
+endif
+ @../tools/cpifdiff.sh extern/*.h $(NAN_OPENNL)/include/
+ @[ -d $(NAN_SUPERLU) ] || mkdir $(NAN_SUPERLU)
+ @[ -d $(NAN_SUPERLU)/lib ] || mkdir $(NAN_SUPERLU)/lib
+ @[ -d $(NAN_SUPERLU)/lib/debug ] || mkdir $(NAN_SUPERLU)/lib/debug
+ @../tools/cpifdiff.sh $(DIR_SLU)/libsuperlu.a $(NAN_SUPERLU)/lib/
+ @../tools/cpifdiff.sh $(DIR_SLU)/debug/libsuperlu.a $(NAN_SUPERLU)/lib/debug/
+ifeq ($(OS),darwin)
+ ranlib $(NAN_SUPERLU)/lib/libsuperlu.a
+ ranlib $(NAN_SUPERLU)/lib/debug/libsuperlu.a
+endif
+
diff --git a/intern/opennl/SConscript b/intern/opennl/SConscript
new file mode 100644
index 00000000000..4e0260c7f33
--- /dev/null
+++ b/intern/opennl/SConscript
@@ -0,0 +1,43 @@
+Import ('user_options_dict')
+Import ('library_env')
+
+opennl_env = library_env.Copy ()
+
+source_files = ['intern/opennl.c',
+ 'superlu/colamd.c',
+ 'superlu/get_perm_c.c',
+ 'superlu/heap_relax_snode.c',
+ 'superlu/lsame.c',
+ 'superlu/memory.c',
+ 'superlu/mmd.c',
+ 'superlu/relax_snode.c',
+ 'superlu/scolumn_bmod.c',
+ 'superlu/scolumn_dfs.c',
+ 'superlu/scopy_to_ucol.c',
+ 'superlu/sgssv.c',
+ 'superlu/sgstrf.c',
+ 'superlu/sgstrs.c',
+ 'superlu/smemory.c',
+ 'superlu/smyblas2.c',
+ 'superlu/sp_coletree.c',
+ 'superlu/sp_ienv.c',
+ 'superlu/sp_preorder.c',
+ 'superlu/spanel_bmod.c',
+ 'superlu/spanel_dfs.c',
+ 'superlu/spivotL.c',
+ 'superlu/spruneL.c',
+ 'superlu/ssnode_bmod.c',
+ 'superlu/ssnode_dfs.c',
+ 'superlu/ssp_blas2.c',
+ 'superlu/ssp_blas3.c',
+ 'superlu/strsv.c',
+ 'superlu/superlu_timer.c',
+ 'superlu/sutil.c',
+ 'superlu/util.c',
+ 'superlu/xerbla.c']
+
+opennl_env.Append (CPPPATH = ['extern',
+ 'superlu'])
+
+opennl_env.Library (target='#'+user_options_dict['BUILD_DIR']+'/lib/blender_ONL', source=source_files)
+
diff --git a/intern/opennl/doc/OpenNL_License.txt b/intern/opennl/doc/OpenNL_License.txt
new file mode 100644
index 00000000000..4e8d97fd526
--- /dev/null
+++ b/intern/opennl/doc/OpenNL_License.txt
@@ -0,0 +1,341 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
diff --git a/intern/opennl/doc/OpenNL_Readme.txt b/intern/opennl/doc/OpenNL_Readme.txt
new file mode 100644
index 00000000000..e6aea3c0286
--- /dev/null
+++ b/intern/opennl/doc/OpenNL_Readme.txt
@@ -0,0 +1,13 @@
+
+This is OpenNL, a library to easily construct and solve sparse linear systems.
+* OpenNL is supplied with a set of iterative solvers (Conjugate gradient,
+ BICGSTAB, GMRes) and preconditioners (Jacobi, SSOR).
+* OpenNL can also use other solvers (SuperLU 3.0 supported as an OpenNL
+ extension)
+
+Note that to be compatible with OpenNL, SuperLU 3.0 needs to be compiled with
+the following flag (see make.inc in SuperLU3.0):
+CDEFS = -DAdd_ (the default is -DAdd__, just remove the second underscore)
+
+OpenNL was modified for Blender to be used only as a wrapper for SuperLU.
+
diff --git a/intern/opennl/doc/SuperLU_License.txt b/intern/opennl/doc/SuperLU_License.txt
new file mode 100644
index 00000000000..f31a01782e2
--- /dev/null
+++ b/intern/opennl/doc/SuperLU_License.txt
@@ -0,0 +1,31 @@
+Copyright (c) 2003, The Regents of the University of California, through
+Lawrence Berkeley National Laboratory (subject to receipt of any required
+approvals from U.S. Dept. of Energy)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification,
+are permitted provided that the following conditions are met:
+
+(1) Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+(2) Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of
+Energy nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/intern/opennl/doc/SuperLU_Readme.txt b/intern/opennl/doc/SuperLU_Readme.txt
new file mode 100644
index 00000000000..c1cedd09893
--- /dev/null
+++ b/intern/opennl/doc/SuperLU_Readme.txt
@@ -0,0 +1,52 @@
+ SuperLU (Version 3.0)
+ =====================
+
+Copyright (c) 2003, The Regents of the University of California, through
+Lawrence Berkeley National Laboratory (subject to receipt of any required
+approvals from U.S. Dept. of Energy)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+(1) Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+(2) Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of
+Energy nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+SuperLU contains a set of subroutines to solve a sparse linear system
+A*X=B. It uses Gaussian elimination with partial pivoting (GEPP).
+The columns of A may be preordered before factorization; the
+preordering for sparsity is completely separate from the factorization.
+
+SuperLU is implemented in ANSI C, and must be compiled with standard
+ANSI C compilers. It provides functionality for both real and complex
+matrices, in both single and double precision. The file names for the
+single-precision real version start with letter "s" (such as sgstrf.c);
+the file names for the double-precision real version start with letter "d"
+(such as dgstrf.c); the file names for the single-precision complex
+version start with letter "c" (such as cgstrf.c); the file names
+for the double-precision complex version start with letter "z"
+(such as zgstrf.c).
+
+SuperLU was modified for Blender to only include single-precision
+functionality.
+
diff --git a/intern/opennl/extern/ONL_opennl.h b/intern/opennl/extern/ONL_opennl.h
new file mode 100644
index 00000000000..5e4bd24313c
--- /dev/null
+++ b/intern/opennl/extern/ONL_opennl.h
@@ -0,0 +1,163 @@
+/*
+ * $Id$
+ *
+ * OpenNL: Numerical Library
+ * Copyright (C) 2004 Bruno Levy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * If you modify this software, you should include a notice giving the
+ * name of the person performing the modification, the date of modification,
+ * and the reason for such modification.
+ *
+ * Contact: Bruno Levy
+ *
+ * levy@loria.fr
+ *
+ * ISA Project
+ * LORIA, INRIA Lorraine,
+ * Campus Scientifique, BP 239
+ * 54506 VANDOEUVRE LES NANCY CEDEX
+ * FRANCE
+ *
+ * Note that the GNU General Public License does not permit incorporating
+ * the Software into proprietary programs.
+ */
+
+/*
+#define NL_DEBUG
+#define NL_PARANOID
+*/
+
+#define NL_USE_SUPERLU
+
+#ifndef nlOPENNL_H
+#define nlOPENNL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NL_VERSION_0_0 1
+
+/*
+ *
+ * Datatypes
+ *
+ */
+
+typedef unsigned int NLenum;
+typedef unsigned char NLboolean;
+typedef unsigned int NLbitfield;
+typedef void NLvoid;
+typedef signed char NLbyte; /* 1-byte signed */
+typedef short NLshort; /* 2-byte signed */
+typedef int NLint; /* 4-byte signed */
+typedef unsigned char NLubyte; /* 1-byte unsigned */
+typedef unsigned short NLushort; /* 2-byte unsigned */
+typedef unsigned int NLuint; /* 4-byte unsigned */
+typedef int NLsizei; /* 4-byte signed */
+typedef float NLfloat; /* single precision float */
+typedef double NLdouble; /* double precision float */
+
+typedef void* NLContext ;
+
+/*
+ *
+ * Constants
+ *
+ */
+
+#define NL_FALSE 0x0
+#define NL_TRUE 0x1
+
+/* Primitives */
+
+#define NL_SYSTEM 0x0
+#define NL_MATRIX 0x1
+#define NL_ROW 0x2
+
+/* Solver Parameters */
+
+#define NL_SOLVER 0x100
+#define NL_NB_VARIABLES 0x101
+#define NL_LEAST_SQUARES 0x102
+#define NL_SYMMETRIC 0x106
+#define NL_ERROR 0x108
+
+/* Enable / Disable */
+
+#define NL_NORMALIZE_ROWS 0x400
+
+/* Row parameters */
+
+#define NL_RIGHT_HAND_SIDE 0x500
+#define NL_ROW_SCALING 0x501
+
+/*
+ * Contexts
+ */
+ NLContext nlNewContext() ;
+ void nlDeleteContext(NLContext context) ;
+ void nlMakeCurrent(NLContext context) ;
+ NLContext nlGetCurrent() ;
+
+/*
+ * State set/get
+ */
+
+ void nlSolverParameterf(NLenum pname, NLfloat param) ;
+ void nlSolverParameteri(NLenum pname, NLint param) ;
+
+ void nlRowParameterf(NLenum pname, NLfloat param) ;
+ void nlRowParameteri(NLenum pname, NLint param) ;
+
+ void nlGetBooleanv(NLenum pname, NLboolean* params) ;
+ void nlGetFloatv(NLenum pname, NLfloat* params) ;
+ void nlGetIntergerv(NLenum pname, NLint* params) ;
+
+ void nlEnable(NLenum pname) ;
+ void nlDisable(NLenum pname) ;
+ NLboolean nlIsEnabled(NLenum pname) ;
+
+/*
+ * Variables
+ */
+ void nlSetVariable(NLuint index, NLfloat value) ;
+ NLfloat nlGetVariable(NLuint index) ;
+ void nlLockVariable(NLuint index) ;
+ void nlUnlockVariable(NLuint index) ;
+ NLboolean nlVariableIsLocked(NLuint index) ;
+
+/*
+ * Begin/End
+ */
+
+ void nlBegin(NLenum primitive) ;
+ void nlEnd(NLenum primitive) ;
+ void nlCoefficient(NLuint index, NLfloat value) ;
+
+/*
+ * Solve
+ */
+
+ NLboolean nlSolve() ;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/intern/opennl/intern/Makefile b/intern/opennl/intern/Makefile
new file mode 100644
index 00000000000..2e57905d931
--- /dev/null
+++ b/intern/opennl/intern/Makefile
@@ -0,0 +1,43 @@
+#
+# $Id$
+#
+# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version. The Blender
+# Foundation also sells licenses for use in proprietary software under
+# the Blender License. See http://www.blender.org/BL/ for information
+# about this.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): none yet.
+#
+# ***** END GPL/BL DUAL LICENSE BLOCK *****
+# opennl intern Makefile
+#
+
+LIBNAME = opennl
+DIR = $(OCGDIR)/intern/$(LIBNAME)
+
+include nan_compile.mk
+
+CCFLAGS += $(NAN_LEVEL_2_CPP_WARNINGS)
+
+CPPFLAGS += -I../superlu -I../extern
+
+
diff --git a/intern/opennl/intern/opennl.c b/intern/opennl/intern/opennl.c
new file mode 100644
index 00000000000..be797223f51
--- /dev/null
+++ b/intern/opennl/intern/opennl.c
@@ -0,0 +1,1151 @@
+/*
+ * $Id$
+ *
+ * OpenNL: Numerical Library
+ * Copyright (C) 2004 Bruno Levy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * If you modify this software, you should include a notice giving the
+ * name of the person performing the modification, the date of modification,
+ * and the reason for such modification.
+ *
+ * Contact: Bruno Levy
+ *
+ * levy@loria.fr
+ *
+ * ISA Project
+ * LORIA, INRIA Lorraine,
+ * Campus Scientifique, BP 239
+ * 54506 VANDOEUVRE LES NANCY CEDEX
+ * FRANCE
+ *
+ * Note that the GNU General Public License does not permit incorporating
+ * the Software into proprietary programs.
+ */
+
+#include "ONL_opennl.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#ifdef NL_PARANOID
+#ifndef NL_DEBUG
+#define NL_DEBUG
+#endif
+#endif
+
+/* SuperLU includes */
+#include <ssp_defs.h>
+#include <util.h>
+
+/************************************************************************************/
+/* Assertions */
+
+
+static void __nl_assertion_failed(char* cond, char* file, int line) {
+ fprintf(
+ stderr,
+ "OpenNL assertion failed: %s, file:%s, line:%d\n",
+ cond,file,line
+ ) ;
+ abort() ;
+}
+
+static void __nl_range_assertion_failed(
+ float x, float min_val, float max_val, char* file, int line
+) {
+ fprintf(
+ stderr,
+ "OpenNL range assertion failed: %f in [ %f ... %f ], file:%s, line:%d\n",
+ x, min_val, max_val, file,line
+ ) ;
+ abort() ;
+}
+
+static void __nl_should_not_have_reached(char* file, int line) {
+ fprintf(
+ stderr,
+ "OpenNL should not have reached this point: file:%s, line:%d\n",
+ file,line
+ ) ;
+ abort() ;
+}
+
+
+#define __nl_assert(x) { \
+ if(!(x)) { \
+ __nl_assertion_failed(#x,__FILE__, __LINE__) ; \
+ } \
+}
+
+#define __nl_range_assert(x,min_val,max_val) { \
+ if(((x) < (min_val)) || ((x) > (max_val))) { \
+ __nl_range_assertion_failed(x, min_val, max_val, \
+ __FILE__, __LINE__ \
+ ) ; \
+ } \
+}
+
+#define __nl_assert_not_reached { \
+ __nl_should_not_have_reached(__FILE__, __LINE__) ; \
+}
+
+#ifdef NL_DEBUG
+#define __nl_debug_assert(x) __nl_assert(x)
+#define __nl_debug_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
+#else
+#define __nl_debug_assert(x)
+#define __nl_debug_range_assert(x,min_val,max_val)
+#endif
+
+#ifdef NL_PARANOID
+#define __nl_parano_assert(x) __nl_assert(x)
+#define __nl_parano_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
+#else
+#define __nl_parano_assert(x)
+#define __nl_parano_range_assert(x,min_val,max_val)
+#endif
+
+/************************************************************************************/
+/* classic macros */
+
+#ifndef MIN
+#define MIN(x,y) (((x) < (y)) ? (x) : (y))
+#endif
+
+#ifndef MAX
+#define MAX(x,y) (((x) > (y)) ? (x) : (y))
+#endif
+
+/************************************************************************************/
+/* memory management */
+
+#define __NL_NEW(T) (T*)(calloc(1, sizeof(T)))
+#define __NL_NEW_ARRAY(T,NB) (T*)(calloc((NB),sizeof(T)))
+#define __NL_RENEW_ARRAY(T,x,NB) (T*)(realloc(x,(NB)*sizeof(T)))
+#define __NL_DELETE(x) free(x); x = NULL
+#define __NL_DELETE_ARRAY(x) free(x); x = NULL
+
+#define __NL_CLEAR(T, x) memset(x, 0, sizeof(T))
+#define __NL_CLEAR_ARRAY(T,x,NB) memset(x, 0, (NB)*sizeof(T))
+
+/************************************************************************************/
+/* Dynamic arrays for sparse row/columns */
+
+typedef struct {
+ NLuint index ;
+ NLfloat value ;
+} __NLCoeff ;
+
+typedef struct {
+ NLuint size ;
+ NLuint capacity ;
+ __NLCoeff* coeff ;
+} __NLRowColumn ;
+
+static void __nlRowColumnConstruct(__NLRowColumn* c) {
+ c->size = 0 ;
+ c->capacity = 0 ;
+ c->coeff = NULL ;
+}
+
+static void __nlRowColumnDestroy(__NLRowColumn* c) {
+ __NL_DELETE_ARRAY(c->coeff) ;
+#ifdef NL_PARANOID
+ __NL_CLEAR(__NLRowColumn, c) ;
+#endif
+}
+
+static void __nlRowColumnGrow(__NLRowColumn* c) {
+ if(c->capacity != 0) {
+ c->capacity = 2 * c->capacity ;
+ c->coeff = __NL_RENEW_ARRAY(__NLCoeff, c->coeff, c->capacity) ;
+ } else {
+ c->capacity = 4 ;
+ c->coeff = __NL_NEW_ARRAY(__NLCoeff, c->capacity) ;
+ }
+}
+
+static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
+ NLuint i ;
+ for(i=0; i<c->size; i++) {
+ if(c->coeff[i].index == (NLuint)index) {
+ c->coeff[i].value += value ;
+ return ;
+ }
+ }
+ if(c->size == c->capacity) {
+ __nlRowColumnGrow(c) ;
+ }
+ c->coeff[c->size].index = index ;
+ c->coeff[c->size].value = value ;
+ c->size++ ;
+}
+
+/* Does not check whether the index already exists */
+static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
+ if(c->size == c->capacity) {
+ __nlRowColumnGrow(c) ;
+ }
+ c->coeff[c->size].index = index ;
+ c->coeff[c->size].value = value ;
+ c->size++ ;
+}
+
+static void __nlRowColumnZero(__NLRowColumn* c) {
+ c->size = 0 ;
+}
+
+static void __nlRowColumnClear(__NLRowColumn* c) {
+ c->size = 0 ;
+ c->capacity = 0 ;
+ __NL_DELETE_ARRAY(c->coeff) ;
+}
+
+/************************************************************************************/
+/* SparseMatrix data structure */
+
+#define __NL_ROWS 1
+#define __NL_COLUMNS 2
+#define __NL_SYMMETRIC 4
+
+typedef struct {
+ NLuint m ;
+ NLuint n ;
+ NLuint diag_size ;
+ NLenum storage ;
+ __NLRowColumn* row ;
+ __NLRowColumn* column ;
+ NLfloat* diag ;
+} __NLSparseMatrix ;
+
+
+static void __nlSparseMatrixConstruct(
+ __NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage
+) {
+ NLuint i ;
+ M->m = m ;
+ M->n = n ;
+ M->storage = storage ;
+ if(storage & __NL_ROWS) {
+ M->row = __NL_NEW_ARRAY(__NLRowColumn, m) ;
+ for(i=0; i<n; i++) {
+ __nlRowColumnConstruct(&(M->row[i])) ;
+ }
+ } else {
+ M->row = NULL ;
+ }
+
+ if(storage & __NL_COLUMNS) {
+ M->column = __NL_NEW_ARRAY(__NLRowColumn, n) ;
+ for(i=0; i<n; i++) {
+ __nlRowColumnConstruct(&(M->column[i])) ;
+ }
+ } else {
+ M->column = NULL ;
+ }
+
+ M->diag_size = MIN(m,n) ;
+ M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size) ;
+}
+
+static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
+ NLuint i ;
+ __NL_DELETE_ARRAY(M->diag) ;
+ if(M->storage & __NL_ROWS) {
+ for(i=0; i<M->m; i++) {
+ __nlRowColumnDestroy(&(M->row[i])) ;
+ }
+ __NL_DELETE_ARRAY(M->row) ;
+ }
+ if(M->storage & __NL_COLUMNS) {
+ for(i=0; i<M->n; i++) {
+ __nlRowColumnDestroy(&(M->column[i])) ;
+ }
+ __NL_DELETE_ARRAY(M->column) ;
+ }
+#ifdef NL_PARANOID
+ __NL_CLEAR(__NLSparseMatrix,M) ;
+#endif
+}
+
+static void __nlSparseMatrixAdd(
+ __NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
+) {
+ __nl_parano_range_assert(i, 0, M->m - 1) ;
+ __nl_parano_range_assert(j, 0, M->n - 1) ;
+ if((M->storage & __NL_SYMMETRIC) && (j > i)) {
+ return ;
+ }
+ if(i == j) {
+ M->diag[i] += value ;
+ }
+ if(M->storage & __NL_ROWS) {
+ __nlRowColumnAdd(&(M->row[i]), j, value) ;
+ }
+ if(M->storage & __NL_COLUMNS) {
+ __nlRowColumnAdd(&(M->column[j]), i, value) ;
+ }
+}
+
+static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
+ NLuint i ;
+ if(M->storage & __NL_ROWS) {
+ for(i=0; i<M->m; i++) {
+ __nlRowColumnClear(&(M->row[i])) ;
+ }
+ }
+ if(M->storage & __NL_COLUMNS) {
+ for(i=0; i<M->n; i++) {
+ __nlRowColumnClear(&(M->column[i])) ;
+ }
+ }
+ __NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size) ;
+}
+
+/* Returns the number of non-zero coefficients */
+static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
+ NLuint nnz = 0 ;
+ NLuint i ;
+ if(M->storage & __NL_ROWS) {
+ for(i = 0; i<M->m; i++) {
+ nnz += M->row[i].size ;
+ }
+ } else if (M->storage & __NL_COLUMNS) {
+ for(i = 0; i<M->n; i++) {
+ nnz += M->column[i].size ;
+ }
+ } else {
+ __nl_assert_not_reached ;
+ }
+ return nnz ;
+}
+
+/************************************************************************************/
+/* SparseMatrix x Vector routines, internal helper routines */
+
+static void __nlSparseMatrix_mult_rows_symmetric(
+ __NLSparseMatrix* A, NLfloat* x, NLfloat* y
+) {
+ NLuint m = A->m ;
+ NLuint i,ij ;
+ __NLRowColumn* Ri = NULL ;
+ __NLCoeff* c = NULL ;
+ for(i=0; i<m; i++) {
+ y[i] = 0 ;
+ Ri = &(A->row[i]) ;
+ for(ij=0; ij<Ri->size; ij++) {
+ c = &(Ri->coeff[ij]) ;
+ y[i] += c->value * x[c->index] ;
+ if(i != c->index) {
+ y[c->index] += c->value * x[i] ;
+ }
+ }
+ }
+}
+
+static void __nlSparseMatrix_mult_rows(
+ __NLSparseMatrix* A, NLfloat* x, NLfloat* y
+) {
+ NLuint m = A->m ;
+ NLuint i,ij ;
+ __NLRowColumn* Ri = NULL ;
+ __NLCoeff* c = NULL ;
+ for(i=0; i<m; i++) {
+ y[i] = 0 ;
+ Ri = &(A->row[i]) ;
+ for(ij=0; ij<Ri->size; ij++) {
+ c = &(Ri->coeff[ij]) ;
+ y[i] += c->value * x[c->index] ;
+ }
+ }
+}
+
+static void __nlSparseMatrix_mult_cols_symmetric(
+ __NLSparseMatrix* A, NLfloat* x, NLfloat* y
+) {
+ NLuint n = A->n ;
+ NLuint j,ii ;
+ __NLRowColumn* Cj = NULL ;
+ __NLCoeff* c = NULL ;
+ for(j=0; j<n; j++) {
+ y[j] = 0 ;
+ Cj = &(A->column[j]) ;
+ for(ii=0; ii<Cj->size; ii++) {
+ c = &(Cj->coeff[ii]) ;
+ y[c->index] += c->value * x[j] ;
+ if(j != c->index) {
+ y[j] += c->value * x[c->index] ;
+ }
+ }
+ }
+}
+
+static void __nlSparseMatrix_mult_cols(
+ __NLSparseMatrix* A, NLfloat* x, NLfloat* y
+) {
+ NLuint n = A->n ;
+ NLuint j,ii ;
+ __NLRowColumn* Cj = NULL ;
+ __NLCoeff* c = NULL ;
+ __NL_CLEAR_ARRAY(NLfloat, y, A->m) ;
+ for(j=0; j<n; j++) {
+ Cj = &(A->column[j]) ;
+ for(ii=0; ii<Cj->size; ii++) {
+ c = &(Cj->coeff[ii]) ;
+ y[c->index] += c->value * x[j] ;
+ }
+ }
+}
+
+/************************************************************************************/
+/* SparseMatrix x Vector routines, main driver routine */
+
+void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
+ if(A->storage & __NL_ROWS) {
+ if(A->storage & __NL_SYMMETRIC) {
+ __nlSparseMatrix_mult_rows_symmetric(A, x, y) ;
+ } else {
+ __nlSparseMatrix_mult_rows(A, x, y) ;
+ }
+ } else {
+ if(A->storage & __NL_SYMMETRIC) {
+ __nlSparseMatrix_mult_cols_symmetric(A, x, y) ;
+ } else {
+ __nlSparseMatrix_mult_cols(A, x, y) ;
+ }
+ }
+}
+
+/************************************************************************************/
+/* NLContext data structure */
+
+typedef void(*__NLMatrixFunc)(float* x, float* y) ;
+
+typedef struct {
+ NLfloat value ;
+ NLboolean locked ;
+ NLuint index ;
+} __NLVariable ;
+
+#define __NL_STATE_INITIAL 0
+#define __NL_STATE_SYSTEM 1
+#define __NL_STATE_MATRIX 2
+#define __NL_STATE_ROW 3
+#define __NL_STATE_MATRIX_CONSTRUCTED 4
+#define __NL_STATE_SYSTEM_CONSTRUCTED 5
+#define __NL_STATE_SOLVED 6
+
+typedef struct {
+ NLenum state ;
+ __NLVariable* variable ;
+ NLuint n ;
+ __NLSparseMatrix M ;
+ __NLRowColumn af ;
+ __NLRowColumn al ;
+ __NLRowColumn xl ;
+ NLfloat* x ;
+ NLfloat* b ;
+ NLfloat right_hand_side ;
+ NLfloat row_scaling ;
+ NLuint nb_variables ;
+ NLuint current_row ;
+ NLboolean least_squares ;
+ NLboolean symmetric ;
+ NLboolean normalize_rows ;
+ NLboolean alloc_M ;
+ NLboolean alloc_af ;
+ NLboolean alloc_al ;
+ NLboolean alloc_xl ;
+ NLboolean alloc_variable ;
+ NLboolean alloc_x ;
+ NLboolean alloc_b ;
+ NLfloat error ;
+ __NLMatrixFunc matrix_vector_prod ;
+} __NLContext ;
+
+static __NLContext* __nlCurrentContext = NULL ;
+
+void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
+ __nlSparseMatrixMult(&(__nlCurrentContext->M), x, y) ;
+}
+
+
+NLContext nlNewContext() {
+ __NLContext* result = __NL_NEW(__NLContext) ;
+ result->state = __NL_STATE_INITIAL ;
+ result->row_scaling = 1.0 ;
+ result->right_hand_side = 0.0 ;
+ result->matrix_vector_prod = __nlMatrixVectorProd_default ;
+ nlMakeCurrent(result) ;
+ return result ;
+}
+
+void nlDeleteContext(NLContext context_in) {
+ __NLContext* context = (__NLContext*)(context_in) ;
+ if(__nlCurrentContext == context) {
+ __nlCurrentContext = NULL ;
+ }
+ if(context->alloc_M) {
+ __nlSparseMatrixDestroy(&context->M) ;
+ }
+ if(context->alloc_af) {
+ __nlRowColumnDestroy(&context->af) ;
+ }
+ if(context->alloc_al) {
+ __nlRowColumnDestroy(&context->al) ;
+ }
+ if(context->alloc_xl) {
+ __nlRowColumnDestroy(&context->xl) ;
+ }
+ if(context->alloc_variable) {
+ __NL_DELETE_ARRAY(context->variable) ;
+ }
+ if(context->alloc_x) {
+ __NL_DELETE_ARRAY(context->x) ;
+ }
+ if(context->alloc_b) {
+ __NL_DELETE_ARRAY(context->b) ;
+ }
+
+#ifdef NL_PARANOID
+ __NL_CLEAR(__NLContext, context) ;
+#endif
+ __NL_DELETE(context) ;
+}
+
+void nlMakeCurrent(NLContext context) {
+ __nlCurrentContext = (__NLContext*)(context) ;
+}
+
+NLContext nlGetCurrent() {
+ return __nlCurrentContext ;
+}
+
+void __nlCheckState(NLenum state) {
+ __nl_assert(__nlCurrentContext->state == state) ;
+}
+
+void __nlTransition(NLenum from_state, NLenum to_state) {
+ __nlCheckState(from_state) ;
+ __nlCurrentContext->state = to_state ;
+}
+
+/************************************************************************************/
+/* Get/Set parameters */
+
+void nlSolverParameterf(NLenum pname, NLfloat param) {
+ __nlCheckState(__NL_STATE_INITIAL) ;
+ switch(pname) {
+ case NL_NB_VARIABLES: {
+ __nl_assert(param > 0) ;
+ __nlCurrentContext->nb_variables = (NLuint)param ;
+ } break ;
+ case NL_LEAST_SQUARES: {
+ __nlCurrentContext->least_squares = (NLboolean)param ;
+ } break ;
+ case NL_SYMMETRIC: {
+ __nlCurrentContext->symmetric = (NLboolean)param ;
+ }
+ default: {
+ __nl_assert_not_reached ;
+ } break ;
+ }
+}
+
+void nlSolverParameteri(NLenum pname, NLint param) {
+ __nlCheckState(__NL_STATE_INITIAL) ;
+ switch(pname) {
+ case NL_NB_VARIABLES: {
+ __nl_assert(param > 0) ;
+ __nlCurrentContext->nb_variables = (NLuint)param ;
+ } break ;
+ case NL_LEAST_SQUARES: {
+ __nlCurrentContext->least_squares = (NLboolean)param ;
+ } break ;
+ case NL_SYMMETRIC: {
+ __nlCurrentContext->symmetric = (NLboolean)param ;
+ }
+ default: {
+ __nl_assert_not_reached ;
+ } break ;
+ }
+}
+
+void nlRowParameterf(NLenum pname, NLfloat param) {
+ __nlCheckState(__NL_STATE_MATRIX) ;
+ switch(pname) {
+ case NL_RIGHT_HAND_SIDE: {
+ __nlCurrentContext->right_hand_side = param ;
+ } break ;
+ case NL_ROW_SCALING: {
+ __nlCurrentContext->row_scaling = param ;
+ } break ;
+ }
+}
+
+void nlRowParameteri(NLenum pname, NLint param) {
+ __nlCheckState(__NL_STATE_MATRIX) ;
+ switch(pname) {
+ case NL_RIGHT_HAND_SIDE: {
+ __nlCurrentContext->right_hand_side = (NLfloat)param ;
+ } break ;
+ case NL_ROW_SCALING: {
+ __nlCurrentContext->row_scaling = (NLfloat)param ;
+ } break ;
+ }
+}
+
+void nlGetBooleanv(NLenum pname, NLboolean* params) {
+ switch(pname) {
+ case NL_LEAST_SQUARES: {
+ *params = __nlCurrentContext->least_squares ;
+ } break ;
+ case NL_SYMMETRIC: {
+ *params = __nlCurrentContext->symmetric ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ } break ;
+ }
+}
+
+void nlGetFloatv(NLenum pname, NLfloat* params) {
+ switch(pname) {
+ case NL_NB_VARIABLES: {
+ *params = (NLfloat)(__nlCurrentContext->nb_variables) ;
+ } break ;
+ case NL_LEAST_SQUARES: {
+ *params = (NLfloat)(__nlCurrentContext->least_squares) ;
+ } break ;
+ case NL_SYMMETRIC: {
+ *params = (NLfloat)(__nlCurrentContext->symmetric) ;
+ } break ;
+ case NL_ERROR: {
+ *params = (NLfloat)(__nlCurrentContext->error) ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ } break ;
+ }
+}
+
+void nlGetIntergerv(NLenum pname, NLint* params) {
+ switch(pname) {
+ case NL_NB_VARIABLES: {
+ *params = (NLint)(__nlCurrentContext->nb_variables) ;
+ } break ;
+ case NL_LEAST_SQUARES: {
+ *params = (NLint)(__nlCurrentContext->least_squares) ;
+ } break ;
+ case NL_SYMMETRIC: {
+ *params = (NLint)(__nlCurrentContext->symmetric) ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ } break ;
+ }
+}
+
+/************************************************************************************/
+/* Enable / Disable */
+
+void nlEnable(NLenum pname) {
+ switch(pname) {
+ case NL_NORMALIZE_ROWS: {
+ __nl_assert(__nlCurrentContext->state != __NL_STATE_ROW) ;
+ __nlCurrentContext->normalize_rows = NL_TRUE ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ }
+ }
+}
+
+void nlDisable(NLenum pname) {
+ switch(pname) {
+ case NL_NORMALIZE_ROWS: {
+ __nl_assert(__nlCurrentContext->state != __NL_STATE_ROW) ;
+ __nlCurrentContext->normalize_rows = NL_FALSE ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ }
+ }
+}
+
+NLboolean nlIsEnabled(NLenum pname) {
+ switch(pname) {
+ case NL_NORMALIZE_ROWS: {
+ return __nlCurrentContext->normalize_rows ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ }
+ }
+ return NL_FALSE ;
+}
+
+/************************************************************************************/
+/* Get/Set Lock/Unlock variables */
+
+void nlSetVariable(NLuint index, NLfloat value) {
+ __nlCheckState(__NL_STATE_SYSTEM) ;
+ __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1) ;
+ __nlCurrentContext->variable[index].value = value ;
+}
+
+NLfloat nlGetVariable(NLuint index) {
+ __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL) ;
+ __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1) ;
+ return __nlCurrentContext->variable[index].value ;
+}
+
+void nlLockVariable(NLuint index) {
+ __nlCheckState(__NL_STATE_SYSTEM) ;
+ __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1) ;
+ __nlCurrentContext->variable[index].locked = NL_TRUE ;
+}
+
+void nlUnlockVariable(NLuint index) {
+ __nlCheckState(__NL_STATE_SYSTEM) ;
+ __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1) ;
+ __nlCurrentContext->variable[index].locked = NL_FALSE ;
+}
+
+NLboolean nlVariableIsLocked(NLuint index) {
+ __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL) ;
+ __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1) ;
+ return __nlCurrentContext->variable[index].locked ;
+}
+
+/************************************************************************************/
+/* System construction */
+
+void __nlVariablesToVector() {
+ NLuint i ;
+ __nl_assert(__nlCurrentContext->alloc_x) ;
+ __nl_assert(__nlCurrentContext->alloc_variable) ;
+ for(i=0; i<__nlCurrentContext->nb_variables; i++) {
+ __NLVariable* v = &(__nlCurrentContext->variable[i]) ;
+ if(!v->locked) {
+ __nl_assert(v->index < __nlCurrentContext->n) ;
+ __nlCurrentContext->x[v->index] = v->value ;
+ }
+ }
+}
+
+void __nlVectorToVariables() {
+ NLuint i ;
+ __nl_assert(__nlCurrentContext->alloc_x) ;
+ __nl_assert(__nlCurrentContext->alloc_variable) ;
+ for(i=0; i<__nlCurrentContext->nb_variables; i++) {
+ __NLVariable* v = &(__nlCurrentContext->variable[i]) ;
+ if(!v->locked) {
+ __nl_assert(v->index < __nlCurrentContext->n) ;
+ v->value = __nlCurrentContext->x[v->index] ;
+ }
+ }
+}
+
+
+void __nlBeginSystem() {
+ __nlTransition(__NL_STATE_INITIAL, __NL_STATE_SYSTEM) ;
+ __nl_assert(__nlCurrentContext->nb_variables > 0) ;
+ __nlCurrentContext->variable = __NL_NEW_ARRAY(
+ __NLVariable, __nlCurrentContext->nb_variables
+ ) ;
+ __nlCurrentContext->alloc_variable = NL_TRUE ;
+}
+
+void __nlEndSystem() {
+ __nlTransition(__NL_STATE_MATRIX_CONSTRUCTED, __NL_STATE_SYSTEM_CONSTRUCTED) ;
+}
+
+void __nlBeginMatrix() {
+ NLuint i ;
+ NLuint n = 0 ;
+ NLenum storage = __NL_ROWS ;
+
+ __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX) ;
+
+ for(i=0; i<__nlCurrentContext->nb_variables; i++) {
+ if(!__nlCurrentContext->variable[i].locked) {
+ __nlCurrentContext->variable[i].index = n ;
+ n++ ;
+ } else {
+ __nlCurrentContext->variable[i].index = ~0 ;
+ }
+ }
+
+ __nlCurrentContext->n = n ;
+
+ /* a least squares problem results in a symmetric matrix */
+ if(__nlCurrentContext->least_squares) {
+ __nlCurrentContext->symmetric = NL_TRUE ;
+ }
+
+ if(__nlCurrentContext->symmetric) {
+ storage = (storage | __NL_SYMMETRIC) ;
+ }
+
+ /* SuperLU storage does not support symmetric storage */
+ storage = (storage & ~__NL_SYMMETRIC) ;
+
+ __nlSparseMatrixConstruct(&__nlCurrentContext->M, n, n, storage) ;
+ __nlCurrentContext->alloc_M = NL_TRUE ;
+
+ __nlCurrentContext->x = __NL_NEW_ARRAY(NLfloat, n) ;
+ __nlCurrentContext->alloc_x = NL_TRUE ;
+
+ __nlCurrentContext->b = __NL_NEW_ARRAY(NLfloat, n) ;
+ __nlCurrentContext->alloc_b = NL_TRUE ;
+
+ __nlVariablesToVector() ;
+
+ __nlRowColumnConstruct(&__nlCurrentContext->af) ;
+ __nlCurrentContext->alloc_af = NL_TRUE ;
+ __nlRowColumnConstruct(&__nlCurrentContext->al) ;
+ __nlCurrentContext->alloc_al = NL_TRUE ;
+ __nlRowColumnConstruct(&__nlCurrentContext->xl) ;
+ __nlCurrentContext->alloc_xl = NL_TRUE ;
+
+ __nlCurrentContext->current_row = 0 ;
+}
+
+void __nlEndMatrix() {
+ __nlTransition(__NL_STATE_MATRIX, __NL_STATE_MATRIX_CONSTRUCTED) ;
+
+ __nlRowColumnDestroy(&__nlCurrentContext->af) ;
+ __nlCurrentContext->alloc_af = NL_FALSE ;
+ __nlRowColumnDestroy(&__nlCurrentContext->al) ;
+ __nlCurrentContext->alloc_al = NL_FALSE ;
+ __nlRowColumnDestroy(&__nlCurrentContext->xl) ;
+ __nlCurrentContext->alloc_al = NL_FALSE ;
+
+ if(!__nlCurrentContext->least_squares) {
+ __nl_assert(
+ __nlCurrentContext->current_row ==
+ __nlCurrentContext->n
+ ) ;
+ }
+}
+
+void __nlBeginRow() {
+ __nlTransition(__NL_STATE_MATRIX, __NL_STATE_ROW) ;
+ __nlRowColumnZero(&__nlCurrentContext->af) ;
+ __nlRowColumnZero(&__nlCurrentContext->al) ;
+ __nlRowColumnZero(&__nlCurrentContext->xl) ;
+}
+
+void __nlScaleRow(NLfloat s) {
+ __NLRowColumn* af = &__nlCurrentContext->af ;
+ __NLRowColumn* al = &__nlCurrentContext->al ;
+ NLuint nf = af->size ;
+ NLuint nl = al->size ;
+ NLuint i ;
+ for(i=0; i<nf; i++) {
+ af->coeff[i].value *= s ;
+ }
+ for(i=0; i<nl; i++) {
+ al->coeff[i].value *= s ;
+ }
+ __nlCurrentContext->right_hand_side *= s ;
+}
+
+void __nlNormalizeRow(NLfloat weight) {
+ __NLRowColumn* af = &__nlCurrentContext->af ;
+ __NLRowColumn* al = &__nlCurrentContext->al ;
+ NLuint nf = af->size ;
+ NLuint nl = al->size ;
+ NLuint i ;
+ NLfloat norm = 0.0 ;
+ for(i=0; i<nf; i++) {
+ norm += af->coeff[i].value * af->coeff[i].value ;
+ }
+ for(i=0; i<nl; i++) {
+ norm += al->coeff[i].value * al->coeff[i].value ;
+ }
+ norm = sqrt(norm) ;
+ __nlScaleRow(weight / norm) ;
+}
+
+void __nlEndRow() {
+ __NLRowColumn* af = &__nlCurrentContext->af ;
+ __NLRowColumn* al = &__nlCurrentContext->al ;
+ __NLRowColumn* xl = &__nlCurrentContext->xl ;
+ __NLSparseMatrix* M = &__nlCurrentContext->M ;
+ NLfloat* b = __nlCurrentContext->b ;
+ NLuint nf = af->size ;
+ NLuint nl = al->size ;
+ NLuint current_row = __nlCurrentContext->current_row ;
+ NLuint i ;
+ NLuint j ;
+ NLfloat S ;
+ __nlTransition(__NL_STATE_ROW, __NL_STATE_MATRIX) ;
+
+ if(__nlCurrentContext->normalize_rows) {
+ __nlNormalizeRow(__nlCurrentContext->row_scaling) ;
+ } else {
+ __nlScaleRow(__nlCurrentContext->row_scaling) ;
+ }
+
+ if(__nlCurrentContext->least_squares) {
+ for(i=0; i<nf; i++) {
+ for(j=0; j<nf; j++) {
+ __nlSparseMatrixAdd(
+ M, af->coeff[i].index, af->coeff[j].index,
+ af->coeff[i].value * af->coeff[j].value
+ ) ;
+ }
+ }
+ S = -__nlCurrentContext->right_hand_side ;
+ for(j=0; j<nl; j++) {
+ S += al->coeff[j].value * xl->coeff[j].value ;
+ }
+ for(i=0; i<nf; i++) {
+ b[ af->coeff[i].index ] -= af->coeff[i].value * S ;
+ }
+ } else {
+ for(i=0; i<nf; i++) {
+ __nlSparseMatrixAdd(
+ M, current_row, af->coeff[i].index, af->coeff[i].value
+ ) ;
+ }
+ b[current_row] = -__nlCurrentContext->right_hand_side ;
+ for(i=0; i<nl; i++) {
+ b[current_row] -= al->coeff[i].value * xl->coeff[i].value ;
+ }
+ }
+ __nlCurrentContext->current_row++ ;
+ __nlCurrentContext->right_hand_side = 0.0 ;
+ __nlCurrentContext->row_scaling = 1.0 ;
+}
+
+void nlCoefficient(NLuint index, NLfloat value) {
+ __NLVariable* v;
+ unsigned int zero= 0;
+ __nlCheckState(__NL_STATE_ROW) ;
+ __nl_range_assert(index, zero, __nlCurrentContext->nb_variables - 1) ;
+ v = &(__nlCurrentContext->variable[index]) ;
+ if(v->locked) {
+ __nlRowColumnAppend(&(__nlCurrentContext->al), 0, value) ;
+ __nlRowColumnAppend(&(__nlCurrentContext->xl), 0, v->value) ;
+ } else {
+ __nlRowColumnAppend(&(__nlCurrentContext->af), v->index, value) ;
+ }
+}
+
+void nlBegin(NLenum prim) {
+ switch(prim) {
+ case NL_SYSTEM: {
+ __nlBeginSystem() ;
+ } break ;
+ case NL_MATRIX: {
+ __nlBeginMatrix() ;
+ } break ;
+ case NL_ROW: {
+ __nlBeginRow() ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ }
+ }
+}
+
+void nlEnd(NLenum prim) {
+ switch(prim) {
+ case NL_SYSTEM: {
+ __nlEndSystem() ;
+ } break ;
+ case NL_MATRIX: {
+ __nlEndMatrix() ;
+ } break ;
+ case NL_ROW: {
+ __nlEndRow() ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ }
+ }
+}
+
+/************************************************************************/
+/* SuperLU wrapper */
+
+/* Note: SuperLU is difficult to call, but it is worth it. */
+/* Here is a driver inspired by A. Sheffer's "cow flattener". */
+static NLboolean __nlSolve_SUPERLU( NLboolean do_perm) {
+
+ /* OpenNL Context */
+ __NLSparseMatrix* M = &(__nlCurrentContext->M) ;
+ NLfloat* b = __nlCurrentContext->b ;
+ NLfloat* x = __nlCurrentContext->x ;
+
+ /* Compressed Row Storage matrix representation */
+ NLuint n = __nlCurrentContext->n ;
+ NLuint nnz = __nlSparseMatrixNNZ(M) ; /* Number of Non-Zero coeffs */
+ NLint* xa = __NL_NEW_ARRAY(NLint, n+1) ;
+ NLfloat* rhs = __NL_NEW_ARRAY(NLfloat, n) ;
+ NLfloat* a = __NL_NEW_ARRAY(NLfloat, nnz) ;
+ NLint* asub = __NL_NEW_ARRAY(NLint, nnz) ;
+
+ /* Permutation vector */
+ NLint* perm_r = __NL_NEW_ARRAY(NLint, n) ;
+ NLint* perm = __NL_NEW_ARRAY(NLint, n) ;
+
+ /* SuperLU variables */
+ SuperMatrix A, B ; /* System */
+ SuperMatrix L, U ; /* Inverse of A */
+ NLint info ; /* status code */
+ DNformat *vals = NULL ; /* access to result */
+ float *rvals = NULL ; /* access to result */
+
+ /* SuperLU options and stats */
+ superlu_options_t options ;
+ SuperLUStat_t stat ;
+
+
+ /* Temporary variables */
+ __NLRowColumn* Ri = NULL ;
+ NLuint i,jj,count ;
+
+ __nl_assert(!(M->storage & __NL_SYMMETRIC)) ;
+ __nl_assert(M->storage & __NL_ROWS) ;
+ __nl_assert(M->m == M->n) ;
+
+
+ /*
+ * Step 1: convert matrix M into SuperLU compressed column
+ * representation.
+ * -------------------------------------------------------
+ */
+
+ count = 0 ;
+ for(i=0; i<n; i++) {
+ Ri = &(M->row[i]) ;
+ xa[i] = count ;
+ for(jj=0; jj<Ri->size; jj++) {
+ a[count] = Ri->coeff[jj].value ;
+ asub[count] = Ri->coeff[jj].index ;
+ count++ ;
+ }
+ }
+ xa[n] = nnz ;
+
+ /* Save memory for SuperLU */
+ __nlSparseMatrixClear(M) ;
+
+
+ /*
+ * Rem: symmetric storage does not seem to work with
+ * SuperLU ... (->deactivated in main SLS::Solver driver)
+ */
+ sCreate_CompCol_Matrix(
+ &A, n, n, nnz, a, asub, xa,
+ SLU_NR, /* Row_wise, no supernode */
+ SLU_S, /* floats */
+ SLU_GE /* general storage */
+ );
+
+ /* Step 2: create vector */
+ sCreate_Dense_Matrix(
+ &B, n, 1, b, n,
+ SLU_DN, /* Fortran-type column-wise storage */
+ SLU_S, /* floats */
+ SLU_GE /* general */
+ );
+
+
+ /* Step 3: get permutation matrix
+ * ------------------------------
+ * com_perm: 0 -> no re-ordering
+ * 1 -> re-ordering for A^t.A
+ * 2 -> re-ordering for A^t+A
+ * 3 -> approximate minimum degree ordering
+ */
+ get_perm_c(do_perm ? 3 : 0, &A, perm) ;
+
+ /* Step 4: call SuperLU main routine
+ * ---------------------------------
+ */
+
+ set_default_options(&options) ;
+ options.ColPerm = MY_PERMC ;
+ StatInit(&stat) ;
+
+ sgssv(&options, &A, perm, perm_r, &L, &U, &B, &stat, &info);
+
+ /* Step 5: get the solution
+ * ------------------------
+ * Fortran-type column-wise storage
+ */
+ vals = (DNformat*)B.Store;
+ rvals = (float*)(vals->nzval);
+ if(info == 0) {
+ for(i = 0; i < n; i++){
+ x[i] = rvals[i];
+ }
+ }
+
+ /* Step 6: cleanup
+ * ---------------
+ */
+
+ /*
+ * For these two ones, only the "store" structure
+ * needs to be deallocated (the arrays have been allocated
+ * by us).
+ */
+ Destroy_SuperMatrix_Store(&A) ;
+ Destroy_SuperMatrix_Store(&B) ;
+
+
+ /*
+ * These ones need to be fully deallocated (they have been
+ * allocated by SuperLU).
+ */
+ Destroy_SuperNode_Matrix(&L);
+ Destroy_CompCol_Matrix(&U);
+
+ __NL_DELETE_ARRAY(xa) ;
+ __NL_DELETE_ARRAY(rhs) ;
+ __NL_DELETE_ARRAY(a) ;
+ __NL_DELETE_ARRAY(asub) ;
+ __NL_DELETE_ARRAY(perm_r) ;
+ __NL_DELETE_ARRAY(perm) ;
+
+ return (info == 0) ;
+}
+
+
+/************************************************************************/
+/* nlSolve() driver routine */
+
+NLboolean nlSolve() {
+ NLboolean result = NL_TRUE ;
+
+ __nlCheckState(__NL_STATE_SYSTEM_CONSTRUCTED) ;
+ result = __nlSolve_SUPERLU(NL_TRUE) ;
+
+ __nlVectorToVariables() ;
+ __nlTransition(__NL_STATE_SYSTEM_CONSTRUCTED, __NL_STATE_SOLVED) ;
+
+ return result ;
+}
+
diff --git a/intern/opennl/superlu/Cnames.h b/intern/opennl/superlu/Cnames.h
new file mode 100644
index 00000000000..35ff7b0b665
--- /dev/null
+++ b/intern/opennl/superlu/Cnames.h
@@ -0,0 +1,281 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 1, 1997
+ *
+ */
+#ifndef __SUPERLU_CNAMES /* allow multiple inclusions */
+#define __SUPERLU_CNAMES
+
+/* We want this flag, safer than putting in build system */
+#define Add_
+
+/*
+ * These macros define how C routines will be called. ADD_ assumes that
+ * they will be called by fortran, which expects C routines to have an
+ * underscore postfixed to the name (Suns, and the Intel expect this).
+ * NOCHANGE indicates that fortran will be calling, and that it expects
+ * the name called by fortran to be identical to that compiled by the C
+ * (RS6K's do this). UPCASE says it expects C routines called by fortran
+ * to be in all upcase (CRAY wants this).
+ */
+
+#define ADD_ 0
+#define ADD__ 1
+#define NOCHANGE 2
+#define UPCASE 3
+#define C_CALL 4
+
+#ifdef UpCase
+#define F77_CALL_C UPCASE
+#endif
+
+#ifdef NoChange
+#define F77_CALL_C NOCHANGE
+#endif
+
+#ifdef Add_
+#define F77_CALL_C ADD_
+#endif
+
+#ifdef Add__
+#define F77_CALL_C ADD__
+#endif
+
+/* Default */
+#ifndef F77_CALL_C
+#define F77_CALL_C ADD_
+#endif
+
+
+#if (F77_CALL_C == ADD_)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine
+ * No redefinition necessary to have following Fortran to C interface:
+ * FORTRAN CALL C DECLARATION
+ * call dgemm(...) void dgemm_(...)
+ *
+ * This is the default.
+ */
+
+#endif
+
+#if (F77_CALL_C == ADD__)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine
+ * for following Fortran to C interface:
+ * FORTRAN CALL C DECLARATION
+ * call dgemm(...) void dgemm__(...)
+ */
+#define sasum_ sasum__
+#define isamax_ isamax__
+#define scopy_ scopy__
+#define sscal_ sscal__
+#define sger_ sger__
+#define snrm2_ snrm2__
+#define ssymv_ ssymv__
+#define sdot_ sdot__
+#define saxpy_ saxpy__
+#define ssyr2_ ssyr2__
+#define srot_ srot__
+#define sgemv_ sgemv__
+#define strsv_ strsv__
+#define sgemm_ sgemm__
+#define strsm_ strsm__
+
+#define dasum_ dasum__
+#define idamax_ idamax__
+#define dcopy_ dcopy__
+#define dscal_ dscal__
+#define dger_ dger__
+#define dnrm2_ dnrm2__
+#define dsymv_ dsymv__
+#define ddot_ ddot__
+#define daxpy_ daxpy__
+#define dsyr2_ dsyr2__
+#define drot_ drot__
+#define dgemv_ dgemv__
+#define dtrsv_ dtrsv__
+#define dgemm_ dgemm__
+#define dtrsm_ dtrsm__
+
+#define scasum_ scasum__
+#define icamax_ icamax__
+#define ccopy_ ccopy__
+#define cscal_ cscal__
+#define scnrm2_ scnrm2__
+#define caxpy_ caxpy__
+#define cgemv_ cgemv__
+#define ctrsv_ ctrsv__
+#define cgemm_ cgemm__
+#define ctrsm_ ctrsm__
+#define cgerc_ cgerc__
+#define chemv_ chemv__
+#define cher2_ cher2__
+
+#define dzasum_ dzasum__
+#define izamax_ izamax__
+#define zcopy_ zcopy__
+#define zscal_ zscal__
+#define dznrm2_ dznrm2__
+#define zaxpy_ zaxpy__
+#define zgemv_ zgemv__
+#define ztrsv_ ztrsv__
+#define zgemm_ zgemm__
+#define ztrsm_ ztrsm__
+#define zgerc_ zgerc__
+#define zhemv_ zhemv__
+#define zher2_ zher2__
+
+#define c_bridge_dgssv_ c_bridge_dgssv__
+#define c_fortran_dgssv_ c_fortran_dgssv__
+#endif
+
+#if (F77_CALL_C == UPCASE)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine
+ * following Fortran to C interface:
+ * FORTRAN CALL C DECLARATION
+ * call dgemm(...) void DGEMM(...)
+ */
+#define sasum_ SASUM
+#define isamax_ ISAMAX
+#define scopy_ SCOPY
+#define sscal_ SSCAL
+#define sger_ SGER
+#define snrm2_ SNRM2
+#define ssymv_ SSYMV
+#define sdot_ SDOT
+#define saxpy_ SAXPY
+#define ssyr2_ SSYR2
+#define srot_ SROT
+#define sgemv_ SGEMV
+#define strsv_ STRSV
+#define sgemm_ SGEMM
+#define strsm_ STRSM
+
+#define dasum_ SASUM
+#define idamax_ ISAMAX
+#define dcopy_ SCOPY
+#define dscal_ SSCAL
+#define dger_ SGER
+#define dnrm2_ SNRM2
+#define dsymv_ SSYMV
+#define ddot_ SDOT
+#define daxpy_ SAXPY
+#define dsyr2_ SSYR2
+#define drot_ SROT
+#define dgemv_ SGEMV
+#define dtrsv_ STRSV
+#define dgemm_ SGEMM
+#define dtrsm_ STRSM
+
+#define scasum_ SCASUM
+#define icamax_ ICAMAX
+#define ccopy_ CCOPY
+#define cscal_ CSCAL
+#define scnrm2_ SCNRM2
+#define caxpy_ CAXPY
+#define cgemv_ CGEMV
+#define ctrsv_ CTRSV
+#define cgemm_ CGEMM
+#define ctrsm_ CTRSM
+#define cgerc_ CGERC
+#define chemv_ CHEMV
+#define cher2_ CHER2
+
+#define dzasum_ SCASUM
+#define izamax_ ICAMAX
+#define zcopy_ CCOPY
+#define zscal_ CSCAL
+#define dznrm2_ SCNRM2
+#define zaxpy_ CAXPY
+#define zgemv_ CGEMV
+#define ztrsv_ CTRSV
+#define zgemm_ CGEMM
+#define ztrsm_ CTRSM
+#define zgerc_ CGERC
+#define zhemv_ CHEMV
+#define zher2_ CHER2
+
+#define c_bridge_dgssv_ C_BRIDGE_DGSSV
+#define c_fortran_dgssv_ C_FORTRAN_DGSSV
+#endif
+
+#if (F77_CALL_C == NOCHANGE)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine
+ * for following Fortran to C interface:
+ * FORTRAN CALL C DECLARATION
+ * call dgemm(...) void dgemm(...)
+ */
+#define sasum_ sasum
+#define isamax_ isamax
+#define scopy_ scopy
+#define sscal_ sscal
+#define sger_ sger
+#define snrm2_ snrm2
+#define ssymv_ ssymv
+#define sdot_ sdot
+#define saxpy_ saxpy
+#define ssyr2_ ssyr2
+#define srot_ srot
+#define sgemv_ sgemv
+#define strsv_ strsv
+#define sgemm_ sgemm
+#define strsm_ strsm
+
+#define dasum_ dasum
+#define idamax_ idamax
+#define dcopy_ dcopy
+#define dscal_ dscal
+#define dger_ dger
+#define dnrm2_ dnrm2
+#define dsymv_ dsymv
+#define ddot_ ddot
+#define daxpy_ daxpy
+#define dsyr2_ dsyr2
+#define drot_ drot
+#define dgemv_ dgemv
+#define dtrsv_ dtrsv
+#define dgemm_ dgemm
+#define dtrsm_ dtrsm
+
+#define scasum_ scasum
+#define icamax_ icamax
+#define ccopy_ ccopy
+#define cscal_ cscal
+#define scnrm2_ scnrm2
+#define caxpy_ caxpy
+#define cgemv_ cgemv
+#define ctrsv_ ctrsv
+#define cgemm_ cgemm
+#define ctrsm_ ctrsm
+#define cgerc_ cgerc
+#define chemv_ chemv
+#define cher2_ cher2
+
+#define dzasum_ dzasum
+#define izamax_ izamax
+#define zcopy_ zcopy
+#define zscal_ zscal
+#define dznrm2_ dznrm2
+#define zaxpy_ zaxpy
+#define zgemv_ zgemv
+#define ztrsv_ ztrsv
+#define zgemm_ zgemm
+#define ztrsm_ ztrsm
+#define zgerc_ zgerc
+#define zhemv_ zhemv
+#define zher2_ zher2
+
+#define c_bridge_dgssv_ c_bridge_dgssv
+#define c_fortran_dgssv_ c_fortran_dgssv
+#endif
+
+#endif /* __SUPERLU_CNAMES */
diff --git a/intern/opennl/superlu/Makefile b/intern/opennl/superlu/Makefile
new file mode 100644
index 00000000000..942ceebc79c
--- /dev/null
+++ b/intern/opennl/superlu/Makefile
@@ -0,0 +1,40 @@
+#
+# $Id$
+#
+# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version. The Blender
+# Foundation also sells licenses for use in proprietary software under
+# the Blender License. See http://www.blender.org/BL/ for information
+# about this.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): none yet.
+#
+# ***** END GPL/BL DUAL LICENSE BLOCK *****
+# opennl intern Makefile
+#
+
+LIBNAME = superlu
+DIR = $(OCGDIR)/intern/$(LIBNAME)
+
+include nan_compile.mk
+
+CCFLAGS += $(NAN_LEVEL_2_CPP_WARNINGS)
+
diff --git a/intern/opennl/superlu/colamd.c b/intern/opennl/superlu/colamd.c
new file mode 100644
index 00000000000..b60718f9938
--- /dev/null
+++ b/intern/opennl/superlu/colamd.c
@@ -0,0 +1,2583 @@
+/* ========================================================================== */
+/* === colamd - a sparse matrix column ordering algorithm =================== */
+/* ========================================================================== */
+
+/*
+ colamd: An approximate minimum degree column ordering algorithm.
+
+ Purpose:
+
+ Colamd computes a permutation Q such that the Cholesky factorization of
+ (AQ)'(AQ) has less fill-in and requires fewer floating point operations
+ than A'A. This also provides a good ordering for sparse partial
+ pivoting methods, P(AQ) = LU, where Q is computed prior to numerical
+ factorization, and P is computed during numerical factorization via
+ conventional partial pivoting with row interchanges. Colamd is the
+ column ordering method used in SuperLU, part of the ScaLAPACK library.
+ It is also available as user-contributed software for Matlab 5.2,
+ available from MathWorks, Inc. (http://www.mathworks.com). This
+ routine can be used in place of COLMMD in Matlab. By default, the \
+ and / operators in Matlab perform a column ordering (using COLMMD)
+ prior to LU factorization using sparse partial pivoting, in the
+ built-in Matlab LU(A) routine.
+
+ Authors:
+
+ The authors of the code itself are Stefan I. Larimore and Timothy A.
+ Davis (davis@cise.ufl.edu), University of Florida. The algorithm was
+ developed in collaboration with John Gilbert, Xerox PARC, and Esmond
+ Ng, Oak Ridge National Laboratory.
+
+ Date:
+
+ August 3, 1998. Version 1.0.
+
+ Acknowledgements:
+
+ This work was supported by the National Science Foundation, under
+ grants DMS-9504974 and DMS-9803599.
+
+ Notice:
+
+ Copyright (c) 1998 by the University of Florida. All Rights Reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ User documentation of any code that uses this code must cite the
+ Authors, the Copyright, and "Used by permission." If this code is
+ accessible from within Matlab, then typing "help colamd" or "colamd"
+ (with no arguments) must cite the Authors. Permission to modify the
+ code and to distribute modified code is granted, provided the above
+ notices are retained, and a notice that the code was modified is
+ included with the above copyright notice. You must also retain the
+ Availability information below, of the original version.
+
+ This software is provided free of charge.
+
+ Availability:
+
+ This file is located at
+
+ http://www.cise.ufl.edu/~davis/colamd/colamd.c
+
+ The colamd.h file is required, located in the same directory.
+ The colamdmex.c file provides a Matlab interface for colamd.
+ The symamdmex.c file provides a Matlab interface for symamd, which is
+ a symmetric ordering based on this code, colamd.c. All codes are
+ purely ANSI C compliant (they use no Unix-specific routines, include
+ files, etc.).
+*/
+
+/* ========================================================================== */
+/* === Description of user-callable routines ================================ */
+/* ========================================================================== */
+
+/*
+ Each user-callable routine (declared as PUBLIC) is briefly described below.
+ Refer to the comments preceding each routine for more details.
+
+ ----------------------------------------------------------------------------
+ colamd_recommended:
+ ----------------------------------------------------------------------------
+
+ Usage:
+
+ Alen = colamd_recommended (nnz, n_row, n_col) ;
+
+ Purpose:
+
+ Returns recommended value of Alen for use by colamd. Returns -1
+ if any input argument is negative.
+
+ Arguments:
+
+ int nnz ; Number of nonzeros in the matrix A. This must
+ be the same value as p [n_col] in the call to
+ colamd - otherwise you will get a wrong value
+ of the recommended memory to use.
+ int n_row ; Number of rows in the matrix A.
+ int n_col ; Number of columns in the matrix A.
+
+ ----------------------------------------------------------------------------
+ colamd_set_defaults:
+ ----------------------------------------------------------------------------
+
+ Usage:
+
+ colamd_set_defaults (knobs) ;
+
+ Purpose:
+
+ Sets the default parameters.
+
+ Arguments:
+
+ double knobs [COLAMD_KNOBS] ; Output only.
+
+ Rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) entries
+ are removed prior to ordering. Columns with more than
+ (knobs [COLAMD_DENSE_COL] * n_row) entries are removed
+ prior to ordering, and placed last in the output column
+ ordering. Default values of these two knobs are both 0.5.
+ Currently, only knobs [0] and knobs [1] are used, but future
+ versions may use more knobs. If so, they will be properly set
+ to their defaults by the future version of colamd_set_defaults,
+ so that the code that calls colamd will not need to change,
+ assuming that you either use colamd_set_defaults, or pass a
+ (double *) NULL pointer as the knobs array to colamd.
+
+ ----------------------------------------------------------------------------
+ colamd:
+ ----------------------------------------------------------------------------
+
+ Usage:
+
+ colamd (n_row, n_col, Alen, A, p, knobs) ;
+
+ Purpose:
+
+ Computes a column ordering (Q) of A such that P(AQ)=LU or
+ (AQ)'AQ=LL' have less fill-in and require fewer floating point
+ operations than factorizing the unpermuted matrix A or A'A,
+ respectively.
+
+ Arguments:
+
+ int n_row ;
+
+ Number of rows in the matrix A.
+ Restriction: n_row >= 0.
+ Colamd returns FALSE if n_row is negative.
+
+ int n_col ;
+
+ Number of columns in the matrix A.
+ Restriction: n_col >= 0.
+ Colamd returns FALSE if n_col is negative.
+
+ int Alen ;
+
+ Restriction (see note):
+ Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col + COLAMD_STATS
+ Colamd returns FALSE if these conditions are not met.
+
+ Note: this restriction makes an modest assumption regarding
+ the size of the two typedef'd structures, below. We do,
+ however, guarantee that
+ Alen >= colamd_recommended (nnz, n_row, n_col)
+ will be sufficient.
+
+ int A [Alen] ; Input argument, stats on output.
+
+ A is an integer array of size Alen. Alen must be at least as
+ large as the bare minimum value given above, but this is very
+ low, and can result in excessive run time. For best
+ performance, we recommend that Alen be greater than or equal to
+ colamd_recommended (nnz, n_row, n_col), which adds
+ nnz/5 to the bare minimum value given above.
+
+ On input, the row indices of the entries in column c of the
+ matrix are held in A [(p [c]) ... (p [c+1]-1)]. The row indices
+ in a given column c need not be in ascending order, and
+ duplicate row indices may be be present. However, colamd will
+ work a little faster if both of these conditions are met
+ (Colamd puts the matrix into this format, if it finds that the
+ the conditions are not met).
+
+ The matrix is 0-based. That is, rows are in the range 0 to
+ n_row-1, and columns are in the range 0 to n_col-1. Colamd
+ returns FALSE if any row index is out of range.
+
+ The contents of A are modified during ordering, and are thus
+ undefined on output with the exception of a few statistics
+ about the ordering (A [0..COLAMD_STATS-1]):
+ A [0]: number of dense or empty rows ignored.
+ A [1]: number of dense or empty columns ignored (and ordered
+ last in the output permutation p)
+ A [2]: number of garbage collections performed.
+ A [3]: 0, if all row indices in each column were in sorted
+ order, and no duplicates were present.
+ 1, otherwise (in which case colamd had to do more work)
+ Note that a row can become "empty" if it contains only
+ "dense" and/or "empty" columns, and similarly a column can
+ become "empty" if it only contains "dense" and/or "empty" rows.
+ Future versions may return more statistics in A, but the usage
+ of these 4 entries in A will remain unchanged.
+
+ int p [n_col+1] ; Both input and output argument.
+
+ p is an integer array of size n_col+1. On input, it holds the
+ "pointers" for the column form of the matrix A. Column c of
+ the matrix A is held in A [(p [c]) ... (p [c+1]-1)]. The first
+ entry, p [0], must be zero, and p [c] <= p [c+1] must hold
+ for all c in the range 0 to n_col-1. The value p [n_col] is
+ thus the total number of entries in the pattern of the matrix A.
+ Colamd returns FALSE if these conditions are not met.
+
+ On output, if colamd returns TRUE, the array p holds the column
+ permutation (Q, for P(AQ)=LU or (AQ)'(AQ)=LL'), where p [0] is
+ the first column index in the new ordering, and p [n_col-1] is
+ the last. That is, p [k] = j means that column j of A is the
+ kth pivot column, in AQ, where k is in the range 0 to n_col-1
+ (p [0] = j means that column j of A is the first column in AQ).
+
+ If colamd returns FALSE, then no permutation is returned, and
+ p is undefined on output.
+
+ double knobs [COLAMD_KNOBS] ; Input only.
+
+ See colamd_set_defaults for a description. If the knobs array
+ is not present (that is, if a (double *) NULL pointer is passed
+ in its place), then the default values of the parameters are
+ used instead.
+
+*/
+
+
+/* ========================================================================== */
+/* === Include files ======================================================== */
+/* ========================================================================== */
+
+/* limits.h: the largest positive integer (INT_MAX) */
+#include <limits.h>
+
+/* colamd.h: knob array size, stats output size, and global prototypes */
+#include "colamd.h"
+
+/* ========================================================================== */
+/* === Scaffolding code definitions ======================================== */
+/* ========================================================================== */
+
+/* Ensure that debugging is turned off: */
+#ifndef NDEBUG
+#define NDEBUG
+#endif
+
+/* assert.h: the assert macro (no debugging if NDEBUG is defined) */
+#include <assert.h>
+
+/*
+ Our "scaffolding code" philosophy: In our opinion, well-written library
+ code should keep its "debugging" code, and just normally have it turned off
+ by the compiler so as not to interfere with performance. This serves
+ several purposes:
+
+ (1) assertions act as comments to the reader, telling you what the code
+ expects at that point. All assertions will always be true (unless
+ there really is a bug, of course).
+
+ (2) leaving in the scaffolding code assists anyone who would like to modify
+ the code, or understand the algorithm (by reading the debugging output,
+ one can get a glimpse into what the code is doing).
+
+ (3) (gasp!) for actually finding bugs. This code has been heavily tested
+ and "should" be fully functional and bug-free ... but you never know...
+
+ To enable debugging, comment out the "#define NDEBUG" above. The code will
+ become outrageously slow when debugging is enabled. To control the level of
+ debugging output, set an environment variable D to 0 (little), 1 (some),
+ 2, 3, or 4 (lots).
+*/
+
+/* ========================================================================== */
+/* === Row and Column structures ============================================ */
+/* ========================================================================== */
+
+typedef struct ColInfo_struct
+{
+ int start ; /* index for A of first row in this column, or DEAD */
+ /* if column is dead */
+ int length ; /* number of rows in this column */
+ union
+ {
+ int thickness ; /* number of original columns represented by this */
+ /* col, if the column is alive */
+ int parent ; /* parent in parent tree super-column structure, if */
+ /* the column is dead */
+ } shared1 ;
+ union
+ {
+ int score ; /* the score used to maintain heap, if col is alive */
+ int order ; /* pivot ordering of this column, if col is dead */
+ } shared2 ;
+ union
+ {
+ int headhash ; /* head of a hash bucket, if col is at the head of */
+ /* a degree list */
+ int hash ; /* hash value, if col is not in a degree list */
+ int prev ; /* previous column in degree list, if col is in a */
+ /* degree list (but not at the head of a degree list) */
+ } shared3 ;
+ union
+ {
+ int degree_next ; /* next column, if col is in a degree list */
+ int hash_next ; /* next column, if col is in a hash list */
+ } shared4 ;
+
+} ColInfo ;
+
+typedef struct RowInfo_struct
+{
+ int start ; /* index for A of first col in this row */
+ int length ; /* number of principal columns in this row */
+ union
+ {
+ int degree ; /* number of principal & non-principal columns in row */
+ int p ; /* used as a row pointer in init_rows_cols () */
+ } shared1 ;
+ union
+ {
+ int mark ; /* for computing set differences and marking dead rows*/
+ int first_column ;/* first column in row (used in garbage collection) */
+ } shared2 ;
+
+} RowInfo ;
+
+/* ========================================================================== */
+/* === Definitions ========================================================== */
+/* ========================================================================== */
+
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+
+#define ONES_COMPLEMENT(r) (-(r)-1)
+
+#define TRUE (1)
+#define FALSE (0)
+#define EMPTY (-1)
+
+/* Row and column status */
+#define ALIVE (0)
+#define DEAD (-1)
+
+/* Column status */
+#define DEAD_PRINCIPAL (-1)
+#define DEAD_NON_PRINCIPAL (-2)
+
+/* Macros for row and column status update and checking. */
+#define ROW_IS_DEAD(r) ROW_IS_MARKED_DEAD (Row[r].shared2.mark)
+#define ROW_IS_MARKED_DEAD(row_mark) (row_mark < ALIVE)
+#define ROW_IS_ALIVE(r) (Row [r].shared2.mark >= ALIVE)
+#define COL_IS_DEAD(c) (Col [c].start < ALIVE)
+#define COL_IS_ALIVE(c) (Col [c].start >= ALIVE)
+#define COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == DEAD_PRINCIPAL)
+#define KILL_ROW(r) { Row [r].shared2.mark = DEAD ; }
+#define KILL_PRINCIPAL_COL(c) { Col [c].start = DEAD_PRINCIPAL ; }
+#define KILL_NON_PRINCIPAL_COL(c) { Col [c].start = DEAD_NON_PRINCIPAL ; }
+
+/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */
+#define PUBLIC
+#define PRIVATE static
+
+/* ========================================================================== */
+/* === Prototypes of PRIVATE routines ======================================= */
+/* ========================================================================== */
+
+PRIVATE int init_rows_cols
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int p []
+) ;
+
+PRIVATE void init_scoring
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int head [],
+ double knobs [COLAMD_KNOBS],
+ int *p_n_row2,
+ int *p_n_col2,
+ int *p_max_deg
+) ;
+
+PRIVATE int find_ordering
+(
+ int n_row,
+ int n_col,
+ int Alen,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int head [],
+ int n_col2,
+ int max_deg,
+ int pfree
+) ;
+
+PRIVATE void order_children
+(
+ int n_col,
+ ColInfo Col [],
+ int p []
+) ;
+
+PRIVATE void detect_super_cols
+(
+#ifndef NDEBUG
+ int n_col,
+ RowInfo Row [],
+#endif
+ ColInfo Col [],
+ int A [],
+ int head [],
+ int row_start,
+ int row_length
+) ;
+
+PRIVATE int garbage_collection
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int *pfree
+) ;
+
+PRIVATE int clear_mark
+(
+ int n_row,
+ RowInfo Row []
+) ;
+
+/* ========================================================================== */
+/* === Debugging definitions ================================================ */
+/* ========================================================================== */
+
+#ifndef NDEBUG
+
+/* === With debugging ======================================================= */
+
+/* stdlib.h: for getenv and atoi, to get debugging level from environment */
+#include <stdlib.h>
+
+/* stdio.h: for printf (no printing if debugging is turned off) */
+#include <stdio.h>
+
+PRIVATE void debug_deg_lists
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int head [],
+ int min_score,
+ int should,
+ int max_deg
+) ;
+
+PRIVATE void debug_mark
+(
+ int n_row,
+ RowInfo Row [],
+ int tag_mark,
+ int max_mark
+) ;
+
+PRIVATE void debug_matrix
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A []
+) ;
+
+PRIVATE void debug_structures
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int n_col2
+) ;
+
+/* the following is the *ONLY* global variable in this file, and is only */
+/* present when debugging */
+
+PRIVATE int debug_colamd ; /* debug print level */
+
+#define DEBUG0(params) { (void) printf params ; }
+#define DEBUG1(params) { if (debug_colamd >= 1) (void) printf params ; }
+#define DEBUG2(params) { if (debug_colamd >= 2) (void) printf params ; }
+#define DEBUG3(params) { if (debug_colamd >= 3) (void) printf params ; }
+#define DEBUG4(params) { if (debug_colamd >= 4) (void) printf params ; }
+
+#else
+
+/* === No debugging ========================================================= */
+
+#define DEBUG0(params) ;
+#define DEBUG1(params) ;
+#define DEBUG2(params) ;
+#define DEBUG3(params) ;
+#define DEBUG4(params) ;
+
+#endif
+
+/* ========================================================================== */
+
+
+/* ========================================================================== */
+/* === USER-CALLABLE ROUTINES: ============================================== */
+/* ========================================================================== */
+
+
+/* ========================================================================== */
+/* === colamd_recommended =================================================== */
+/* ========================================================================== */
+
+/*
+ The colamd_recommended routine returns the suggested size for Alen. This
+ value has been determined to provide good balance between the number of
+ garbage collections and the memory requirements for colamd.
+*/
+
+PUBLIC int colamd_recommended /* returns recommended value of Alen. */
+(
+ /* === Parameters ======================================================= */
+
+ int nnz, /* number of nonzeros in A */
+ int n_row, /* number of rows in A */
+ int n_col /* number of columns in A */
+)
+{
+ /* === Local variables ================================================== */
+
+ int minimum ; /* bare minimum requirements */
+ int recommended ; /* recommended value of Alen */
+
+ if (nnz < 0 || n_row < 0 || n_col < 0)
+ {
+ /* return -1 if any input argument is corrupted */
+ DEBUG0 (("colamd_recommended error!")) ;
+ DEBUG0 ((" nnz: %d, n_row: %d, n_col: %d\n", nnz, n_row, n_col)) ;
+ return (-1) ;
+ }
+
+ minimum =
+ 2 * (nnz) /* for A */
+ + (((n_col) + 1) * sizeof (ColInfo) / sizeof (int)) /* for Col */
+ + (((n_row) + 1) * sizeof (RowInfo) / sizeof (int)) /* for Row */
+ + n_col /* minimum elbow room to guarrantee success */
+ + COLAMD_STATS ; /* for output statistics */
+
+ /* recommended is equal to the minumum plus enough memory to keep the */
+ /* number garbage collections low */
+ recommended = minimum + nnz/5 ;
+
+ return (recommended) ;
+}
+
+
+/* ========================================================================== */
+/* === colamd_set_defaults ================================================== */
+/* ========================================================================== */
+
+/*
+ The colamd_set_defaults routine sets the default values of the user-
+ controllable parameters for colamd:
+
+ knobs [0] rows with knobs[0]*n_col entries or more are removed
+ prior to ordering.
+
+ knobs [1] columns with knobs[1]*n_row entries or more are removed
+ prior to ordering, and placed last in the column
+ permutation.
+
+ knobs [2..19] unused, but future versions might use this
+*/
+
+PUBLIC void colamd_set_defaults
+(
+ /* === Parameters ======================================================= */
+
+ double knobs [COLAMD_KNOBS] /* knob array */
+)
+{
+ /* === Local variables ================================================== */
+
+ int i ;
+
+ if (!knobs)
+ {
+ return ; /* no knobs to initialize */
+ }
+ for (i = 0 ; i < COLAMD_KNOBS ; i++)
+ {
+ knobs [i] = 0 ;
+ }
+ knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */
+ knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */
+}
+
+
+/* ========================================================================== */
+/* === colamd =============================================================== */
+/* ========================================================================== */
+
+/*
+ The colamd routine computes a column ordering Q of a sparse matrix
+ A such that the LU factorization P(AQ) = LU remains sparse, where P is
+ selected via partial pivoting. The routine can also be viewed as
+ providing a permutation Q such that the Cholesky factorization
+ (AQ)'(AQ) = LL' remains sparse.
+
+ On input, the nonzero patterns of the columns of A are stored in the
+ array A, in order 0 to n_col-1. A is held in 0-based form (rows in the
+ range 0 to n_row-1 and columns in the range 0 to n_col-1). Row indices
+ for column c are located in A [(p [c]) ... (p [c+1]-1)], where p [0] = 0,
+ and thus p [n_col] is the number of entries in A. The matrix is
+ destroyed on output. The row indices within each column do not have to
+ be sorted (from small to large row indices), and duplicate row indices
+ may be present. However, colamd will work a little faster if columns are
+ sorted and no duplicates are present. Matlab 5.2 always passes the matrix
+ with sorted columns, and no duplicates.
+
+ The integer array A is of size Alen. Alen must be at least of size
+ (where nnz is the number of entries in A):
+
+ nnz for the input column form of A
+ + nnz for a row form of A that colamd generates
+ + 6*(n_col+1) for a ColInfo Col [0..n_col] array
+ (this assumes sizeof (ColInfo) is 6 int's).
+ + 4*(n_row+1) for a RowInfo Row [0..n_row] array
+ (this assumes sizeof (RowInfo) is 4 int's).
+ + elbow_room must be at least n_col. We recommend at least
+ nnz/5 in addition to that. If sufficient,
+ changes in the elbow room affect the ordering
+ time only, not the ordering itself.
+ + COLAMD_STATS for the output statistics
+
+ Colamd returns FALSE is memory is insufficient, or TRUE otherwise.
+
+ On input, the caller must specify:
+
+ n_row the number of rows of A
+ n_col the number of columns of A
+ Alen the size of the array A
+ A [0 ... nnz-1] the row indices, where nnz = p [n_col]
+ A [nnz ... Alen-1] (need not be initialized by the user)
+ p [0 ... n_col] the column pointers, p [0] = 0, and p [n_col]
+ is the number of entries in A. Column c of A
+ is stored in A [p [c] ... p [c+1]-1].
+ knobs [0 ... 19] a set of parameters that control the behavior
+ of colamd. If knobs is a NULL pointer the
+ defaults are used. The user-callable
+ colamd_set_defaults routine sets the default
+ parameters. See that routine for a description
+ of the user-controllable parameters.
+
+ If the return value of Colamd is TRUE, then on output:
+
+ p [0 ... n_col-1] the column permutation. p [0] is the first
+ column index, and p [n_col-1] is the last.
+ That is, p [k] = j means that column j of A
+ is the kth column of AQ.
+
+ A is undefined on output (the matrix pattern is
+ destroyed), except for the following statistics:
+
+ A [0] the number of dense (or empty) rows ignored
+ A [1] the number of dense (or empty) columms. These
+ are ordered last, in their natural order.
+ A [2] the number of garbage collections performed.
+ If this is excessive, then you would have
+ gotten your results faster if Alen was larger.
+ A [3] 0, if all row indices in each column were in
+ sorted order and no duplicates were present.
+ 1, if there were unsorted or duplicate row
+ indices in the input. You would have gotten
+ your results faster if A [3] was returned as 0.
+
+ If the return value of Colamd is FALSE, then A and p are undefined on
+ output.
+*/
+
+PUBLIC int colamd /* returns TRUE if successful */
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows in A */
+ int n_col, /* number of columns in A */
+ int Alen, /* length of A */
+ int A [], /* row indices of A */
+ int p [], /* pointers to columns in A */
+ double knobs [COLAMD_KNOBS] /* parameters (uses defaults if NULL) */
+)
+{
+ /* === Local variables ================================================== */
+
+ int i ; /* loop index */
+ int nnz ; /* nonzeros in A */
+ int Row_size ; /* size of Row [], in integers */
+ int Col_size ; /* size of Col [], in integers */
+ int elbow_room ; /* remaining free space */
+ RowInfo *Row ; /* pointer into A of Row [0..n_row] array */
+ ColInfo *Col ; /* pointer into A of Col [0..n_col] array */
+ int n_col2 ; /* number of non-dense, non-empty columns */
+ int n_row2 ; /* number of non-dense, non-empty rows */
+ int ngarbage ; /* number of garbage collections performed */
+ int max_deg ; /* maximum row degree */
+ double default_knobs [COLAMD_KNOBS] ; /* default knobs knobs array */
+ int init_result ; /* return code from initialization */
+
+#ifndef NDEBUG
+ debug_colamd = 0 ; /* no debug printing */
+ /* get "D" environment variable, which gives the debug printing level */
+ if (getenv ("D")) debug_colamd = atoi (getenv ("D")) ;
+ DEBUG0 (("debug version, D = %d (THIS WILL BE SLOOOOW!)\n", debug_colamd)) ;
+#endif
+
+ /* === Check the input arguments ======================================== */
+
+ if (n_row < 0 || n_col < 0 || !A || !p)
+ {
+ /* n_row and n_col must be non-negative, A and p must be present */
+ DEBUG0 (("colamd error! %d %d %d\n", n_row, n_col, Alen)) ;
+ return (FALSE) ;
+ }
+ nnz = p [n_col] ;
+ if (nnz < 0 || p [0] != 0)
+ {
+ /* nnz must be non-negative, and p [0] must be zero */
+ DEBUG0 (("colamd error! %d %d\n", nnz, p [0])) ;
+ return (FALSE) ;
+ }
+
+ /* === If no knobs, set default parameters ============================== */
+
+ if (!knobs)
+ {
+ knobs = default_knobs ;
+ colamd_set_defaults (knobs) ;
+ }
+
+ /* === Allocate the Row and Col arrays from array A ===================== */
+
+ Col_size = (n_col + 1) * sizeof (ColInfo) / sizeof (int) ;
+ Row_size = (n_row + 1) * sizeof (RowInfo) / sizeof (int) ;
+ elbow_room = Alen - (2*nnz + Col_size + Row_size) ;
+ if (elbow_room < n_col + COLAMD_STATS)
+ {
+ /* not enough space in array A to perform the ordering */
+ DEBUG0 (("colamd error! elbow_room %d, %d\n", elbow_room,n_col)) ;
+ return (FALSE) ;
+ }
+ Alen = 2*nnz + elbow_room ;
+ Col = (ColInfo *) &A [Alen] ;
+ Row = (RowInfo *) &A [Alen + Col_size] ;
+
+ /* === Construct the row and column data structures ===================== */
+
+ init_result = init_rows_cols (n_row, n_col, Row, Col, A, p) ;
+ if (init_result == -1)
+ {
+ /* input matrix is invalid */
+ DEBUG0 (("colamd error! matrix invalid\n")) ;
+ return (FALSE) ;
+ }
+
+ /* === Initialize scores, kill dense rows/columns ======================= */
+
+ init_scoring (n_row, n_col, Row, Col, A, p, knobs,
+ &n_row2, &n_col2, &max_deg) ;
+
+ /* === Order the supercolumns =========================================== */
+
+ ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p,
+ n_col2, max_deg, 2*nnz) ;
+
+ /* === Order the non-principal columns ================================== */
+
+ order_children (n_col, Col, p) ;
+
+ /* === Return statistics in A =========================================== */
+
+ for (i = 0 ; i < COLAMD_STATS ; i++)
+ {
+ A [i] = 0 ;
+ }
+ A [COLAMD_DENSE_ROW] = n_row - n_row2 ;
+ A [COLAMD_DENSE_COL] = n_col - n_col2 ;
+ A [COLAMD_DEFRAG_COUNT] = ngarbage ;
+ A [COLAMD_JUMBLED_COLS] = init_result ;
+
+ return (TRUE) ;
+}
+
+
+/* ========================================================================== */
+/* === NON-USER-CALLABLE ROUTINES: ========================================== */
+/* ========================================================================== */
+
+/* There are no user-callable routines beyond this point in the file */
+
+
+/* ========================================================================== */
+/* === init_rows_cols ======================================================= */
+/* ========================================================================== */
+
+/*
+ Takes the column form of the matrix in A and creates the row form of the
+ matrix. Also, row and column attributes are stored in the Col and Row
+ structs. If the columns are un-sorted or contain duplicate row indices,
+ this routine will also sort and remove duplicate row indices from the
+ column form of the matrix. Returns -1 on error, 1 if columns jumbled,
+ or 0 if columns not jumbled. Not user-callable.
+*/
+
+PRIVATE int init_rows_cols /* returns status code */
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows of A */
+ int n_col, /* number of columns of A */
+ RowInfo Row [], /* of size n_row+1 */
+ ColInfo Col [], /* of size n_col+1 */
+ int A [], /* row indices of A, of size Alen */
+ int p [] /* pointers to columns in A, of size n_col+1 */
+)
+{
+ /* === Local variables ================================================== */
+
+ int col ; /* a column index */
+ int row ; /* a row index */
+ int *cp ; /* a column pointer */
+ int *cp_end ; /* a pointer to the end of a column */
+ int *rp ; /* a row pointer */
+ int *rp_end ; /* a pointer to the end of a row */
+ int last_start ; /* start index of previous column in A */
+ int start ; /* start index of column in A */
+ int last_row ; /* previous row */
+ int jumbled_columns ; /* indicates if columns are jumbled */
+
+ /* === Initialize columns, and check column pointers ==================== */
+
+ last_start = 0 ;
+ for (col = 0 ; col < n_col ; col++)
+ {
+ start = p [col] ;
+ if (start < last_start)
+ {
+ /* column pointers must be non-decreasing */
+ DEBUG0 (("colamd error! last p %d p [col] %d\n",last_start,start));
+ return (-1) ;
+ }
+ Col [col].start = start ;
+ Col [col].length = p [col+1] - start ;
+ Col [col].shared1.thickness = 1 ;
+ Col [col].shared2.score = 0 ;
+ Col [col].shared3.prev = EMPTY ;
+ Col [col].shared4.degree_next = EMPTY ;
+ last_start = start ;
+ }
+ /* must check the end pointer for last column */
+ if (p [n_col] < last_start)
+ {
+ /* column pointers must be non-decreasing */
+ DEBUG0 (("colamd error! last p %d p [n_col] %d\n",p[col],last_start)) ;
+ return (-1) ;
+ }
+
+ /* p [0..n_col] no longer needed, used as "head" in subsequent routines */
+
+ /* === Scan columns, compute row degrees, and check row indices ========= */
+
+ jumbled_columns = FALSE ;
+
+ for (row = 0 ; row < n_row ; row++)
+ {
+ Row [row].length = 0 ;
+ Row [row].shared2.mark = -1 ;
+ }
+
+ for (col = 0 ; col < n_col ; col++)
+ {
+ last_row = -1 ;
+
+ cp = &A [p [col]] ;
+ cp_end = &A [p [col+1]] ;
+
+ while (cp < cp_end)
+ {
+ row = *cp++ ;
+
+ /* make sure row indices within range */
+ if (row < 0 || row >= n_row)
+ {
+ DEBUG0 (("colamd error! col %d row %d last_row %d\n",
+ col, row, last_row)) ;
+ return (-1) ;
+ }
+ else if (row <= last_row)
+ {
+ /* row indices are not sorted or repeated, thus cols */
+ /* are jumbled */
+ jumbled_columns = TRUE ;
+ }
+ /* prevent repeated row from being counted */
+ if (Row [row].shared2.mark != col)
+ {
+ Row [row].length++ ;
+ Row [row].shared2.mark = col ;
+ last_row = row ;
+ }
+ else
+ {
+ /* this is a repeated entry in the column, */
+ /* it will be removed */
+ Col [col].length-- ;
+ }
+ }
+ }
+
+ /* === Compute row pointers ============================================= */
+
+ /* row form of the matrix starts directly after the column */
+ /* form of matrix in A */
+ Row [0].start = p [n_col] ;
+ Row [0].shared1.p = Row [0].start ;
+ Row [0].shared2.mark = -1 ;
+ for (row = 1 ; row < n_row ; row++)
+ {
+ Row [row].start = Row [row-1].start + Row [row-1].length ;
+ Row [row].shared1.p = Row [row].start ;
+ Row [row].shared2.mark = -1 ;
+ }
+
+ /* === Create row form ================================================== */
+
+ if (jumbled_columns)
+ {
+ /* if cols jumbled, watch for repeated row indices */
+ for (col = 0 ; col < n_col ; col++)
+ {
+ cp = &A [p [col]] ;
+ cp_end = &A [p [col+1]] ;
+ while (cp < cp_end)
+ {
+ row = *cp++ ;
+ if (Row [row].shared2.mark != col)
+ {
+ A [(Row [row].shared1.p)++] = col ;
+ Row [row].shared2.mark = col ;
+ }
+ }
+ }
+ }
+ else
+ {
+ /* if cols not jumbled, we don't need the mark (this is faster) */
+ for (col = 0 ; col < n_col ; col++)
+ {
+ cp = &A [p [col]] ;
+ cp_end = &A [p [col+1]] ;
+ while (cp < cp_end)
+ {
+ A [(Row [*cp++].shared1.p)++] = col ;
+ }
+ }
+ }
+
+ /* === Clear the row marks and set row degrees ========================== */
+
+ for (row = 0 ; row < n_row ; row++)
+ {
+ Row [row].shared2.mark = 0 ;
+ Row [row].shared1.degree = Row [row].length ;
+ }
+
+ /* === See if we need to re-create columns ============================== */
+
+ if (jumbled_columns)
+ {
+
+#ifndef NDEBUG
+ /* make sure column lengths are correct */
+ for (col = 0 ; col < n_col ; col++)
+ {
+ p [col] = Col [col].length ;
+ }
+ for (row = 0 ; row < n_row ; row++)
+ {
+ rp = &A [Row [row].start] ;
+ rp_end = rp + Row [row].length ;
+ while (rp < rp_end)
+ {
+ p [*rp++]-- ;
+ }
+ }
+ for (col = 0 ; col < n_col ; col++)
+ {
+ assert (p [col] == 0) ;
+ }
+ /* now p is all zero (different than when debugging is turned off) */
+#endif
+
+ /* === Compute col pointers ========================================= */
+
+ /* col form of the matrix starts at A [0]. */
+ /* Note, we may have a gap between the col form and the row */
+ /* form if there were duplicate entries, if so, it will be */
+ /* removed upon the first garbage collection */
+ Col [0].start = 0 ;
+ p [0] = Col [0].start ;
+ for (col = 1 ; col < n_col ; col++)
+ {
+ /* note that the lengths here are for pruned columns, i.e. */
+ /* no duplicate row indices will exist for these columns */
+ Col [col].start = Col [col-1].start + Col [col-1].length ;
+ p [col] = Col [col].start ;
+ }
+
+ /* === Re-create col form =========================================== */
+
+ for (row = 0 ; row < n_row ; row++)
+ {
+ rp = &A [Row [row].start] ;
+ rp_end = rp + Row [row].length ;
+ while (rp < rp_end)
+ {
+ A [(p [*rp++])++] = row ;
+ }
+ }
+ return (1) ;
+ }
+ else
+ {
+ /* no columns jumbled (this is faster) */
+ return (0) ;
+ }
+}
+
+
+/* ========================================================================== */
+/* === init_scoring ========================================================= */
+/* ========================================================================== */
+
+/*
+ Kills dense or empty columns and rows, calculates an initial score for
+ each column, and places all columns in the degree lists. Not user-callable.
+*/
+
+PRIVATE void init_scoring
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows of A */
+ int n_col, /* number of columns of A */
+ RowInfo Row [], /* of size n_row+1 */
+ ColInfo Col [], /* of size n_col+1 */
+ int A [], /* column form and row form of A */
+ int head [], /* of size n_col+1 */
+ double knobs [COLAMD_KNOBS],/* parameters */
+ int *p_n_row2, /* number of non-dense, non-empty rows */
+ int *p_n_col2, /* number of non-dense, non-empty columns */
+ int *p_max_deg /* maximum row degree */
+)
+{
+ /* === Local variables ================================================== */
+
+ int c ; /* a column index */
+ int r, row ; /* a row index */
+ int *cp ; /* a column pointer */
+ int deg ; /* degree (# entries) of a row or column */
+ int *cp_end ; /* a pointer to the end of a column */
+ int *new_cp ; /* new column pointer */
+ int col_length ; /* length of pruned column */
+ int score ; /* current column score */
+ int n_col2 ; /* number of non-dense, non-empty columns */
+ int n_row2 ; /* number of non-dense, non-empty rows */
+ int dense_row_count ; /* remove rows with more entries than this */
+ int dense_col_count ; /* remove cols with more entries than this */
+ int min_score ; /* smallest column score */
+ int max_deg ; /* maximum row degree */
+ int next_col ; /* Used to add to degree list.*/
+#ifndef NDEBUG
+ int debug_count ; /* debug only. */
+#endif
+
+ /* === Extract knobs ==================================================== */
+
+ dense_row_count = MAX (0, MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
+ dense_col_count = MAX (0, MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
+ DEBUG0 (("densecount: %d %d\n", dense_row_count, dense_col_count)) ;
+ max_deg = 0 ;
+ n_col2 = n_col ;
+ n_row2 = n_row ;
+
+ /* === Kill empty columns =============================================== */
+
+ /* Put the empty columns at the end in their natural, so that LU */
+ /* factorization can proceed as far as possible. */
+ for (c = n_col-1 ; c >= 0 ; c--)
+ {
+ deg = Col [c].length ;
+ if (deg == 0)
+ {
+ /* this is a empty column, kill and order it last */
+ Col [c].shared2.order = --n_col2 ;
+ KILL_PRINCIPAL_COL (c) ;
+ }
+ }
+ DEBUG0 (("null columns killed: %d\n", n_col - n_col2)) ;
+
+ /* === Kill dense columns =============================================== */
+
+ /* Put the dense columns at the end, in their natural order */
+ for (c = n_col-1 ; c >= 0 ; c--)
+ {
+ /* skip any dead columns */
+ if (COL_IS_DEAD (c))
+ {
+ continue ;
+ }
+ deg = Col [c].length ;
+ if (deg > dense_col_count)
+ {
+ /* this is a dense column, kill and order it last */
+ Col [c].shared2.order = --n_col2 ;
+ /* decrement the row degrees */
+ cp = &A [Col [c].start] ;
+ cp_end = cp + Col [c].length ;
+ while (cp < cp_end)
+ {
+ Row [*cp++].shared1.degree-- ;
+ }
+ KILL_PRINCIPAL_COL (c) ;
+ }
+ }
+ DEBUG0 (("Dense and null columns killed: %d\n", n_col - n_col2)) ;
+
+ /* === Kill dense and empty rows ======================================== */
+
+ for (r = 0 ; r < n_row ; r++)
+ {
+ deg = Row [r].shared1.degree ;
+ assert (deg >= 0 && deg <= n_col) ;
+ if (deg > dense_row_count || deg == 0)
+ {
+ /* kill a dense or empty row */
+ KILL_ROW (r) ;
+ --n_row2 ;
+ }
+ else
+ {
+ /* keep track of max degree of remaining rows */
+ max_deg = MAX (max_deg, deg) ;
+ }
+ }
+ DEBUG0 (("Dense and null rows killed: %d\n", n_row - n_row2)) ;
+
+ /* === Compute initial column scores ==================================== */
+
+ /* At this point the row degrees are accurate. They reflect the number */
+ /* of "live" (non-dense) columns in each row. No empty rows exist. */
+ /* Some "live" columns may contain only dead rows, however. These are */
+ /* pruned in the code below. */
+
+ /* now find the initial matlab score for each column */
+ for (c = n_col-1 ; c >= 0 ; c--)
+ {
+ /* skip dead column */
+ if (COL_IS_DEAD (c))
+ {
+ continue ;
+ }
+ score = 0 ;
+ cp = &A [Col [c].start] ;
+ new_cp = cp ;
+ cp_end = cp + Col [c].length ;
+ while (cp < cp_end)
+ {
+ /* get a row */
+ row = *cp++ ;
+ /* skip if dead */
+ if (ROW_IS_DEAD (row))
+ {
+ continue ;
+ }
+ /* compact the column */
+ *new_cp++ = row ;
+ /* add row's external degree */
+ score += Row [row].shared1.degree - 1 ;
+ /* guard against integer overflow */
+ score = MIN (score, n_col) ;
+ }
+ /* determine pruned column length */
+ col_length = (int) (new_cp - &A [Col [c].start]) ;
+ if (col_length == 0)
+ {
+ /* a newly-made null column (all rows in this col are "dense" */
+ /* and have already been killed) */
+ DEBUG0 (("Newly null killed: %d\n", c)) ;
+ Col [c].shared2.order = --n_col2 ;
+ KILL_PRINCIPAL_COL (c) ;
+ }
+ else
+ {
+ /* set column length and set score */
+ assert (score >= 0) ;
+ assert (score <= n_col) ;
+ Col [c].length = col_length ;
+ Col [c].shared2.score = score ;
+ }
+ }
+ DEBUG0 (("Dense, null, and newly-null columns killed: %d\n",n_col-n_col2)) ;
+
+ /* At this point, all empty rows and columns are dead. All live columns */
+ /* are "clean" (containing no dead rows) and simplicial (no supercolumns */
+ /* yet). Rows may contain dead columns, but all live rows contain at */
+ /* least one live column. */
+
+#ifndef NDEBUG
+ debug_structures (n_row, n_col, Row, Col, A, n_col2) ;
+#endif
+
+ /* === Initialize degree lists ========================================== */
+
+#ifndef NDEBUG
+ debug_count = 0 ;
+#endif
+
+ /* clear the hash buckets */
+ for (c = 0 ; c <= n_col ; c++)
+ {
+ head [c] = EMPTY ;
+ }
+ min_score = n_col ;
+ /* place in reverse order, so low column indices are at the front */
+ /* of the lists. This is to encourage natural tie-breaking */
+ for (c = n_col-1 ; c >= 0 ; c--)
+ {
+ /* only add principal columns to degree lists */
+ if (COL_IS_ALIVE (c))
+ {
+ DEBUG4 (("place %d score %d minscore %d ncol %d\n",
+ c, Col [c].shared2.score, min_score, n_col)) ;
+
+ /* === Add columns score to DList =============================== */
+
+ score = Col [c].shared2.score ;
+
+ assert (min_score >= 0) ;
+ assert (min_score <= n_col) ;
+ assert (score >= 0) ;
+ assert (score <= n_col) ;
+ assert (head [score] >= EMPTY) ;
+
+ /* now add this column to dList at proper score location */
+ next_col = head [score] ;
+ Col [c].shared3.prev = EMPTY ;
+ Col [c].shared4.degree_next = next_col ;
+
+ /* if there already was a column with the same score, set its */
+ /* previous pointer to this new column */
+ if (next_col != EMPTY)
+ {
+ Col [next_col].shared3.prev = c ;
+ }
+ head [score] = c ;
+
+ /* see if this score is less than current min */
+ min_score = MIN (min_score, score) ;
+
+#ifndef NDEBUG
+ debug_count++ ;
+#endif
+ }
+ }
+
+#ifndef NDEBUG
+ DEBUG0 (("Live cols %d out of %d, non-princ: %d\n",
+ debug_count, n_col, n_col-debug_count)) ;
+ assert (debug_count == n_col2) ;
+ debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ;
+#endif
+
+ /* === Return number of remaining columns, and max row degree =========== */
+
+ *p_n_col2 = n_col2 ;
+ *p_n_row2 = n_row2 ;
+ *p_max_deg = max_deg ;
+}
+
+
+/* ========================================================================== */
+/* === find_ordering ======================================================== */
+/* ========================================================================== */
+
+/*
+ Order the principal columns of the supercolumn form of the matrix
+ (no supercolumns on input). Uses a minimum approximate column minimum
+ degree ordering method. Not user-callable.
+*/
+
+PRIVATE int find_ordering /* return the number of garbage collections */
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows of A */
+ int n_col, /* number of columns of A */
+ int Alen, /* size of A, 2*nnz + elbow_room or larger */
+ RowInfo Row [], /* of size n_row+1 */
+ ColInfo Col [], /* of size n_col+1 */
+ int A [], /* column form and row form of A */
+ int head [], /* of size n_col+1 */
+ int n_col2, /* Remaining columns to order */
+ int max_deg, /* Maximum row degree */
+ int pfree /* index of first free slot (2*nnz on entry) */
+)
+{
+ /* === Local variables ================================================== */
+
+ int k ; /* current pivot ordering step */
+ int pivot_col ; /* current pivot column */
+ int *cp ; /* a column pointer */
+ int *rp ; /* a row pointer */
+ int pivot_row ; /* current pivot row */
+ int *new_cp ; /* modified column pointer */
+ int *new_rp ; /* modified row pointer */
+ int pivot_row_start ; /* pointer to start of pivot row */
+ int pivot_row_degree ; /* # of columns in pivot row */
+ int pivot_row_length ; /* # of supercolumns in pivot row */
+ int pivot_col_score ; /* score of pivot column */
+ int needed_memory ; /* free space needed for pivot row */
+ int *cp_end ; /* pointer to the end of a column */
+ int *rp_end ; /* pointer to the end of a row */
+ int row ; /* a row index */
+ int col ; /* a column index */
+ int max_score ; /* maximum possible score */
+ int cur_score ; /* score of current column */
+ unsigned int hash ; /* hash value for supernode detection */
+ int head_column ; /* head of hash bucket */
+ int first_col ; /* first column in hash bucket */
+ int tag_mark ; /* marker value for mark array */
+ int row_mark ; /* Row [row].shared2.mark */
+ int set_difference ; /* set difference size of row with pivot row */
+ int min_score ; /* smallest column score */
+ int col_thickness ; /* "thickness" (# of columns in a supercol) */
+ int max_mark ; /* maximum value of tag_mark */
+ int pivot_col_thickness ; /* number of columns represented by pivot col */
+ int prev_col ; /* Used by Dlist operations. */
+ int next_col ; /* Used by Dlist operations. */
+ int ngarbage ; /* number of garbage collections performed */
+#ifndef NDEBUG
+ int debug_d ; /* debug loop counter */
+ int debug_step = 0 ; /* debug loop counter */
+#endif
+
+ /* === Initialization and clear mark ==================================== */
+
+ max_mark = INT_MAX - n_col ; /* INT_MAX defined in <limits.h> */
+ tag_mark = clear_mark (n_row, Row) ;
+ min_score = 0 ;
+ ngarbage = 0 ;
+ DEBUG0 (("Ordering.. n_col2=%d\n", n_col2)) ;
+
+ /* === Order the columns ================================================ */
+
+ for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */)
+ {
+
+#ifndef NDEBUG
+ if (debug_step % 100 == 0)
+ {
+ DEBUG0 (("\n... Step k: %d out of n_col2: %d\n", k, n_col2)) ;
+ }
+ else
+ {
+ DEBUG1 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ;
+ }
+ debug_step++ ;
+ debug_deg_lists (n_row, n_col, Row, Col, head,
+ min_score, n_col2-k, max_deg) ;
+ debug_matrix (n_row, n_col, Row, Col, A) ;
+#endif
+
+ /* === Select pivot column, and order it ============================ */
+
+ /* make sure degree list isn't empty */
+ assert (min_score >= 0) ;
+ assert (min_score <= n_col) ;
+ assert (head [min_score] >= EMPTY) ;
+
+#ifndef NDEBUG
+ for (debug_d = 0 ; debug_d < min_score ; debug_d++)
+ {
+ assert (head [debug_d] == EMPTY) ;
+ }
+#endif
+
+ /* get pivot column from head of minimum degree list */
+ while (head [min_score] == EMPTY && min_score < n_col)
+ {
+ min_score++ ;
+ }
+ pivot_col = head [min_score] ;
+ assert (pivot_col >= 0 && pivot_col <= n_col) ;
+ next_col = Col [pivot_col].shared4.degree_next ;
+ head [min_score] = next_col ;
+ if (next_col != EMPTY)
+ {
+ Col [next_col].shared3.prev = EMPTY ;
+ }
+
+ assert (COL_IS_ALIVE (pivot_col)) ;
+ DEBUG3 (("Pivot col: %d\n", pivot_col)) ;
+
+ /* remember score for defrag check */
+ pivot_col_score = Col [pivot_col].shared2.score ;
+
+ /* the pivot column is the kth column in the pivot order */
+ Col [pivot_col].shared2.order = k ;
+
+ /* increment order count by column thickness */
+ pivot_col_thickness = Col [pivot_col].shared1.thickness ;
+ k += pivot_col_thickness ;
+ assert (pivot_col_thickness > 0) ;
+
+ /* === Garbage_collection, if necessary ============================= */
+
+ needed_memory = MIN (pivot_col_score, n_col - k) ;
+ if (pfree + needed_memory >= Alen)
+ {
+ pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
+ ngarbage++ ;
+ /* after garbage collection we will have enough */
+ assert (pfree + needed_memory < Alen) ;
+ /* garbage collection has wiped out the Row[].shared2.mark array */
+ tag_mark = clear_mark (n_row, Row) ;
+#ifndef NDEBUG
+ debug_matrix (n_row, n_col, Row, Col, A) ;
+#endif
+ }
+
+ /* === Compute pivot row pattern ==================================== */
+
+ /* get starting location for this new merged row */
+ pivot_row_start = pfree ;
+
+ /* initialize new row counts to zero */
+ pivot_row_degree = 0 ;
+
+ /* tag pivot column as having been visited so it isn't included */
+ /* in merged pivot row */
+ Col [pivot_col].shared1.thickness = -pivot_col_thickness ;
+
+ /* pivot row is the union of all rows in the pivot column pattern */
+ cp = &A [Col [pivot_col].start] ;
+ cp_end = cp + Col [pivot_col].length ;
+ while (cp < cp_end)
+ {
+ /* get a row */
+ row = *cp++ ;
+ DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ;
+ /* skip if row is dead */
+ if (ROW_IS_DEAD (row))
+ {
+ continue ;
+ }
+ rp = &A [Row [row].start] ;
+ rp_end = rp + Row [row].length ;
+ while (rp < rp_end)
+ {
+ /* get a column */
+ col = *rp++ ;
+ /* add the column, if alive and untagged */
+ col_thickness = Col [col].shared1.thickness ;
+ if (col_thickness > 0 && COL_IS_ALIVE (col))
+ {
+ /* tag column in pivot row */
+ Col [col].shared1.thickness = -col_thickness ;
+ assert (pfree < Alen) ;
+ /* place column in pivot row */
+ A [pfree++] = col ;
+ pivot_row_degree += col_thickness ;
+ }
+ }
+ }
+
+ /* clear tag on pivot column */
+ Col [pivot_col].shared1.thickness = pivot_col_thickness ;
+ max_deg = MAX (max_deg, pivot_row_degree) ;
+
+#ifndef NDEBUG
+ DEBUG3 (("check2\n")) ;
+ debug_mark (n_row, Row, tag_mark, max_mark) ;
+#endif
+
+ /* === Kill all rows used to construct pivot row ==================== */
+
+ /* also kill pivot row, temporarily */
+ cp = &A [Col [pivot_col].start] ;
+ cp_end = cp + Col [pivot_col].length ;
+ while (cp < cp_end)
+ {
+ /* may be killing an already dead row */
+ row = *cp++ ;
+ DEBUG2 (("Kill row in pivot col: %d\n", row)) ;
+ KILL_ROW (row) ;
+ }
+
+ /* === Select a row index to use as the new pivot row =============== */
+
+ pivot_row_length = pfree - pivot_row_start ;
+ if (pivot_row_length > 0)
+ {
+ /* pick the "pivot" row arbitrarily (first row in col) */
+ pivot_row = A [Col [pivot_col].start] ;
+ DEBUG2 (("Pivotal row is %d\n", pivot_row)) ;
+ }
+ else
+ {
+ /* there is no pivot row, since it is of zero length */
+ pivot_row = EMPTY ;
+ assert (pivot_row_length == 0) ;
+ }
+ assert (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
+
+ /* === Approximate degree computation =============================== */
+
+ /* Here begins the computation of the approximate degree. The column */
+ /* score is the sum of the pivot row "length", plus the size of the */
+ /* set differences of each row in the column minus the pattern of the */
+ /* pivot row itself. The column ("thickness") itself is also */
+ /* excluded from the column score (we thus use an approximate */
+ /* external degree). */
+
+ /* The time taken by the following code (compute set differences, and */
+ /* add them up) is proportional to the size of the data structure */
+ /* being scanned - that is, the sum of the sizes of each column in */
+ /* the pivot row. Thus, the amortized time to compute a column score */
+ /* is proportional to the size of that column (where size, in this */
+ /* context, is the column "length", or the number of row indices */
+ /* in that column). The number of row indices in a column is */
+ /* monotonically non-decreasing, from the length of the original */
+ /* column on input to colamd. */
+
+ /* === Compute set differences ====================================== */
+
+ DEBUG1 (("** Computing set differences phase. **\n")) ;
+
+ /* pivot row is currently dead - it will be revived later. */
+
+ DEBUG2 (("Pivot row: ")) ;
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
+ col = *rp++ ;
+ assert (COL_IS_ALIVE (col) && col != pivot_col) ;
+ DEBUG2 (("Col: %d\n", col)) ;
+
+ /* clear tags used to construct pivot row pattern */
+ col_thickness = -Col [col].shared1.thickness ;
+ assert (col_thickness > 0) ;
+ Col [col].shared1.thickness = col_thickness ;
+
+ /* === Remove column from degree list =========================== */
+
+ cur_score = Col [col].shared2.score ;
+ prev_col = Col [col].shared3.prev ;
+ next_col = Col [col].shared4.degree_next ;
+ assert (cur_score >= 0) ;
+ assert (cur_score <= n_col) ;
+ assert (cur_score >= EMPTY) ;
+ if (prev_col == EMPTY)
+ {
+ head [cur_score] = next_col ;
+ }
+ else
+ {
+ Col [prev_col].shared4.degree_next = next_col ;
+ }
+ if (next_col != EMPTY)
+ {
+ Col [next_col].shared3.prev = prev_col ;
+ }
+
+ /* === Scan the column ========================================== */
+
+ cp = &A [Col [col].start] ;
+ cp_end = cp + Col [col].length ;
+ while (cp < cp_end)
+ {
+ /* get a row */
+ row = *cp++ ;
+ row_mark = Row [row].shared2.mark ;
+ /* skip if dead */
+ if (ROW_IS_MARKED_DEAD (row_mark))
+ {
+ continue ;
+ }
+ assert (row != pivot_row) ;
+ set_difference = row_mark - tag_mark ;
+ /* check if the row has been seen yet */
+ if (set_difference < 0)
+ {
+ assert (Row [row].shared1.degree <= max_deg) ;
+ set_difference = Row [row].shared1.degree ;
+ }
+ /* subtract column thickness from this row's set difference */
+ set_difference -= col_thickness ;
+ assert (set_difference >= 0) ;
+ /* absorb this row if the set difference becomes zero */
+ if (set_difference == 0)
+ {
+ DEBUG1 (("aggressive absorption. Row: %d\n", row)) ;
+ KILL_ROW (row) ;
+ }
+ else
+ {
+ /* save the new mark */
+ Row [row].shared2.mark = set_difference + tag_mark ;
+ }
+ }
+ }
+
+#ifndef NDEBUG
+ debug_deg_lists (n_row, n_col, Row, Col, head,
+ min_score, n_col2-k-pivot_row_degree, max_deg) ;
+#endif
+
+ /* === Add up set differences for each column ======================= */
+
+ DEBUG1 (("** Adding set differences phase. **\n")) ;
+
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
+ /* get a column */
+ col = *rp++ ;
+ assert (COL_IS_ALIVE (col) && col != pivot_col) ;
+ hash = 0 ;
+ cur_score = 0 ;
+ cp = &A [Col [col].start] ;
+ /* compact the column */
+ new_cp = cp ;
+ cp_end = cp + Col [col].length ;
+
+ DEBUG2 (("Adding set diffs for Col: %d.\n", col)) ;
+
+ while (cp < cp_end)
+ {
+ /* get a row */
+ row = *cp++ ;
+ assert(row >= 0 && row < n_row) ;
+ row_mark = Row [row].shared2.mark ;
+ /* skip if dead */
+ if (ROW_IS_MARKED_DEAD (row_mark))
+ {
+ continue ;
+ }
+ assert (row_mark > tag_mark) ;
+ /* compact the column */
+ *new_cp++ = row ;
+ /* compute hash function */
+ hash += row ;
+ /* add set difference */
+ cur_score += row_mark - tag_mark ;
+ /* integer overflow... */
+ cur_score = MIN (cur_score, n_col) ;
+ }
+
+ /* recompute the column's length */
+ Col [col].length = (int) (new_cp - &A [Col [col].start]) ;
+
+ /* === Further mass elimination ================================= */
+
+ if (Col [col].length == 0)
+ {
+ DEBUG1 (("further mass elimination. Col: %d\n", col)) ;
+ /* nothing left but the pivot row in this column */
+ KILL_PRINCIPAL_COL (col) ;
+ pivot_row_degree -= Col [col].shared1.thickness ;
+ assert (pivot_row_degree >= 0) ;
+ /* order it */
+ Col [col].shared2.order = k ;
+ /* increment order count by column thickness */
+ k += Col [col].shared1.thickness ;
+ }
+ else
+ {
+ /* === Prepare for supercolumn detection ==================== */
+
+ DEBUG2 (("Preparing supercol detection for Col: %d.\n", col)) ;
+
+ /* save score so far */
+ Col [col].shared2.score = cur_score ;
+
+ /* add column to hash table, for supercolumn detection */
+ hash %= n_col + 1 ;
+
+ DEBUG2 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
+ assert (hash <= n_col) ;
+
+ head_column = head [hash] ;
+ if (head_column > EMPTY)
+ {
+ /* degree list "hash" is non-empty, use prev (shared3) of */
+ /* first column in degree list as head of hash bucket */
+ first_col = Col [head_column].shared3.headhash ;
+ Col [head_column].shared3.headhash = col ;
+ }
+ else
+ {
+ /* degree list "hash" is empty, use head as hash bucket */
+ first_col = - (head_column + 2) ;
+ head [hash] = - (col + 2) ;
+ }
+ Col [col].shared4.hash_next = first_col ;
+
+ /* save hash function in Col [col].shared3.hash */
+ Col [col].shared3.hash = (int) hash ;
+ assert (COL_IS_ALIVE (col)) ;
+ }
+ }
+
+ /* The approximate external column degree is now computed. */
+
+ /* === Supercolumn detection ======================================== */
+
+ DEBUG1 (("** Supercolumn detection phase. **\n")) ;
+
+ detect_super_cols (
+#ifndef NDEBUG
+ n_col, Row,
+#endif
+ Col, A, head, pivot_row_start, pivot_row_length) ;
+
+ /* === Kill the pivotal column ====================================== */
+
+ KILL_PRINCIPAL_COL (pivot_col) ;
+
+ /* === Clear mark =================================================== */
+
+ tag_mark += (max_deg + 1) ;
+ if (tag_mark >= max_mark)
+ {
+ DEBUG1 (("clearing tag_mark\n")) ;
+ tag_mark = clear_mark (n_row, Row) ;
+ }
+#ifndef NDEBUG
+ DEBUG3 (("check3\n")) ;
+ debug_mark (n_row, Row, tag_mark, max_mark) ;
+#endif
+
+ /* === Finalize the new pivot row, and column scores ================ */
+
+ DEBUG1 (("** Finalize scores phase. **\n")) ;
+
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ /* compact the pivot row */
+ new_rp = rp ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
+ col = *rp++ ;
+ /* skip dead columns */
+ if (COL_IS_DEAD (col))
+ {
+ continue ;
+ }
+ *new_rp++ = col ;
+ /* add new pivot row to column */
+ A [Col [col].start + (Col [col].length++)] = pivot_row ;
+
+ /* retrieve score so far and add on pivot row's degree. */
+ /* (we wait until here for this in case the pivot */
+ /* row's degree was reduced due to mass elimination). */
+ cur_score = Col [col].shared2.score + pivot_row_degree ;
+
+ /* calculate the max possible score as the number of */
+ /* external columns minus the 'k' value minus the */
+ /* columns thickness */
+ max_score = n_col - k - Col [col].shared1.thickness ;
+
+ /* make the score the external degree of the union-of-rows */
+ cur_score -= Col [col].shared1.thickness ;
+
+ /* make sure score is less or equal than the max score */
+ cur_score = MIN (cur_score, max_score) ;
+ assert (cur_score >= 0) ;
+
+ /* store updated score */
+ Col [col].shared2.score = cur_score ;
+
+ /* === Place column back in degree list ========================= */
+
+ assert (min_score >= 0) ;
+ assert (min_score <= n_col) ;
+ assert (cur_score >= 0) ;
+ assert (cur_score <= n_col) ;
+ assert (head [cur_score] >= EMPTY) ;
+ next_col = head [cur_score] ;
+ Col [col].shared4.degree_next = next_col ;
+ Col [col].shared3.prev = EMPTY ;
+ if (next_col != EMPTY)
+ {
+ Col [next_col].shared3.prev = col ;
+ }
+ head [cur_score] = col ;
+
+ /* see if this score is less than current min */
+ min_score = MIN (min_score, cur_score) ;
+
+ }
+
+#ifndef NDEBUG
+ debug_deg_lists (n_row, n_col, Row, Col, head,
+ min_score, n_col2-k, max_deg) ;
+#endif
+
+ /* === Resurrect the new pivot row ================================== */
+
+ if (pivot_row_degree > 0)
+ {
+ /* update pivot row length to reflect any cols that were killed */
+ /* during super-col detection and mass elimination */
+ Row [pivot_row].start = pivot_row_start ;
+ Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ;
+ Row [pivot_row].shared1.degree = pivot_row_degree ;
+ Row [pivot_row].shared2.mark = 0 ;
+ /* pivot row is no longer dead */
+ }
+ }
+
+ /* === All principal columns have now been ordered ====================== */
+
+ return (ngarbage) ;
+}
+
+
+/* ========================================================================== */
+/* === order_children ======================================================= */
+/* ========================================================================== */
+
+/*
+ The find_ordering routine has ordered all of the principal columns (the
+ representatives of the supercolumns). The non-principal columns have not
+ yet been ordered. This routine orders those columns by walking up the
+ parent tree (a column is a child of the column which absorbed it). The
+ final permutation vector is then placed in p [0 ... n_col-1], with p [0]
+ being the first column, and p [n_col-1] being the last. It doesn't look
+ like it at first glance, but be assured that this routine takes time linear
+ in the number of columns. Although not immediately obvious, the time
+ taken by this routine is O (n_col), that is, linear in the number of
+ columns. Not user-callable.
+*/
+
+PRIVATE void order_children
+(
+ /* === Parameters ======================================================= */
+
+ int n_col, /* number of columns of A */
+ ColInfo Col [], /* of size n_col+1 */
+ int p [] /* p [0 ... n_col-1] is the column permutation*/
+)
+{
+ /* === Local variables ================================================== */
+
+ int i ; /* loop counter for all columns */
+ int c ; /* column index */
+ int parent ; /* index of column's parent */
+ int order ; /* column's order */
+
+ /* === Order each non-principal column ================================== */
+
+ for (i = 0 ; i < n_col ; i++)
+ {
+ /* find an un-ordered non-principal column */
+ assert (COL_IS_DEAD (i)) ;
+ if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EMPTY)
+ {
+ parent = i ;
+ /* once found, find its principal parent */
+ do
+ {
+ parent = Col [parent].shared1.parent ;
+ } while (!COL_IS_DEAD_PRINCIPAL (parent)) ;
+
+ /* now, order all un-ordered non-principal columns along path */
+ /* to this parent. collapse tree at the same time */
+ c = i ;
+ /* get order of parent */
+ order = Col [parent].shared2.order ;
+
+ do
+ {
+ assert (Col [c].shared2.order == EMPTY) ;
+
+ /* order this column */
+ Col [c].shared2.order = order++ ;
+ /* collaps tree */
+ Col [c].shared1.parent = parent ;
+
+ /* get immediate parent of this column */
+ c = Col [c].shared1.parent ;
+
+ /* continue until we hit an ordered column. There are */
+ /* guarranteed not to be anymore unordered columns */
+ /* above an ordered column */
+ } while (Col [c].shared2.order == EMPTY) ;
+
+ /* re-order the super_col parent to largest order for this group */
+ Col [parent].shared2.order = order ;
+ }
+ }
+
+ /* === Generate the permutation ========================================= */
+
+ for (c = 0 ; c < n_col ; c++)
+ {
+ p [Col [c].shared2.order] = c ;
+ }
+}
+
+
+/* ========================================================================== */
+/* === detect_super_cols ==================================================== */
+/* ========================================================================== */
+
+/*
+ Detects supercolumns by finding matches between columns in the hash buckets.
+ Check amongst columns in the set A [row_start ... row_start + row_length-1].
+ The columns under consideration are currently *not* in the degree lists,
+ and have already been placed in the hash buckets.
+
+ The hash bucket for columns whose hash function is equal to h is stored
+ as follows:
+
+ if head [h] is >= 0, then head [h] contains a degree list, so:
+
+ head [h] is the first column in degree bucket h.
+ Col [head [h]].headhash gives the first column in hash bucket h.
+
+ otherwise, the degree list is empty, and:
+
+ -(head [h] + 2) is the first column in hash bucket h.
+
+ For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous
+ column" pointer. Col [c].shared3.hash is used instead as the hash number
+ for that column. The value of Col [c].shared4.hash_next is the next column
+ in the same hash bucket.
+
+ Assuming no, or "few" hash collisions, the time taken by this routine is
+ linear in the sum of the sizes (lengths) of each column whose score has
+ just been computed in the approximate degree computation.
+ Not user-callable.
+*/
+
+PRIVATE void detect_super_cols
+(
+ /* === Parameters ======================================================= */
+
+#ifndef NDEBUG
+ /* these two parameters are only needed when debugging is enabled: */
+ int n_col, /* number of columns of A */
+ RowInfo Row [], /* of size n_row+1 */
+#endif
+ ColInfo Col [], /* of size n_col+1 */
+ int A [], /* row indices of A */
+ int head [], /* head of degree lists and hash buckets */
+ int row_start, /* pointer to set of columns to check */
+ int row_length /* number of columns to check */
+)
+{
+ /* === Local variables ================================================== */
+
+ int hash ; /* hash # for a column */
+ int *rp ; /* pointer to a row */
+ int c ; /* a column index */
+ int super_c ; /* column index of the column to absorb into */
+ int *cp1 ; /* column pointer for column super_c */
+ int *cp2 ; /* column pointer for column c */
+ int length ; /* length of column super_c */
+ int prev_c ; /* column preceding c in hash bucket */
+ int i ; /* loop counter */
+ int *rp_end ; /* pointer to the end of the row */
+ int col ; /* a column index in the row to check */
+ int head_column ; /* first column in hash bucket or degree list */
+ int first_col ; /* first column in hash bucket */
+
+ /* === Consider each column in the row ================================== */
+
+ rp = &A [row_start] ;
+ rp_end = rp + row_length ;
+ while (rp < rp_end)
+ {
+ col = *rp++ ;
+ if (COL_IS_DEAD (col))
+ {
+ continue ;
+ }
+
+ /* get hash number for this column */
+ hash = Col [col].shared3.hash ;
+ assert (hash <= n_col) ;
+
+ /* === Get the first column in this hash bucket ===================== */
+
+ head_column = head [hash] ;
+ if (head_column > EMPTY)
+ {
+ first_col = Col [head_column].shared3.headhash ;
+ }
+ else
+ {
+ first_col = - (head_column + 2) ;
+ }
+
+ /* === Consider each column in the hash bucket ====================== */
+
+ for (super_c = first_col ; super_c != EMPTY ;
+ super_c = Col [super_c].shared4.hash_next)
+ {
+ assert (COL_IS_ALIVE (super_c)) ;
+ assert (Col [super_c].shared3.hash == hash) ;
+ length = Col [super_c].length ;
+
+ /* prev_c is the column preceding column c in the hash bucket */
+ prev_c = super_c ;
+
+ /* === Compare super_c with all columns after it ================ */
+
+ for (c = Col [super_c].shared4.hash_next ;
+ c != EMPTY ; c = Col [c].shared4.hash_next)
+ {
+ assert (c != super_c) ;
+ assert (COL_IS_ALIVE (c)) ;
+ assert (Col [c].shared3.hash == hash) ;
+
+ /* not identical if lengths or scores are different */
+ if (Col [c].length != length ||
+ Col [c].shared2.score != Col [super_c].shared2.score)
+ {
+ prev_c = c ;
+ continue ;
+ }
+
+ /* compare the two columns */
+ cp1 = &A [Col [super_c].start] ;
+ cp2 = &A [Col [c].start] ;
+
+ for (i = 0 ; i < length ; i++)
+ {
+ /* the columns are "clean" (no dead rows) */
+ assert (ROW_IS_ALIVE (*cp1)) ;
+ assert (ROW_IS_ALIVE (*cp2)) ;
+ /* row indices will same order for both supercols, */
+ /* no gather scatter nessasary */
+ if (*cp1++ != *cp2++)
+ {
+ break ;
+ }
+ }
+
+ /* the two columns are different if the for-loop "broke" */
+ if (i != length)
+ {
+ prev_c = c ;
+ continue ;
+ }
+
+ /* === Got it! two columns are identical =================== */
+
+ assert (Col [c].shared2.score == Col [super_c].shared2.score) ;
+
+ Col [super_c].shared1.thickness += Col [c].shared1.thickness ;
+ Col [c].shared1.parent = super_c ;
+ KILL_NON_PRINCIPAL_COL (c) ;
+ /* order c later, in order_children() */
+ Col [c].shared2.order = EMPTY ;
+ /* remove c from hash bucket */
+ Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ;
+ }
+ }
+
+ /* === Empty this hash bucket ======================================= */
+
+ if (head_column > EMPTY)
+ {
+ /* corresponding degree list "hash" is not empty */
+ Col [head_column].shared3.headhash = EMPTY ;
+ }
+ else
+ {
+ /* corresponding degree list "hash" is empty */
+ head [hash] = EMPTY ;
+ }
+ }
+}
+
+
+/* ========================================================================== */
+/* === garbage_collection =================================================== */
+/* ========================================================================== */
+
+/*
+ Defragments and compacts columns and rows in the workspace A. Used when
+ all avaliable memory has been used while performing row merging. Returns
+ the index of the first free position in A, after garbage collection. The
+ time taken by this routine is linear is the size of the array A, which is
+ itself linear in the number of nonzeros in the input matrix.
+ Not user-callable.
+*/
+
+PRIVATE int garbage_collection /* returns the new value of pfree */
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows */
+ int n_col, /* number of columns */
+ RowInfo Row [], /* row info */
+ ColInfo Col [], /* column info */
+ int A [], /* A [0 ... Alen-1] holds the matrix */
+ int *pfree /* &A [0] ... pfree is in use */
+)
+{
+ /* === Local variables ================================================== */
+
+ int *psrc ; /* source pointer */
+ int *pdest ; /* destination pointer */
+ int j ; /* counter */
+ int r ; /* a row index */
+ int c ; /* a column index */
+ int length ; /* length of a row or column */
+
+#ifndef NDEBUG
+ int debug_rows ;
+ DEBUG0 (("Defrag..\n")) ;
+ for (psrc = &A[0] ; psrc < pfree ; psrc++) assert (*psrc >= 0) ;
+ debug_rows = 0 ;
+#endif
+
+ /* === Defragment the columns =========================================== */
+
+ pdest = &A[0] ;
+ for (c = 0 ; c < n_col ; c++)
+ {
+ if (COL_IS_ALIVE (c))
+ {
+ psrc = &A [Col [c].start] ;
+
+ /* move and compact the column */
+ assert (pdest <= psrc) ;
+ Col [c].start = (int) (pdest - &A [0]) ;
+ length = Col [c].length ;
+ for (j = 0 ; j < length ; j++)
+ {
+ r = *psrc++ ;
+ if (ROW_IS_ALIVE (r))
+ {
+ *pdest++ = r ;
+ }
+ }
+ Col [c].length = (int) (pdest - &A [Col [c].start]) ;
+ }
+ }
+
+ /* === Prepare to defragment the rows =================================== */
+
+ for (r = 0 ; r < n_row ; r++)
+ {
+ if (ROW_IS_ALIVE (r))
+ {
+ if (Row [r].length == 0)
+ {
+ /* this row is of zero length. cannot compact it, so kill it */
+ DEBUG0 (("Defrag row kill\n")) ;
+ KILL_ROW (r) ;
+ }
+ else
+ {
+ /* save first column index in Row [r].shared2.first_column */
+ psrc = &A [Row [r].start] ;
+ Row [r].shared2.first_column = *psrc ;
+ assert (ROW_IS_ALIVE (r)) ;
+ /* flag the start of the row with the one's complement of row */
+ *psrc = ONES_COMPLEMENT (r) ;
+#ifndef NDEBUG
+ debug_rows++ ;
+#endif
+ }
+ }
+ }
+
+ /* === Defragment the rows ============================================== */
+
+ psrc = pdest ;
+ while (psrc < pfree)
+ {
+ /* find a negative number ... the start of a row */
+ if (*psrc++ < 0)
+ {
+ psrc-- ;
+ /* get the row index */
+ r = ONES_COMPLEMENT (*psrc) ;
+ assert (r >= 0 && r < n_row) ;
+ /* restore first column index */
+ *psrc = Row [r].shared2.first_column ;
+ assert (ROW_IS_ALIVE (r)) ;
+
+ /* move and compact the row */
+ assert (pdest <= psrc) ;
+ Row [r].start = (int) (pdest - &A [0]) ;
+ length = Row [r].length ;
+ for (j = 0 ; j < length ; j++)
+ {
+ c = *psrc++ ;
+ if (COL_IS_ALIVE (c))
+ {
+ *pdest++ = c ;
+ }
+ }
+ Row [r].length = (int) (pdest - &A [Row [r].start]) ;
+#ifndef NDEBUG
+ debug_rows-- ;
+#endif
+ }
+ }
+ /* ensure we found all the rows */
+ assert (debug_rows == 0) ;
+
+ /* === Return the new value of pfree ==================================== */
+
+ return ((int) (pdest - &A [0])) ;
+}
+
+
+/* ========================================================================== */
+/* === clear_mark =========================================================== */
+/* ========================================================================== */
+
+/*
+ Clears the Row [].shared2.mark array, and returns the new tag_mark.
+ Return value is the new tag_mark. Not user-callable.
+*/
+
+PRIVATE int clear_mark /* return the new value for tag_mark */
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows in A */
+ RowInfo Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */
+)
+{
+ /* === Local variables ================================================== */
+
+ int r ;
+
+ DEBUG0 (("Clear mark\n")) ;
+ for (r = 0 ; r < n_row ; r++)
+ {
+ if (ROW_IS_ALIVE (r))
+ {
+ Row [r].shared2.mark = 0 ;
+ }
+ }
+ return (1) ;
+}
+
+
+/* ========================================================================== */
+/* === debugging routines =================================================== */
+/* ========================================================================== */
+
+/* When debugging is disabled, the remainder of this file is ignored. */
+
+#ifndef NDEBUG
+
+
+/* ========================================================================== */
+/* === debug_structures ===================================================== */
+/* ========================================================================== */
+
+/*
+ At this point, all empty rows and columns are dead. All live columns
+ are "clean" (containing no dead rows) and simplicial (no supercolumns
+ yet). Rows may contain dead columns, but all live rows contain at
+ least one live column.
+*/
+
+PRIVATE void debug_structures
+(
+ /* === Parameters ======================================================= */
+
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int n_col2
+)
+{
+ /* === Local variables ================================================== */
+
+ int i ;
+ int c ;
+ int *cp ;
+ int *cp_end ;
+ int len ;
+ int score ;
+ int r ;
+ int *rp ;
+ int *rp_end ;
+ int deg ;
+
+ /* === Check A, Row, and Col ============================================ */
+
+ for (c = 0 ; c < n_col ; c++)
+ {
+ if (COL_IS_ALIVE (c))
+ {
+ len = Col [c].length ;
+ score = Col [c].shared2.score ;
+ DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ;
+ assert (len > 0) ;
+ assert (score >= 0) ;
+ assert (Col [c].shared1.thickness == 1) ;
+ cp = &A [Col [c].start] ;
+ cp_end = cp + len ;
+ while (cp < cp_end)
+ {
+ r = *cp++ ;
+ assert (ROW_IS_ALIVE (r)) ;
+ }
+ }
+ else
+ {
+ i = Col [c].shared2.order ;
+ assert (i >= n_col2 && i < n_col) ;
+ }
+ }
+
+ for (r = 0 ; r < n_row ; r++)
+ {
+ if (ROW_IS_ALIVE (r))
+ {
+ i = 0 ;
+ len = Row [r].length ;
+ deg = Row [r].shared1.degree ;
+ assert (len > 0) ;
+ assert (deg > 0) ;
+ rp = &A [Row [r].start] ;
+ rp_end = rp + len ;
+ while (rp < rp_end)
+ {
+ c = *rp++ ;
+ if (COL_IS_ALIVE (c))
+ {
+ i++ ;
+ }
+ }
+ assert (i > 0) ;
+ }
+ }
+}
+
+
+/* ========================================================================== */
+/* === debug_deg_lists ====================================================== */
+/* ========================================================================== */
+
+/*
+ Prints the contents of the degree lists. Counts the number of columns
+ in the degree list and compares it to the total it should have. Also
+ checks the row degrees.
+*/
+
+PRIVATE void debug_deg_lists
+(
+ /* === Parameters ======================================================= */
+
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int head [],
+ int min_score,
+ int should,
+ int max_deg
+)
+{
+ /* === Local variables ================================================== */
+
+ int deg ;
+ int col ;
+ int have ;
+ int row ;
+
+ /* === Check the degree lists =========================================== */
+
+ if (n_col > 10000 && debug_colamd <= 0)
+ {
+ return ;
+ }
+ have = 0 ;
+ DEBUG4 (("Degree lists: %d\n", min_score)) ;
+ for (deg = 0 ; deg <= n_col ; deg++)
+ {
+ col = head [deg] ;
+ if (col == EMPTY)
+ {
+ continue ;
+ }
+ DEBUG4 (("%d:", deg)) ;
+ while (col != EMPTY)
+ {
+ DEBUG4 ((" %d", col)) ;
+ have += Col [col].shared1.thickness ;
+ assert (COL_IS_ALIVE (col)) ;
+ col = Col [col].shared4.degree_next ;
+ }
+ DEBUG4 (("\n")) ;
+ }
+ DEBUG4 (("should %d have %d\n", should, have)) ;
+ assert (should == have) ;
+
+ /* === Check the row degrees ============================================ */
+
+ if (n_row > 10000 && debug_colamd <= 0)
+ {
+ return ;
+ }
+ for (row = 0 ; row < n_row ; row++)
+ {
+ if (ROW_IS_ALIVE (row))
+ {
+ assert (Row [row].shared1.degree <= max_deg) ;
+ }
+ }
+}
+
+
+/* ========================================================================== */
+/* === debug_mark =========================================================== */
+/* ========================================================================== */
+
+/*
+ Ensures that the tag_mark is less that the maximum and also ensures that
+ each entry in the mark array is less than the tag mark.
+*/
+
+PRIVATE void debug_mark
+(
+ /* === Parameters ======================================================= */
+
+ int n_row,
+ RowInfo Row [],
+ int tag_mark,
+ int max_mark
+)
+{
+ /* === Local variables ================================================== */
+
+ int r ;
+
+ /* === Check the Row marks ============================================== */
+
+ assert (tag_mark > 0 && tag_mark <= max_mark) ;
+ if (n_row > 10000 && debug_colamd <= 0)
+ {
+ return ;
+ }
+ for (r = 0 ; r < n_row ; r++)
+ {
+ assert (Row [r].shared2.mark < tag_mark) ;
+ }
+}
+
+
+/* ========================================================================== */
+/* === debug_matrix ========================================================= */
+/* ========================================================================== */
+
+/*
+ Prints out the contents of the columns and the rows.
+*/
+
+PRIVATE void debug_matrix
+(
+ /* === Parameters ======================================================= */
+
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A []
+)
+{
+ /* === Local variables ================================================== */
+
+ int r ;
+ int c ;
+ int *rp ;
+ int *rp_end ;
+ int *cp ;
+ int *cp_end ;
+
+ /* === Dump the rows and columns of the matrix ========================== */
+
+ if (debug_colamd < 3)
+ {
+ return ;
+ }
+ DEBUG3 (("DUMP MATRIX:\n")) ;
+ for (r = 0 ; r < n_row ; r++)
+ {
+ DEBUG3 (("Row %d alive? %d\n", r, ROW_IS_ALIVE (r))) ;
+ if (ROW_IS_DEAD (r))
+ {
+ continue ;
+ }
+ DEBUG3 (("start %d length %d degree %d\n",
+ Row [r].start, Row [r].length, Row [r].shared1.degree)) ;
+ rp = &A [Row [r].start] ;
+ rp_end = rp + Row [r].length ;
+ while (rp < rp_end)
+ {
+ c = *rp++ ;
+ DEBUG3 ((" %d col %d\n", COL_IS_ALIVE (c), c)) ;
+ }
+ }
+
+ for (c = 0 ; c < n_col ; c++)
+ {
+ DEBUG3 (("Col %d alive? %d\n", c, COL_IS_ALIVE (c))) ;
+ if (COL_IS_DEAD (c))
+ {
+ continue ;
+ }
+ DEBUG3 (("start %d length %d shared1 %d shared2 %d\n",
+ Col [c].start, Col [c].length,
+ Col [c].shared1.thickness, Col [c].shared2.score)) ;
+ cp = &A [Col [c].start] ;
+ cp_end = cp + Col [c].length ;
+ while (cp < cp_end)
+ {
+ r = *cp++ ;
+ DEBUG3 ((" %d row %d\n", ROW_IS_ALIVE (r), r)) ;
+ }
+ }
+}
+
+#endif
+
diff --git a/intern/opennl/superlu/colamd.h b/intern/opennl/superlu/colamd.h
new file mode 100644
index 00000000000..00783983b27
--- /dev/null
+++ b/intern/opennl/superlu/colamd.h
@@ -0,0 +1,67 @@
+/* ========================================================================== */
+/* === colamd prototypes and definitions ==================================== */
+/* ========================================================================== */
+
+/*
+ This is the colamd include file,
+
+ http://www.cise.ufl.edu/~davis/colamd/colamd.h
+
+ for use in the colamd.c, colamdmex.c, and symamdmex.c files located at
+
+ http://www.cise.ufl.edu/~davis/colamd/
+
+ See those files for a description of colamd and symamd, and for the
+ copyright notice, which also applies to this file.
+
+ August 3, 1998. Version 1.0.
+*/
+
+/* ========================================================================== */
+/* === Definitions ========================================================== */
+/* ========================================================================== */
+
+/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */
+#define COLAMD_KNOBS 20
+
+/* number of output statistics. Only A [0..2] are currently used. */
+#define COLAMD_STATS 20
+
+/* knobs [0] and A [0]: dense row knob and output statistic. */
+#define COLAMD_DENSE_ROW 0
+
+/* knobs [1] and A [1]: dense column knob and output statistic. */
+#define COLAMD_DENSE_COL 1
+
+/* A [2]: memory defragmentation count output statistic */
+#define COLAMD_DEFRAG_COUNT 2
+
+/* A [3]: whether or not the input columns were jumbled or had duplicates */
+#define COLAMD_JUMBLED_COLS 3
+
+/* ========================================================================== */
+/* === Prototypes of user-callable routines ================================= */
+/* ========================================================================== */
+
+int colamd_recommended /* returns recommended value of Alen */
+(
+ int nnz, /* nonzeros in A */
+ int n_row, /* number of rows in A */
+ int n_col /* number of columns in A */
+) ;
+
+void colamd_set_defaults /* sets default parameters */
+( /* knobs argument is modified on output */
+ double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
+) ;
+
+int colamd /* returns TRUE if successful, FALSE otherwise*/
+( /* A and p arguments are modified on output */
+ int n_row, /* number of rows in A */
+ int n_col, /* number of columns in A */
+ int Alen, /* size of the array A */
+ int A [], /* row indices of A, of size Alen */
+ int p [], /* column pointers of A, of size n_col+1 */
+ double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
+) ;
+
diff --git a/intern/opennl/superlu/get_perm_c.c b/intern/opennl/superlu/get_perm_c.c
new file mode 100644
index 00000000000..9cdf5a876bf
--- /dev/null
+++ b/intern/opennl/superlu/get_perm_c.c
@@ -0,0 +1,453 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+
+#include "ssp_defs.h"
+#include "colamd.h"
+
+extern int genmmd_(int *, int *, int *, int *, int *, int *, int *,
+ int *, int *, int *, int *, int *);
+
+void
+get_colamd(
+ const int m, /* number of rows in matrix A. */
+ const int n, /* number of columns in matrix A. */
+ const int nnz,/* number of nonzeros in matrix A. */
+ int *colptr, /* column pointer of size n+1 for matrix A. */
+ int *rowind, /* row indices of size nz for matrix A. */
+ int *perm_c /* out - the column permutation vector. */
+ )
+{
+ int Alen, *A, i, info, *p;
+ double *knobs;
+
+ Alen = colamd_recommended(nnz, m, n);
+
+ if ( !(knobs = (double *) SUPERLU_MALLOC(COLAMD_KNOBS * sizeof(double))) )
+ ABORT("Malloc fails for knobs");
+ colamd_set_defaults(knobs);
+
+ if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) )
+ ABORT("Malloc fails for A[]");
+ if (!(p = (int *) SUPERLU_MALLOC((n+1) * sizeof(int))) )
+ ABORT("Malloc fails for p[]");
+ for (i = 0; i <= n; ++i) p[i] = colptr[i];
+ for (i = 0; i < nnz; ++i) A[i] = rowind[i];
+ info = colamd(m, n, Alen, A, p, knobs);
+ if ( info == FALSE ) ABORT("COLAMD failed");
+
+ for (i = 0; i < n; ++i) perm_c[p[i]] = i;
+
+ SUPERLU_FREE(knobs);
+ SUPERLU_FREE(A);
+ SUPERLU_FREE(p);
+}
+
+void
+getata(
+ const int m, /* number of rows in matrix A. */
+ const int n, /* number of columns in matrix A. */
+ const int nz, /* number of nonzeros in matrix A */
+ int *colptr, /* column pointer of size n+1 for matrix A. */
+ int *rowind, /* row indices of size nz for matrix A. */
+ int *atanz, /* out - on exit, returns the actual number of
+ nonzeros in matrix A'*A. */
+ int **ata_colptr, /* out - size n+1 */
+ int **ata_rowind /* out - size *atanz */
+ )
+/*
+ * Purpose
+ * =======
+ *
+ * Form the structure of A'*A. A is an m-by-n matrix in column oriented
+ * format represented by (colptr, rowind). The output A'*A is in column
+ * oriented format (symmetrically, also row oriented), represented by
+ * (ata_colptr, ata_rowind).
+ *
+ * This routine is modified from GETATA routine by Tim Davis.
+ * The complexity of this algorithm is: SUM_{i=1,m} r(i)^2,
+ * i.e., the sum of the square of the row counts.
+ *
+ * Questions
+ * =========
+ * o Do I need to withhold the *dense* rows?
+ * o How do I know the number of nonzeros in A'*A?
+ *
+ */
+{
+ register int i, j, k, col, num_nz, ti, trow;
+ int *marker, *b_colptr, *b_rowind;
+ int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
+
+ if ( !(marker = (int*) SUPERLU_MALLOC((SUPERLU_MAX(m,n)+1)*sizeof(int))) )
+ ABORT("SUPERLU_MALLOC fails for marker[]");
+ if ( !(t_colptr = (int*) SUPERLU_MALLOC((m+1) * sizeof(int))) )
+ ABORT("SUPERLU_MALLOC t_colptr[]");
+ if ( !(t_rowind = (int*) SUPERLU_MALLOC(nz * sizeof(int))) )
+ ABORT("SUPERLU_MALLOC fails for t_rowind[]");
+
+
+ /* Get counts of each column of T, and set up column pointers */
+ for (i = 0; i < m; ++i) marker[i] = 0;
+ for (j = 0; j < n; ++j) {
+ for (i = colptr[j]; i < colptr[j+1]; ++i)
+ ++marker[rowind[i]];
+ }
+ t_colptr[0] = 0;
+ for (i = 0; i < m; ++i) {
+ t_colptr[i+1] = t_colptr[i] + marker[i];
+ marker[i] = t_colptr[i];
+ }
+
+ /* Transpose the matrix from A to T */
+ for (j = 0; j < n; ++j)
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ col = rowind[i];
+ t_rowind[marker[col]] = j;
+ ++marker[col];
+ }
+
+
+ /* ----------------------------------------------------------------
+ compute B = T * A, where column j of B is:
+
+ Struct (B_*j) = UNION ( Struct (T_*k) )
+ A_kj != 0
+
+ do not include the diagonal entry
+
+ ( Partition A as: A = (A_*1, ..., A_*n)
+ Then B = T * A = (T * A_*1, ..., T * A_*n), where
+ T * A_*j = (T_*1, ..., T_*m) * A_*j. )
+ ---------------------------------------------------------------- */
+
+ /* Zero the diagonal flag */
+ for (i = 0; i < n; ++i) marker[i] = -1;
+
+ /* First pass determines number of nonzeros in B */
+ num_nz = 0;
+ for (j = 0; j < n; ++j) {
+ /* Flag the diagonal so it's not included in the B matrix */
+ marker[j] = j;
+
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ /* A_kj is nonzero, add pattern of column T_*k to B_*j */
+ k = rowind[i];
+ for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
+ trow = t_rowind[ti];
+ if ( marker[trow] != j ) {
+ marker[trow] = j;
+ num_nz++;
+ }
+ }
+ }
+ }
+ *atanz = num_nz;
+
+ /* Allocate storage for A'*A */
+ if ( !(*ata_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for ata_colptr[]");
+ if ( *atanz ) {
+ if ( !(*ata_rowind = (int*) SUPERLU_MALLOC( *atanz * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for ata_rowind[]");
+ }
+ b_colptr = *ata_colptr; /* aliasing */
+ b_rowind = *ata_rowind;
+
+ /* Zero the diagonal flag */
+ for (i = 0; i < n; ++i) marker[i] = -1;
+
+ /* Compute each column of B, one at a time */
+ num_nz = 0;
+ for (j = 0; j < n; ++j) {
+ b_colptr[j] = num_nz;
+
+ /* Flag the diagonal so it's not included in the B matrix */
+ marker[j] = j;
+
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ /* A_kj is nonzero, add pattern of column T_*k to B_*j */
+ k = rowind[i];
+ for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
+ trow = t_rowind[ti];
+ if ( marker[trow] != j ) {
+ marker[trow] = j;
+ b_rowind[num_nz++] = trow;
+ }
+ }
+ }
+ }
+ b_colptr[n] = num_nz;
+
+ SUPERLU_FREE(marker);
+ SUPERLU_FREE(t_colptr);
+ SUPERLU_FREE(t_rowind);
+}
+
+
+void
+at_plus_a(
+ const int n, /* number of columns in matrix A. */
+ const int nz, /* number of nonzeros in matrix A */
+ int *colptr, /* column pointer of size n+1 for matrix A. */
+ int *rowind, /* row indices of size nz for matrix A. */
+ int *bnz, /* out - on exit, returns the actual number of
+ nonzeros in matrix A'*A. */
+ int **b_colptr, /* out - size n+1 */
+ int **b_rowind /* out - size *bnz */
+ )
+{
+/*
+ * Purpose
+ * =======
+ *
+ * Form the structure of A'+A. A is an n-by-n matrix in column oriented
+ * format represented by (colptr, rowind). The output A'+A is in column
+ * oriented format (symmetrically, also row oriented), represented by
+ * (b_colptr, b_rowind).
+ *
+ */
+ register int i, j, k, col, num_nz;
+ int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
+ int *marker;
+
+ if ( !(marker = (int*) SUPERLU_MALLOC( n * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for marker[]");
+ if ( !(t_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for t_colptr[]");
+ if ( !(t_rowind = (int*) SUPERLU_MALLOC( nz * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails t_rowind[]");
+
+
+ /* Get counts of each column of T, and set up column pointers */
+ for (i = 0; i < n; ++i) marker[i] = 0;
+ for (j = 0; j < n; ++j) {
+ for (i = colptr[j]; i < colptr[j+1]; ++i)
+ ++marker[rowind[i]];
+ }
+ t_colptr[0] = 0;
+ for (i = 0; i < n; ++i) {
+ t_colptr[i+1] = t_colptr[i] + marker[i];
+ marker[i] = t_colptr[i];
+ }
+
+ /* Transpose the matrix from A to T */
+ for (j = 0; j < n; ++j)
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ col = rowind[i];
+ t_rowind[marker[col]] = j;
+ ++marker[col];
+ }
+
+
+ /* ----------------------------------------------------------------
+ compute B = A + T, where column j of B is:
+
+ Struct (B_*j) = Struct (A_*k) UNION Struct (T_*k)
+
+ do not include the diagonal entry
+ ---------------------------------------------------------------- */
+
+ /* Zero the diagonal flag */
+ for (i = 0; i < n; ++i) marker[i] = -1;
+
+ /* First pass determines number of nonzeros in B */
+ num_nz = 0;
+ for (j = 0; j < n; ++j) {
+ /* Flag the diagonal so it's not included in the B matrix */
+ marker[j] = j;
+
+ /* Add pattern of column A_*k to B_*j */
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ k = rowind[i];
+ if ( marker[k] != j ) {
+ marker[k] = j;
+ ++num_nz;
+ }
+ }
+
+ /* Add pattern of column T_*k to B_*j */
+ for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
+ k = t_rowind[i];
+ if ( marker[k] != j ) {
+ marker[k] = j;
+ ++num_nz;
+ }
+ }
+ }
+ *bnz = num_nz;
+
+ /* Allocate storage for A+A' */
+ if ( !(*b_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for b_colptr[]");
+ if ( *bnz) {
+ if ( !(*b_rowind = (int*) SUPERLU_MALLOC( *bnz * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for b_rowind[]");
+ }
+
+ /* Zero the diagonal flag */
+ for (i = 0; i < n; ++i) marker[i] = -1;
+
+ /* Compute each column of B, one at a time */
+ num_nz = 0;
+ for (j = 0; j < n; ++j) {
+ (*b_colptr)[j] = num_nz;
+
+ /* Flag the diagonal so it's not included in the B matrix */
+ marker[j] = j;
+
+ /* Add pattern of column A_*k to B_*j */
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ k = rowind[i];
+ if ( marker[k] != j ) {
+ marker[k] = j;
+ (*b_rowind)[num_nz++] = k;
+ }
+ }
+
+ /* Add pattern of column T_*k to B_*j */
+ for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
+ k = t_rowind[i];
+ if ( marker[k] != j ) {
+ marker[k] = j;
+ (*b_rowind)[num_nz++] = k;
+ }
+ }
+ }
+ (*b_colptr)[n] = num_nz;
+
+ SUPERLU_FREE(marker);
+ SUPERLU_FREE(t_colptr);
+ SUPERLU_FREE(t_rowind);
+}
+
+void
+get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
+/*
+ * Purpose
+ * =======
+ *
+ * GET_PERM_C obtains a permutation matrix Pc, by applying the multiple
+ * minimum degree ordering code by Joseph Liu to matrix A'*A or A+A'.
+ * or using approximate minimum degree column ordering by Davis et. al.
+ * The LU factorization of A*Pc tends to have less fill than the LU
+ * factorization of A.
+ *
+ * Arguments
+ * =========
+ *
+ * ispec (input) int
+ * Specifies the type of column ordering to reduce fill:
+ * = 1: minimum degree on the structure of A^T * A
+ * = 2: minimum degree on the structure of A^T + A
+ * = 3: approximate minimum degree for unsymmetric matrices
+ * If ispec == 0, the natural ordering (i.e., Pc = I) is returned.
+ *
+ * A (input) SuperMatrix*
+ * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ * of the linear equations is A->nrow. Currently, the type of A
+ * can be: Stype = NC; Dtype = _D; Mtype = GE. In the future,
+ * more general A can be handled.
+ *
+ * perm_c (output) int*
+ * Column permutation vector of size A->ncol, which defines the
+ * permutation matrix Pc; perm_c[i] = j means column i of A is
+ * in position j in A*Pc.
+ *
+ */
+{
+ NCformat *Astore = A->Store;
+ int m, n, bnz, *b_colptr, i;
+ int delta, maxint, nofsub, *invp;
+ int *b_rowind, *dhead, *qsize, *llist, *marker;
+ double t, SuperLU_timer_();
+
+ m = A->nrow;
+ n = A->ncol;
+
+ t = SuperLU_timer_();
+ switch ( ispec ) {
+ case 0: /* Natural ordering */
+ for (i = 0; i < n; ++i) perm_c[i] = i;
+#if ( PRNTlevel>=1 )
+ printf("Use natural column ordering.\n");
+#endif
+ return;
+ case 1: /* Minimum degree ordering on A'*A */
+ getata(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
+ &bnz, &b_colptr, &b_rowind);
+#if ( PRNTlevel>=1 )
+ printf("Use minimum degree ordering on A'*A.\n");
+#endif
+ t = SuperLU_timer_() - t;
+ /*printf("Form A'*A time = %8.3f\n", t);*/
+ break;
+ case 2: /* Minimum degree ordering on A'+A */
+ if ( m != n ) ABORT("Matrix is not square");
+ at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
+ &bnz, &b_colptr, &b_rowind);
+#if ( PRNTlevel>=1 )
+ printf("Use minimum degree ordering on A'+A.\n");
+#endif
+ t = SuperLU_timer_() - t;
+ /*printf("Form A'+A time = %8.3f\n", t);*/
+ break;
+ case 3: /* Approximate minimum degree column ordering. */
+ get_colamd(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
+ perm_c);
+#if ( PRNTlevel>=1 )
+ printf(".. Use approximate minimum degree column ordering.\n");
+#endif
+ return;
+ default:
+ ABORT("Invalid ISPEC");
+ }
+
+ if ( bnz != 0 ) {
+ t = SuperLU_timer_();
+
+ /* Initialize and allocate storage for GENMMD. */
+ delta = 1; /* DELTA is a parameter to allow the choice of nodes
+ whose degree <= min-degree + DELTA. */
+ maxint = 2147483647; /* 2**31 - 1 */
+ invp = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
+ if ( !invp ) ABORT("SUPERLU_MALLOC fails for invp.");
+ dhead = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
+ if ( !dhead ) ABORT("SUPERLU_MALLOC fails for dhead.");
+ qsize = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
+ if ( !qsize ) ABORT("SUPERLU_MALLOC fails for qsize.");
+ llist = (int *) SUPERLU_MALLOC(n*sizeof(int));
+ if ( !llist ) ABORT("SUPERLU_MALLOC fails for llist.");
+ marker = (int *) SUPERLU_MALLOC(n*sizeof(int));
+ if ( !marker ) ABORT("SUPERLU_MALLOC fails for marker.");
+
+ /* Transform adjacency list into 1-based indexing required by GENMMD.*/
+ for (i = 0; i <= n; ++i) ++b_colptr[i];
+ for (i = 0; i < bnz; ++i) ++b_rowind[i];
+
+ genmmd_(&n, b_colptr, b_rowind, perm_c, invp, &delta, dhead,
+ qsize, llist, marker, &maxint, &nofsub);
+
+ /* Transform perm_c into 0-based indexing. */
+ for (i = 0; i < n; ++i) --perm_c[i];
+
+ SUPERLU_FREE(b_colptr);
+ SUPERLU_FREE(b_rowind);
+ SUPERLU_FREE(invp);
+ SUPERLU_FREE(dhead);
+ SUPERLU_FREE(qsize);
+ SUPERLU_FREE(llist);
+ SUPERLU_FREE(marker);
+
+ t = SuperLU_timer_() - t;
+ /* printf("call GENMMD time = %8.3f\n", t);*/
+
+ } else { /* Empty adjacency structure */
+ for (i = 0; i < n; ++i) perm_c[i] = i;
+ }
+
+}
diff --git a/intern/opennl/superlu/heap_relax_snode.c b/intern/opennl/superlu/heap_relax_snode.c
new file mode 100644
index 00000000000..86971f59571
--- /dev/null
+++ b/intern/opennl/superlu/heap_relax_snode.c
@@ -0,0 +1,116 @@
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void
+heap_relax_snode (
+ const int n,
+ int *et, /* column elimination tree */
+ const int relax_columns, /* max no of columns allowed in a
+ relaxed snode */
+ int *descendants, /* no of descendants of each node
+ in the etree */
+ int *relax_end /* last column in a supernode */
+ )
+{
+/*
+ * Purpose
+ * =======
+ * relax_snode() - Identify the initial relaxed supernodes, assuming that
+ * the matrix has been reordered according to the postorder of the etree.
+ *
+ */
+ register int i, j, k, l, parent;
+ register int snode_start; /* beginning of a snode */
+ int *et_save, *post, *inv_post, *iwork;
+ int nsuper_et = 0, nsuper_et_post = 0;
+
+ /* The etree may not be postordered, but is heap ordered. */
+
+ iwork = (int*) intMalloc(3*n+2);
+ if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
+ inv_post = iwork + n+1;
+ et_save = inv_post + n+1;
+
+ /* Post order etree */
+ post = (int *) TreePostorder(n, et);
+ for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
+
+ /* Renumber etree in postorder */
+ for (i = 0; i < n; ++i) {
+ iwork[post[i]] = post[et[i]];
+ et_save[i] = et[i]; /* Save the original etree */
+ }
+ for (i = 0; i < n; ++i) et[i] = iwork[i];
+
+ /* Compute the number of descendants of each node in the etree */
+ ifill (relax_end, n, EMPTY);
+ for (j = 0; j < n; j++) descendants[j] = 0;
+ for (j = 0; j < n; j++) {
+ parent = et[j];
+ if ( parent != n ) /* not the dummy root */
+ descendants[parent] += descendants[j] + 1;
+ }
+
+ /* Identify the relaxed supernodes by postorder traversal of the etree. */
+ for (j = 0; j < n; ) {
+ parent = et[j];
+ snode_start = j;
+ while ( parent != n && descendants[parent] < relax_columns ) {
+ j = parent;
+ parent = et[j];
+ }
+ /* Found a supernode in postordered etree; j is the last column. */
+ ++nsuper_et_post;
+ k = n;
+ for (i = snode_start; i <= j; ++i)
+ k = SUPERLU_MIN(k, inv_post[i]);
+ l = inv_post[j];
+ if ( (l - k) == (j - snode_start) ) {
+ /* It's also a supernode in the original etree */
+ relax_end[k] = l; /* Last column is recorded */
+ ++nsuper_et;
+ } else {
+ for (i = snode_start; i <= j; ++i) {
+ l = inv_post[i];
+ if ( descendants[i] == 0 ) relax_end[l] = l;
+ }
+ }
+ j++;
+ /* Search for a new leaf */
+ while ( descendants[j] != 0 && j < n ) j++;
+ }
+
+#if ( PRNTlevel>=1 )
+ printf(".. heap_snode_relax:\n"
+ "\tNo of relaxed snodes in postordered etree:\t%d\n"
+ "\tNo of relaxed snodes in original etree:\t%d\n",
+ nsuper_et_post, nsuper_et);
+#endif
+
+ /* Recover the original etree */
+ for (i = 0; i < n; ++i) et[i] = et_save[i];
+
+ SUPERLU_FREE(post);
+ SUPERLU_FREE(iwork);
+}
+
+
diff --git a/intern/opennl/superlu/lsame.c b/intern/opennl/superlu/lsame.c
new file mode 100644
index 00000000000..29f27d38fa9
--- /dev/null
+++ b/intern/opennl/superlu/lsame.c
@@ -0,0 +1,70 @@
+int lsame_(char *ca, char *cb)
+{
+/* -- LAPACK auxiliary routine (version 2.0) --
+ Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+ Courant Institute, Argonne National Lab, and Rice University
+ September 30, 1994
+
+ Purpose
+ =======
+
+ LSAME returns .TRUE. if CA is the same letter as CB regardless of case.
+
+ Arguments
+ =========
+
+ CA (input) CHARACTER*1
+ CB (input) CHARACTER*1
+ CA and CB specify the single characters to be compared.
+
+ =====================================================================
+*/
+
+ /* System generated locals */
+ int ret_val;
+
+ /* Local variables */
+ int inta, intb, zcode;
+
+ ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
+ if (ret_val) {
+ return ret_val;
+ }
+
+ /* Now test for equivalence if both characters are alphabetic. */
+
+ zcode = 'Z';
+
+ /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime
+ machines, on which ICHAR returns a value with bit 8 set.
+ ICHAR('A') on Prime machines returns 193 which is the same as
+ ICHAR('A') on an EBCDIC machine. */
+
+ inta = *(unsigned char *)ca;
+ intb = *(unsigned char *)cb;
+
+ if (zcode == 90 || zcode == 122) {
+ /* ASCII is assumed - ZCODE is the ASCII code of either lower or
+ upper case 'Z'. */
+ if (inta >= 97 && inta <= 122) inta += -32;
+ if (intb >= 97 && intb <= 122) intb += -32;
+
+ } else if (zcode == 233 || zcode == 169) {
+ /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
+ upper case 'Z'. */
+ if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta
+ >= 162 && inta <= 169))
+ inta += 64;
+ if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb
+ >= 162 && intb <= 169))
+ intb += 64;
+ } else if (zcode == 218 || zcode == 250) {
+ /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code
+ plus 128 of either lower or upper case 'Z'. */
+ if (inta >= 225 && inta <= 250) inta += -32;
+ if (intb >= 225 && intb <= 250) intb += -32;
+ }
+ ret_val = inta == intb;
+ return ret_val;
+
+} /* lsame_ */
diff --git a/intern/opennl/superlu/memory.c b/intern/opennl/superlu/memory.c
new file mode 100644
index 00000000000..54d863ea9e9
--- /dev/null
+++ b/intern/opennl/superlu/memory.c
@@ -0,0 +1,207 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/** Precision-independent memory-related routines.
+ (Shared by [sdcz]memory.c) **/
+
+#include "ssp_defs.h"
+
+
+#if ( DEBUGlevel>=1 ) /* Debug malloc/free. */
+int superlu_malloc_total = 0;
+
+#define PAD_FACTOR 2
+#define DWORD (sizeof(double)) /* Be sure it's no smaller than double. */
+
+void *superlu_malloc(size_t size)
+{
+ char *buf;
+
+ buf = (char *) malloc(size + DWORD);
+ if ( !buf ) {
+ printf("superlu_malloc fails: malloc_total %.0f MB, size %d\n",
+ superlu_malloc_total*1e-6, size);
+ ABORT("superlu_malloc: out of memory");
+ }
+
+ ((int_t *) buf)[0] = size;
+#if 0
+ superlu_malloc_total += size + DWORD;
+#else
+ superlu_malloc_total += size;
+#endif
+ return (void *) (buf + DWORD);
+}
+
+void superlu_free(void *addr)
+{
+ char *p = ((char *) addr) - DWORD;
+
+ if ( !addr )
+ ABORT("superlu_free: tried to free NULL pointer");
+
+ if ( !p )
+ ABORT("superlu_free: tried to free NULL+DWORD pointer");
+
+ {
+ int_t n = ((int_t *) p)[0];
+
+ if ( !n )
+ ABORT("superlu_free: tried to free a freed pointer");
+ *((int_t *) p) = 0; /* Set to zero to detect duplicate free's. */
+#if 0
+ superlu_malloc_total -= (n + DWORD);
+#else
+ superlu_malloc_total -= n;
+#endif
+
+ if ( superlu_malloc_total < 0 )
+ ABORT("superlu_malloc_total went negative!");
+
+ /*free (addr);*/
+ free (p);
+ }
+
+}
+
+#else /* production mode */
+
+void *superlu_malloc(size_t size)
+{
+ void *buf;
+ buf = (void *) malloc(size);
+ return (buf);
+}
+
+void superlu_free(void *addr)
+{
+ free (addr);
+}
+
+#endif
+
+
+/*
+ * Set up pointers for integer working arrays.
+ */
+void
+SetIWork(int m, int n, int panel_size, int *iworkptr, int **segrep,
+ int **parent, int **xplore, int **repfnz, int **panel_lsub,
+ int **xprune, int **marker)
+{
+ *segrep = iworkptr;
+ *parent = iworkptr + m;
+ *xplore = *parent + m;
+ *repfnz = *xplore + m;
+ *panel_lsub = *repfnz + panel_size * m;
+ *xprune = *panel_lsub + panel_size * m;
+ *marker = *xprune + n;
+ ifill (*repfnz, m * panel_size, EMPTY);
+ ifill (*panel_lsub, m * panel_size, EMPTY);
+}
+
+
+void
+copy_mem_int(int howmany, void *old, void *new)
+{
+ register int i;
+ int *iold = old;
+ int *inew = new;
+ for (i = 0; i < howmany; i++) inew[i] = iold[i];
+}
+
+
+void
+user_bcopy(char *src, char *dest, int bytes)
+{
+ char *s_ptr, *d_ptr;
+
+ s_ptr = src + bytes - 1;
+ d_ptr = dest + bytes - 1;
+ for (; d_ptr >= dest; --s_ptr, --d_ptr ) *d_ptr = *s_ptr;
+}
+
+
+
+int *intMalloc(int n)
+{
+ int *buf;
+ buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
+ if ( !buf ) {
+ ABORT("SUPERLU_MALLOC fails for buf in intMalloc()");
+ }
+ return (buf);
+}
+
+int *intCalloc(int n)
+{
+ int *buf;
+ register int i;
+ buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
+ if ( !buf ) {
+ ABORT("SUPERLU_MALLOC fails for buf in intCalloc()");
+ }
+ for (i = 0; i < n; ++i) buf[i] = 0;
+ return (buf);
+}
+
+
+
+#if 0
+check_expanders()
+{
+ int p;
+ printf("Check expanders:\n");
+ for (p = 0; p < NO_MEMTYPE; p++) {
+ printf("type %d, size %d, mem %d\n",
+ p, expanders[p].size, (int)expanders[p].mem);
+ }
+
+ return 0;
+}
+
+
+StackInfo()
+{
+ printf("Stack: size %d, used %d, top1 %d, top2 %d\n",
+ stack.size, stack.used, stack.top1, stack.top2);
+ return 0;
+}
+
+
+
+PrintStack(char *msg, GlobalLU_t *Glu)
+{
+ int i;
+ int *xlsub, *lsub, *xusub, *usub;
+
+ xlsub = Glu->xlsub;
+ lsub = Glu->lsub;
+ xusub = Glu->xusub;
+ usub = Glu->usub;
+
+ printf("%s\n", msg);
+
+/* printf("\nUCOL: ");
+ for (i = 0; i < xusub[ndim]; ++i)
+ printf("%f ", ucol[i]);
+
+ printf("\nLSUB: ");
+ for (i = 0; i < xlsub[ndim]; ++i)
+ printf("%d ", lsub[i]);
+
+ printf("\nUSUB: ");
+ for (i = 0; i < xusub[ndim]; ++i)
+ printf("%d ", usub[i]);
+
+ printf("\n");*/
+ return 0;
+}
+#endif
+
+
+
diff --git a/intern/opennl/superlu/mmd.c b/intern/opennl/superlu/mmd.c
new file mode 100644
index 00000000000..05f26ce0995
--- /dev/null
+++ b/intern/opennl/superlu/mmd.c
@@ -0,0 +1,1012 @@
+
+typedef int shortint;
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* **** GENMMD ..... MULTIPLE MINIMUM EXTERNAL DEGREE **** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/* AUTHOR - JOSEPH W.H. LIU */
+/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/* PURPOSE - THIS ROUTINE IMPLEMENTS THE MINIMUM DEGREE */
+/* ALGORITHM. IT MAKES USE OF THE IMPLICIT REPRESENTATION */
+/* OF ELIMINATION GRAPHS BY QUOTIENT GRAPHS, AND THE */
+/* NOTION OF INDISTINGUISHABLE NODES. IT ALSO IMPLEMENTS */
+/* THE MODIFICATIONS BY MULTIPLE ELIMINATION AND MINIMUM */
+/* EXTERNAL DEGREE. */
+/* --------------------------------------------- */
+/* CAUTION - THE ADJACENCY VECTOR ADJNCY WILL BE */
+/* DESTROYED. */
+/* --------------------------------------------- */
+
+/* INPUT PARAMETERS - */
+/* NEQNS - NUMBER OF EQUATIONS. */
+/* (XADJ,ADJNCY) - THE ADJACENCY STRUCTURE. */
+/* DELTA - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */
+/* MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) INTEGER */
+/* (ANY SMALLER ESTIMATE WILL DO) FOR MARKING */
+/* NODES. */
+
+/* OUTPUT PARAMETERS - */
+/* PERM - THE MINIMUM DEGREE ORDERING. */
+/* INVP - THE INVERSE OF PERM. */
+/* NOFSUB - AN UPPER BOUND ON THE NUMBER OF NONZERO */
+/* SUBSCRIPTS FOR THE COMPRESSED STORAGE SCHEME. */
+
+/* WORKING PARAMETERS - */
+/* DHEAD - VECTOR FOR HEAD OF DEGREE LISTS. */
+/* INVP - USED TEMPORARILY FOR DEGREE FORWARD LINK. */
+/* PERM - USED TEMPORARILY FOR DEGREE BACKWARD LINK. */
+/* QSIZE - VECTOR FOR SIZE OF SUPERNODES. */
+/* LLIST - VECTOR FOR TEMPORARY LINKED LISTS. */
+/* MARKER - A TEMPORARY MARKER VECTOR. */
+
+/* PROGRAM SUBROUTINES - */
+/* MMDELM, MMDINT, MMDNUM, MMDUPD. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int genmmd_(int *neqns, int *xadj, shortint *adjncy,
+ shortint *invp, shortint *perm, int *delta, shortint *dhead,
+ shortint *qsize, shortint *llist, shortint *marker, int *maxint,
+ int *nofsub)
+{
+ /* System generated locals */
+ int i__1;
+
+ /* Local variables */
+ static int mdeg, ehead, i, mdlmt, mdnode;
+ extern /* Subroutine */ int mmdelm_(int *, int *, shortint *,
+ shortint *, shortint *, shortint *, shortint *, shortint *,
+ shortint *, int *, int *), mmdupd_(int *, int *,
+ int *, shortint *, int *, int *, shortint *, shortint
+ *, shortint *, shortint *, shortint *, shortint *, int *,
+ int *), mmdint_(int *, int *, shortint *, shortint *,
+ shortint *, shortint *, shortint *, shortint *, shortint *),
+ mmdnum_(int *, shortint *, shortint *, shortint *);
+ static int nextmd, tag, num;
+
+
+/* *************************************************************** */
+
+
+/* *************************************************************** */
+
+ /* Parameter adjustments */
+ --marker;
+ --llist;
+ --qsize;
+ --dhead;
+ --perm;
+ --invp;
+ --adjncy;
+ --xadj;
+
+ /* Function Body */
+ if (*neqns <= 0) {
+ return 0;
+ }
+
+/* ------------------------------------------------ */
+/* INITIALIZATION FOR THE MINIMUM DEGREE ALGORITHM. */
+/* ------------------------------------------------ */
+ *nofsub = 0;
+ mmdint_(neqns, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], &
+ qsize[1], &llist[1], &marker[1]);
+
+/* ---------------------------------------------- */
+/* NUM COUNTS THE NUMBER OF ORDERED NODES PLUS 1. */
+/* ---------------------------------------------- */
+ num = 1;
+
+/* ----------------------------- */
+/* ELIMINATE ALL ISOLATED NODES. */
+/* ----------------------------- */
+ nextmd = dhead[1];
+L100:
+ if (nextmd <= 0) {
+ goto L200;
+ }
+ mdnode = nextmd;
+ nextmd = invp[mdnode];
+ marker[mdnode] = *maxint;
+ invp[mdnode] = -num;
+ ++num;
+ goto L100;
+
+L200:
+/* ---------------------------------------- */
+/* SEARCH FOR NODE OF THE MINIMUM DEGREE. */
+/* MDEG IS THE CURRENT MINIMUM DEGREE; */
+/* TAG IS USED TO FACILITATE MARKING NODES. */
+/* ---------------------------------------- */
+ if (num > *neqns) {
+ goto L1000;
+ }
+ tag = 1;
+ dhead[1] = 0;
+ mdeg = 2;
+L300:
+ if (dhead[mdeg] > 0) {
+ goto L400;
+ }
+ ++mdeg;
+ goto L300;
+L400:
+/* ------------------------------------------------- */
+/* USE VALUE OF DELTA TO SET UP MDLMT, WHICH GOVERNS */
+/* WHEN A DEGREE UPDATE IS TO BE PERFORMED. */
+/* ------------------------------------------------- */
+ mdlmt = mdeg + *delta;
+ ehead = 0;
+
+L500:
+ mdnode = dhead[mdeg];
+ if (mdnode > 0) {
+ goto L600;
+ }
+ ++mdeg;
+ if (mdeg > mdlmt) {
+ goto L900;
+ }
+ goto L500;
+L600:
+/* ---------------------------------------- */
+/* REMOVE MDNODE FROM THE DEGREE STRUCTURE. */
+/* ---------------------------------------- */
+ nextmd = invp[mdnode];
+ dhead[mdeg] = nextmd;
+ if (nextmd > 0) {
+ perm[nextmd] = -mdeg;
+ }
+ invp[mdnode] = -num;
+ *nofsub = *nofsub + mdeg + qsize[mdnode] - 2;
+ if (num + qsize[mdnode] > *neqns) {
+ goto L1000;
+ }
+/* ---------------------------------------------- */
+/* ELIMINATE MDNODE AND PERFORM QUOTIENT GRAPH */
+/* TRANSFORMATION. RESET TAG VALUE IF NECESSARY. */
+/* ---------------------------------------------- */
+ ++tag;
+ if (tag < *maxint) {
+ goto L800;
+ }
+ tag = 1;
+ i__1 = *neqns;
+ for (i = 1; i <= i__1; ++i) {
+ if (marker[i] < *maxint) {
+ marker[i] = 0;
+ }
+/* L700: */
+ }
+L800:
+ mmdelm_(&mdnode, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], &
+ qsize[1], &llist[1], &marker[1], maxint, &tag);
+ num += qsize[mdnode];
+ llist[mdnode] = ehead;
+ ehead = mdnode;
+ if (*delta >= 0) {
+ goto L500;
+ }
+L900:
+/* ------------------------------------------- */
+/* UPDATE DEGREES OF THE NODES INVOLVED IN THE */
+/* MINIMUM DEGREE NODES ELIMINATION. */
+/* ------------------------------------------- */
+ if (num > *neqns) {
+ goto L1000;
+ }
+ mmdupd_(&ehead, neqns, &xadj[1], &adjncy[1], delta, &mdeg, &dhead[1], &
+ invp[1], &perm[1], &qsize[1], &llist[1], &marker[1], maxint, &tag)
+ ;
+ goto L300;
+
+L1000:
+ mmdnum_(neqns, &perm[1], &invp[1], &qsize[1]);
+ return 0;
+
+} /* genmmd_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* *** MMDINT ..... MULT MINIMUM DEGREE INITIALIZATION *** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/* AUTHOR - JOSEPH W.H. LIU */
+/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/* PURPOSE - THIS ROUTINE PERFORMS INITIALIZATION FOR THE */
+/* MULTIPLE ELIMINATION VERSION OF THE MINIMUM DEGREE */
+/* ALGORITHM. */
+
+/* INPUT PARAMETERS - */
+/* NEQNS - NUMBER OF EQUATIONS. */
+/* (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */
+
+/* OUTPUT PARAMETERS - */
+/* (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
+/* QSIZE - SIZE OF SUPERNODE (INITIALIZED TO ONE). */
+/* LLIST - LINKED LIST. */
+/* MARKER - MARKER VECTOR. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdint_(int *neqns, int *xadj, shortint *adjncy,
+ shortint *dhead, shortint *dforw, shortint *dbakw, shortint *qsize,
+ shortint *llist, shortint *marker)
+{
+ /* System generated locals */
+ int i__1;
+
+ /* Local variables */
+ static int ndeg, node, fnode;
+
+
+/* *************************************************************** */
+
+
+/* *************************************************************** */
+
+ /* Parameter adjustments */
+ --marker;
+ --llist;
+ --qsize;
+ --dbakw;
+ --dforw;
+ --dhead;
+ --adjncy;
+ --xadj;
+
+ /* Function Body */
+ i__1 = *neqns;
+ for (node = 1; node <= i__1; ++node) {
+ dhead[node] = 0;
+ qsize[node] = 1;
+ marker[node] = 0;
+ llist[node] = 0;
+/* L100: */
+ }
+/* ------------------------------------------ */
+/* INITIALIZE THE DEGREE DOUBLY LINKED LISTS. */
+/* ------------------------------------------ */
+ i__1 = *neqns;
+ for (node = 1; node <= i__1; ++node) {
+ ndeg = xadj[node + 1] - xadj[node] + 1;
+ fnode = dhead[ndeg];
+ dforw[node] = fnode;
+ dhead[ndeg] = node;
+ if (fnode > 0) {
+ dbakw[fnode] = node;
+ }
+ dbakw[node] = -ndeg;
+/* L200: */
+ }
+ return 0;
+
+} /* mmdint_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* ** MMDELM ..... MULTIPLE MINIMUM DEGREE ELIMINATION *** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/* AUTHOR - JOSEPH W.H. LIU */
+/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/* PURPOSE - THIS ROUTINE ELIMINATES THE NODE MDNODE OF */
+/* MINIMUM DEGREE FROM THE ADJACENCY STRUCTURE, WHICH */
+/* IS STORED IN THE QUOTIENT GRAPH FORMAT. IT ALSO */
+/* TRANSFORMS THE QUOTIENT GRAPH REPRESENTATION OF THE */
+/* ELIMINATION GRAPH. */
+
+/* INPUT PARAMETERS - */
+/* MDNODE - NODE OF MINIMUM DEGREE. */
+/* MAXINT - ESTIMATE OF MAXIMUM REPRESENTABLE (SHORT) */
+/* INT. */
+/* TAG - TAG VALUE. */
+
+/* UPDATED PARAMETERS - */
+/* (XADJ,ADJNCY) - UPDATED ADJACENCY STRUCTURE. */
+/* (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
+/* QSIZE - SIZE OF SUPERNODE. */
+/* MARKER - MARKER VECTOR. */
+/* LLIST - TEMPORARY LINKED LIST OF ELIMINATED NABORS. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdelm_(int *mdnode, int *xadj, shortint *adjncy,
+ shortint *dhead, shortint *dforw, shortint *dbakw, shortint *qsize,
+ shortint *llist, shortint *marker, int *maxint, int *tag)
+{
+ /* System generated locals */
+ int i__1, i__2;
+
+ /* Local variables */
+ static int node, link, rloc, rlmt, i, j, nabor, rnode, elmnt, xqnbr,
+ istop, jstop, istrt, jstrt, nxnode, pvnode, nqnbrs, npv;
+
+
+/* *************************************************************** */
+
+
+/* *************************************************************** */
+
+/* ----------------------------------------------- */
+/* FIND REACHABLE SET AND PLACE IN DATA STRUCTURE. */
+/* ----------------------------------------------- */
+ /* Parameter adjustments */
+ --marker;
+ --llist;
+ --qsize;
+ --dbakw;
+ --dforw;
+ --dhead;
+ --adjncy;
+ --xadj;
+
+ /* Function Body */
+ marker[*mdnode] = *tag;
+ istrt = xadj[*mdnode];
+ istop = xadj[*mdnode + 1] - 1;
+/* ------------------------------------------------------- */
+/* ELMNT POINTS TO THE BEGINNING OF THE LIST OF ELIMINATED */
+/* NABORS OF MDNODE, AND RLOC GIVES THE STORAGE LOCATION */
+/* FOR THE NEXT REACHABLE NODE. */
+/* ------------------------------------------------------- */
+ elmnt = 0;
+ rloc = istrt;
+ rlmt = istop;
+ i__1 = istop;
+ for (i = istrt; i <= i__1; ++i) {
+ nabor = adjncy[i];
+ if (nabor == 0) {
+ goto L300;
+ }
+ if (marker[nabor] >= *tag) {
+ goto L200;
+ }
+ marker[nabor] = *tag;
+ if (dforw[nabor] < 0) {
+ goto L100;
+ }
+ adjncy[rloc] = nabor;
+ ++rloc;
+ goto L200;
+L100:
+ llist[nabor] = elmnt;
+ elmnt = nabor;
+L200:
+ ;
+ }
+L300:
+/* ----------------------------------------------------- */
+/* MERGE WITH REACHABLE NODES FROM GENERALIZED ELEMENTS. */
+/* ----------------------------------------------------- */
+ if (elmnt <= 0) {
+ goto L1000;
+ }
+ adjncy[rlmt] = -elmnt;
+ link = elmnt;
+L400:
+ jstrt = xadj[link];
+ jstop = xadj[link + 1] - 1;
+ i__1 = jstop;
+ for (j = jstrt; j <= i__1; ++j) {
+ node = adjncy[j];
+ link = -node;
+ if (node < 0) {
+ goto L400;
+ } else if (node == 0) {
+ goto L900;
+ } else {
+ goto L500;
+ }
+L500:
+ if (marker[node] >= *tag || dforw[node] < 0) {
+ goto L800;
+ }
+ marker[node] = *tag;
+/* --------------------------------- */
+/* USE STORAGE FROM ELIMINATED NODES */
+/* IF NECESSARY. */
+/* --------------------------------- */
+L600:
+ if (rloc < rlmt) {
+ goto L700;
+ }
+ link = -adjncy[rlmt];
+ rloc = xadj[link];
+ rlmt = xadj[link + 1] - 1;
+ goto L600;
+L700:
+ adjncy[rloc] = node;
+ ++rloc;
+L800:
+ ;
+ }
+L900:
+ elmnt = llist[elmnt];
+ goto L300;
+L1000:
+ if (rloc <= rlmt) {
+ adjncy[rloc] = 0;
+ }
+/* -------------------------------------------------------- */
+/* FOR EACH NODE IN THE REACHABLE SET, DO THE FOLLOWING ... */
+/* -------------------------------------------------------- */
+ link = *mdnode;
+L1100:
+ istrt = xadj[link];
+ istop = xadj[link + 1] - 1;
+ i__1 = istop;
+ for (i = istrt; i <= i__1; ++i) {
+ rnode = adjncy[i];
+ link = -rnode;
+ if (rnode < 0) {
+ goto L1100;
+ } else if (rnode == 0) {
+ goto L1800;
+ } else {
+ goto L1200;
+ }
+L1200:
+/* -------------------------------------------- */
+/* IF RNODE IS IN THE DEGREE LIST STRUCTURE ... */
+/* -------------------------------------------- */
+ pvnode = dbakw[rnode];
+ if (pvnode == 0 || pvnode == -(*maxint)) {
+ goto L1300;
+ }
+/* ------------------------------------- */
+/* THEN REMOVE RNODE FROM THE STRUCTURE. */
+/* ------------------------------------- */
+ nxnode = dforw[rnode];
+ if (nxnode > 0) {
+ dbakw[nxnode] = pvnode;
+ }
+ if (pvnode > 0) {
+ dforw[pvnode] = nxnode;
+ }
+ npv = -pvnode;
+ if (pvnode < 0) {
+ dhead[npv] = nxnode;
+ }
+L1300:
+/* ---------------------------------------- */
+/* PURGE INACTIVE QUOTIENT NABORS OF RNODE. */
+/* ---------------------------------------- */
+ jstrt = xadj[rnode];
+ jstop = xadj[rnode + 1] - 1;
+ xqnbr = jstrt;
+ i__2 = jstop;
+ for (j = jstrt; j <= i__2; ++j) {
+ nabor = adjncy[j];
+ if (nabor == 0) {
+ goto L1500;
+ }
+ if (marker[nabor] >= *tag) {
+ goto L1400;
+ }
+ adjncy[xqnbr] = nabor;
+ ++xqnbr;
+L1400:
+ ;
+ }
+L1500:
+/* ---------------------------------------- */
+/* IF NO ACTIVE NABOR AFTER THE PURGING ... */
+/* ---------------------------------------- */
+ nqnbrs = xqnbr - jstrt;
+ if (nqnbrs > 0) {
+ goto L1600;
+ }
+/* ----------------------------- */
+/* THEN MERGE RNODE WITH MDNODE. */
+/* ----------------------------- */
+ qsize[*mdnode] += qsize[rnode];
+ qsize[rnode] = 0;
+ marker[rnode] = *maxint;
+ dforw[rnode] = -(*mdnode);
+ dbakw[rnode] = -(*maxint);
+ goto L1700;
+L1600:
+/* -------------------------------------- */
+/* ELSE FLAG RNODE FOR DEGREE UPDATE, AND */
+/* ADD MDNODE AS A NABOR OF RNODE. */
+/* -------------------------------------- */
+ dforw[rnode] = nqnbrs + 1;
+ dbakw[rnode] = 0;
+ adjncy[xqnbr] = *mdnode;
+ ++xqnbr;
+ if (xqnbr <= jstop) {
+ adjncy[xqnbr] = 0;
+ }
+
+L1700:
+ ;
+ }
+L1800:
+ return 0;
+
+} /* mmdelm_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* ***** MMDUPD ..... MULTIPLE MINIMUM DEGREE UPDATE ***** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/* AUTHOR - JOSEPH W.H. LIU */
+/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/* PURPOSE - THIS ROUTINE UPDATES THE DEGREES OF NODES */
+/* AFTER A MULTIPLE ELIMINATION STEP. */
+
+/* INPUT PARAMETERS - */
+/* EHEAD - THE BEGINNING OF THE LIST OF ELIMINATED */
+/* NODES (I.E., NEWLY FORMED ELEMENTS). */
+/* NEQNS - NUMBER OF EQUATIONS. */
+/* (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */
+/* DELTA - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */
+/* MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) */
+/* INTEGER. */
+
+/* UPDATED PARAMETERS - */
+/* MDEG - NEW MINIMUM DEGREE AFTER DEGREE UPDATE. */
+/* (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
+/* QSIZE - SIZE OF SUPERNODE. */
+/* LLIST - WORKING LINKED LIST. */
+/* MARKER - MARKER VECTOR FOR DEGREE UPDATE. */
+/* TAG - TAG VALUE. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdupd_(int *ehead, int *neqns, int *xadj,
+ shortint *adjncy, int *delta, int *mdeg, shortint *dhead,
+ shortint *dforw, shortint *dbakw, shortint *qsize, shortint *llist,
+ shortint *marker, int *maxint, int *tag)
+{
+ /* System generated locals */
+ int i__1, i__2;
+
+ /* Local variables */
+ static int node, mtag, link, mdeg0, i, j, enode, fnode, nabor, elmnt,
+ istop, jstop, q2head, istrt, jstrt, qxhead, iq2, deg, deg0;
+
+
+/* *************************************************************** */
+
+
+/* *************************************************************** */
+
+ /* Parameter adjustments */
+ --marker;
+ --llist;
+ --qsize;
+ --dbakw;
+ --dforw;
+ --dhead;
+ --adjncy;
+ --xadj;
+
+ /* Function Body */
+ mdeg0 = *mdeg + *delta;
+ elmnt = *ehead;
+L100:
+/* ------------------------------------------------------- */
+/* FOR EACH OF THE NEWLY FORMED ELEMENT, DO THE FOLLOWING. */
+/* (RESET TAG VALUE IF NECESSARY.) */
+/* ------------------------------------------------------- */
+ if (elmnt <= 0) {
+ return 0;
+ }
+ mtag = *tag + mdeg0;
+ if (mtag < *maxint) {
+ goto L300;
+ }
+ *tag = 1;
+ i__1 = *neqns;
+ for (i = 1; i <= i__1; ++i) {
+ if (marker[i] < *maxint) {
+ marker[i] = 0;
+ }
+/* L200: */
+ }
+ mtag = *tag + mdeg0;
+L300:
+/* --------------------------------------------- */
+/* CREATE TWO LINKED LISTS FROM NODES ASSOCIATED */
+/* WITH ELMNT: ONE WITH TWO NABORS (Q2HEAD) IN */
+/* ADJACENCY STRUCTURE, AND THE OTHER WITH MORE */
+/* THAN TWO NABORS (QXHEAD). ALSO COMPUTE DEG0, */
+/* NUMBER OF NODES IN THIS ELEMENT. */
+/* --------------------------------------------- */
+ q2head = 0;
+ qxhead = 0;
+ deg0 = 0;
+ link = elmnt;
+L400:
+ istrt = xadj[link];
+ istop = xadj[link + 1] - 1;
+ i__1 = istop;
+ for (i = istrt; i <= i__1; ++i) {
+ enode = adjncy[i];
+ link = -enode;
+ if (enode < 0) {
+ goto L400;
+ } else if (enode == 0) {
+ goto L800;
+ } else {
+ goto L500;
+ }
+
+L500:
+ if (qsize[enode] == 0) {
+ goto L700;
+ }
+ deg0 += qsize[enode];
+ marker[enode] = mtag;
+/* ---------------------------------- */
+/* IF ENODE REQUIRES A DEGREE UPDATE, */
+/* THEN DO THE FOLLOWING. */
+/* ---------------------------------- */
+ if (dbakw[enode] != 0) {
+ goto L700;
+ }
+/* ---------------------------------------
+*/
+/* PLACE EITHER IN QXHEAD OR Q2HEAD LISTS.
+*/
+/* ---------------------------------------
+*/
+ if (dforw[enode] == 2) {
+ goto L600;
+ }
+ llist[enode] = qxhead;
+ qxhead = enode;
+ goto L700;
+L600:
+ llist[enode] = q2head;
+ q2head = enode;
+L700:
+ ;
+ }
+L800:
+/* -------------------------------------------- */
+/* FOR EACH ENODE IN Q2 LIST, DO THE FOLLOWING. */
+/* -------------------------------------------- */
+ enode = q2head;
+ iq2 = 1;
+L900:
+ if (enode <= 0) {
+ goto L1500;
+ }
+ if (dbakw[enode] != 0) {
+ goto L2200;
+ }
+ ++(*tag);
+ deg = deg0;
+/* ------------------------------------------ */
+/* IDENTIFY THE OTHER ADJACENT ELEMENT NABOR. */
+/* ------------------------------------------ */
+ istrt = xadj[enode];
+ nabor = adjncy[istrt];
+ if (nabor == elmnt) {
+ nabor = adjncy[istrt + 1];
+ }
+/* ------------------------------------------------ */
+/* IF NABOR IS UNELIMINATED, INCREASE DEGREE COUNT. */
+/* ------------------------------------------------ */
+ link = nabor;
+ if (dforw[nabor] < 0) {
+ goto L1000;
+ }
+ deg += qsize[nabor];
+ goto L2100;
+L1000:
+/* -------------------------------------------- */
+/* OTHERWISE, FOR EACH NODE IN THE 2ND ELEMENT, */
+/* DO THE FOLLOWING. */
+/* -------------------------------------------- */
+ istrt = xadj[link];
+ istop = xadj[link + 1] - 1;
+ i__1 = istop;
+ for (i = istrt; i <= i__1; ++i) {
+ node = adjncy[i];
+ link = -node;
+ if (node == enode) {
+ goto L1400;
+ }
+ if (node < 0) {
+ goto L1000;
+ } else if (node == 0) {
+ goto L2100;
+ } else {
+ goto L1100;
+ }
+
+L1100:
+ if (qsize[node] == 0) {
+ goto L1400;
+ }
+ if (marker[node] >= *tag) {
+ goto L1200;
+ }
+/* -----------------------------------
+-- */
+/* CASE WHEN NODE IS NOT YET CONSIDERED
+. */
+/* -----------------------------------
+-- */
+ marker[node] = *tag;
+ deg += qsize[node];
+ goto L1400;
+L1200:
+/* ----------------------------------------
+ */
+/* CASE WHEN NODE IS INDISTINGUISHABLE FROM
+ */
+/* ENODE. MERGE THEM INTO A NEW SUPERNODE.
+ */
+/* ----------------------------------------
+ */
+ if (dbakw[node] != 0) {
+ goto L1400;
+ }
+ if (dforw[node] != 2) {
+ goto L1300;
+ }
+ qsize[enode] += qsize[node];
+ qsize[node] = 0;
+ marker[node] = *maxint;
+ dforw[node] = -enode;
+ dbakw[node] = -(*maxint);
+ goto L1400;
+L1300:
+/* --------------------------------------
+*/
+/* CASE WHEN NODE IS OUTMATCHED BY ENODE.
+*/
+/* --------------------------------------
+*/
+ if (dbakw[node] == 0) {
+ dbakw[node] = -(*maxint);
+ }
+L1400:
+ ;
+ }
+ goto L2100;
+L1500:
+/* ------------------------------------------------ */
+/* FOR EACH ENODE IN THE QX LIST, DO THE FOLLOWING. */
+/* ------------------------------------------------ */
+ enode = qxhead;
+ iq2 = 0;
+L1600:
+ if (enode <= 0) {
+ goto L2300;
+ }
+ if (dbakw[enode] != 0) {
+ goto L2200;
+ }
+ ++(*tag);
+ deg = deg0;
+/* --------------------------------- */
+/* FOR EACH UNMARKED NABOR OF ENODE, */
+/* DO THE FOLLOWING. */
+/* --------------------------------- */
+ istrt = xadj[enode];
+ istop = xadj[enode + 1] - 1;
+ i__1 = istop;
+ for (i = istrt; i <= i__1; ++i) {
+ nabor = adjncy[i];
+ if (nabor == 0) {
+ goto L2100;
+ }
+ if (marker[nabor] >= *tag) {
+ goto L2000;
+ }
+ marker[nabor] = *tag;
+ link = nabor;
+/* ------------------------------ */
+/* IF UNELIMINATED, INCLUDE IT IN */
+/* DEG COUNT. */
+/* ------------------------------ */
+ if (dforw[nabor] < 0) {
+ goto L1700;
+ }
+ deg += qsize[nabor];
+ goto L2000;
+L1700:
+/* -------------------------------
+*/
+/* IF ELIMINATED, INCLUDE UNMARKED
+*/
+/* NODES IN THIS ELEMENT INTO THE
+*/
+/* DEGREE COUNT. */
+/* -------------------------------
+*/
+ jstrt = xadj[link];
+ jstop = xadj[link + 1] - 1;
+ i__2 = jstop;
+ for (j = jstrt; j <= i__2; ++j) {
+ node = adjncy[j];
+ link = -node;
+ if (node < 0) {
+ goto L1700;
+ } else if (node == 0) {
+ goto L2000;
+ } else {
+ goto L1800;
+ }
+
+L1800:
+ if (marker[node] >= *tag) {
+ goto L1900;
+ }
+ marker[node] = *tag;
+ deg += qsize[node];
+L1900:
+ ;
+ }
+L2000:
+ ;
+ }
+L2100:
+/* ------------------------------------------- */
+/* UPDATE EXTERNAL DEGREE OF ENODE IN DEGREE */
+/* STRUCTURE, AND MDEG (MIN DEG) IF NECESSARY. */
+/* ------------------------------------------- */
+ deg = deg - qsize[enode] + 1;
+ fnode = dhead[deg];
+ dforw[enode] = fnode;
+ dbakw[enode] = -deg;
+ if (fnode > 0) {
+ dbakw[fnode] = enode;
+ }
+ dhead[deg] = enode;
+ if (deg < *mdeg) {
+ *mdeg = deg;
+ }
+L2200:
+/* ---------------------------------- */
+/* GET NEXT ENODE IN CURRENT ELEMENT. */
+/* ---------------------------------- */
+ enode = llist[enode];
+ if (iq2 == 1) {
+ goto L900;
+ }
+ goto L1600;
+L2300:
+/* ----------------------------- */
+/* GET NEXT ELEMENT IN THE LIST. */
+/* ----------------------------- */
+ *tag = mtag;
+ elmnt = llist[elmnt];
+ goto L100;
+
+} /* mmdupd_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* ***** MMDNUM ..... MULTI MINIMUM DEGREE NUMBERING ***** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/* AUTHOR - JOSEPH W.H. LIU */
+/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/* PURPOSE - THIS ROUTINE PERFORMS THE FINAL STEP IN */
+/* PRODUCING THE PERMUTATION AND INVERSE PERMUTATION */
+/* VECTORS IN THE MULTIPLE ELIMINATION VERSION OF THE */
+/* MINIMUM DEGREE ORDERING ALGORITHM. */
+
+/* INPUT PARAMETERS - */
+/* NEQNS - NUMBER OF EQUATIONS. */
+/* QSIZE - SIZE OF SUPERNODES AT ELIMINATION. */
+
+/* UPDATED PARAMETERS - */
+/* INVP - INVERSE PERMUTATION VECTOR. ON INPUT, */
+/* IF QSIZE(NODE)=0, THEN NODE HAS BEEN MERGED */
+/* INTO THE NODE -INVP(NODE); OTHERWISE, */
+/* -INVP(NODE) IS ITS INVERSE LABELLING. */
+
+/* OUTPUT PARAMETERS - */
+/* PERM - THE PERMUTATION VECTOR. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdnum_(int *neqns, shortint *perm, shortint *invp,
+ shortint *qsize)
+{
+ /* System generated locals */
+ int i__1;
+
+ /* Local variables */
+ static int node, root, nextf, father, nqsize, num;
+
+
+/* *************************************************************** */
+
+
+/* *************************************************************** */
+
+ /* Parameter adjustments */
+ --qsize;
+ --invp;
+ --perm;
+
+ /* Function Body */
+ i__1 = *neqns;
+ for (node = 1; node <= i__1; ++node) {
+ nqsize = qsize[node];
+ if (nqsize <= 0) {
+ perm[node] = invp[node];
+ }
+ if (nqsize > 0) {
+ perm[node] = -invp[node];
+ }
+/* L100: */
+ }
+/* ------------------------------------------------------ */
+/* FOR EACH NODE WHICH HAS BEEN MERGED, DO THE FOLLOWING. */
+/* ------------------------------------------------------ */
+ i__1 = *neqns;
+ for (node = 1; node <= i__1; ++node) {
+ if (perm[node] > 0) {
+ goto L500;
+ }
+/* ----------------------------------------- */
+/* TRACE THE MERGED TREE UNTIL ONE WHICH HAS */
+/* NOT BEEN MERGED, CALL IT ROOT. */
+/* ----------------------------------------- */
+ father = node;
+L200:
+ if (perm[father] > 0) {
+ goto L300;
+ }
+ father = -perm[father];
+ goto L200;
+L300:
+/* ----------------------- */
+/* NUMBER NODE AFTER ROOT. */
+/* ----------------------- */
+ root = father;
+ num = perm[root] + 1;
+ invp[node] = -num;
+ perm[root] = num;
+/* ------------------------ */
+/* SHORTEN THE MERGED TREE. */
+/* ------------------------ */
+ father = node;
+L400:
+ nextf = -perm[father];
+ if (nextf <= 0) {
+ goto L500;
+ }
+ perm[father] = -root;
+ father = nextf;
+ goto L400;
+L500:
+ ;
+ }
+/* ---------------------- */
+/* READY TO COMPUTE PERM. */
+/* ---------------------- */
+ i__1 = *neqns;
+ for (node = 1; node <= i__1; ++node) {
+ num = -invp[node];
+ invp[node] = num;
+ perm[num] = node;
+/* L600: */
+ }
+ return 0;
+
+} /* mmdnum_ */
+
diff --git a/intern/opennl/superlu/relax_snode.c b/intern/opennl/superlu/relax_snode.c
new file mode 100644
index 00000000000..549f3fcf873
--- /dev/null
+++ b/intern/opennl/superlu/relax_snode.c
@@ -0,0 +1,71 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void
+relax_snode (
+ const int n,
+ int *et, /* column elimination tree */
+ const int relax_columns, /* max no of columns allowed in a
+ relaxed snode */
+ int *descendants, /* no of descendants of each node
+ in the etree */
+ int *relax_end /* last column in a supernode */
+ )
+{
+/*
+ * Purpose
+ * =======
+ * relax_snode() - Identify the initial relaxed supernodes, assuming that
+ * the matrix has been reordered according to the postorder of the etree.
+ *
+ */
+ register int j, parent;
+ register int snode_start; /* beginning of a snode */
+
+ ifill (relax_end, n, EMPTY);
+ for (j = 0; j < n; j++) descendants[j] = 0;
+
+ /* Compute the number of descendants of each node in the etree */
+ for (j = 0; j < n; j++) {
+ parent = et[j];
+ if ( parent != n ) /* not the dummy root */
+ descendants[parent] += descendants[j] + 1;
+ }
+
+ /* Identify the relaxed supernodes by postorder traversal of the etree. */
+ for (j = 0; j < n; ) {
+ parent = et[j];
+ snode_start = j;
+ while ( parent != n && descendants[parent] < relax_columns ) {
+ j = parent;
+ parent = et[j];
+ }
+ /* Found a supernode with j being the last column. */
+ relax_end[snode_start] = j; /* Last column is recorded */
+ j++;
+ /* Search for a new leaf */
+ while ( descendants[j] != 0 && j < n ) j++;
+ }
+
+ /*printf("No of relaxed snodes: %d; relaxed columns: %d\n",
+ nsuper, no_relaxed_col); */
+}
diff --git a/intern/opennl/superlu/scolumn_bmod.c b/intern/opennl/superlu/scolumn_bmod.c
new file mode 100644
index 00000000000..c877a27dd53
--- /dev/null
+++ b/intern/opennl/superlu/scolumn_bmod.c
@@ -0,0 +1,353 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+/*
+ * Function prototypes
+ */
+void susolve(int, int, float*, float*);
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+
+
+
+/* Return value: 0 - successful return
+ * > 0 - number of bytes allocated when run out of space
+ */
+int
+scolumn_bmod (
+ const int jcol, /* in */
+ const int nseg, /* in */
+ float *dense, /* in */
+ float *tempv, /* working array */
+ int *segrep, /* in */
+ int *repfnz, /* in */
+ int fpanelc, /* in -- first column in the current panel */
+ GlobalLU_t *Glu, /* modified */
+ SuperLUStat_t *stat /* output */
+ )
+{
+/*
+ * Purpose:
+ * ========
+ * Performs numeric block updates (sup-col) in topological order.
+ * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ * Special processing on the supernodal portion of L\U[*,j]
+ *
+ */
+#ifdef _CRAY
+ _fcd ftcs1 = _cptofcd("L", strlen("L")),
+ ftcs2 = _cptofcd("N", strlen("N")),
+ ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+
+#ifdef USE_VENDOR_BLAS
+ int incx = 1, incy = 1;
+ float alpha, beta;
+#endif
+
+ /* krep = representative of current k-th supernode
+ * fsupc = first supernodal column
+ * nsupc = no of columns in supernode
+ * nsupr = no of rows in supernode (used as leading dimension)
+ * luptr = location of supernodal LU-block in storage
+ * kfnz = first nonz in the k-th supernodal segment
+ * no_zeros = no of leading zeros in a supernodal U-segment
+ */
+ float ukj, ukj1, ukj2;
+ int luptr, luptr1, luptr2;
+ int fsupc, nsupc, nsupr, segsze;
+ int nrow; /* No of rows in the matrix of matrix-vector */
+ int jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
+ register int lptr, kfnz, isub, irow, i;
+ register int no_zeros, new_next;
+ int ufirst, nextlu;
+ int fst_col; /* First column within small LU update */
+ int d_fsupc; /* Distance between the first column of the current
+ panel and the first column of the current snode. */
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ int nzlumax;
+ float *tempv1;
+ float zero = 0.0;
+#ifdef USE_VENDOR_BLAS
+ float one = 1.0;
+ float none = -1.0;
+#endif
+ int mem_error;
+ flops_t *ops = stat->ops;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+ nzlumax = Glu->nzlumax;
+ jcolp1 = jcol + 1;
+ jsupno = supno[jcol];
+
+ /*
+ * For each nonz supernode segment of U[*,j] in topological order
+ */
+ k = nseg - 1;
+ for (ksub = 0; ksub < nseg; ksub++) {
+
+ krep = segrep[k];
+ k--;
+ ksupno = supno[krep];
+ if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
+
+ fsupc = xsup[ksupno];
+ fst_col = SUPERLU_MAX ( fsupc, fpanelc );
+
+ /* Distance from the current supernode to the current panel;
+ d_fsupc=0 if fsupc > fpanelc. */
+ d_fsupc = fst_col - fsupc;
+
+ luptr = xlusup[fst_col] + d_fsupc;
+ lptr = xlsub[fsupc] + d_fsupc;
+
+ kfnz = repfnz[krep];
+ kfnz = SUPERLU_MAX ( kfnz, fpanelc );
+
+ segsze = krep - kfnz + 1;
+ nsupc = krep - fst_col + 1;
+ nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
+ nrow = nsupr - d_fsupc - nsupc;
+ krep_ind = lptr + nsupc - 1;
+
+ ops[TRSV] += segsze * (segsze - 1);
+ ops[GEMV] += 2 * nrow * segsze;
+
+
+ /*
+ * Case 1: Update U-segment of size 1 -- col-col update
+ */
+ if ( segsze == 1 ) {
+ ukj = dense[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc;
+
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ dense[irow] -= ukj*lusup[luptr];
+ luptr++;
+ }
+
+ } else if ( segsze <= 3 ) {
+ ukj = dense[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc-1;
+ ukj1 = dense[lsub[krep_ind - 1]];
+ luptr1 = luptr - nsupr;
+
+ if ( segsze == 2 ) { /* Case 2: 2cols-col update */
+ ukj -= ukj1 * lusup[luptr1];
+ dense[lsub[krep_ind]] = ukj;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++;
+ luptr1++;
+ dense[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] );
+ }
+ } else { /* Case 3: 3cols-col update */
+ ukj2 = dense[lsub[krep_ind - 2]];
+ luptr2 = luptr1 - nsupr;
+ ukj1 -= ukj2 * lusup[luptr2-1];
+ ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+ dense[lsub[krep_ind]] = ukj;
+ dense[lsub[krep_ind-1]] = ukj1;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++;
+ luptr1++;
+ luptr2++;
+ dense[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+ }
+ }
+
+
+
+ } else {
+ /*
+ * Case: sup-col update
+ * Perform a triangular solve and block update,
+ * then scatter the result of sup-col update to dense
+ */
+
+ no_zeros = kfnz - fst_col;
+
+ /* Copy U[*,j] segment from dense[*] to tempv[*] */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ tempv[i] = dense[irow];
+ ++isub;
+ }
+
+ /* Dense triangular solve -- start effective triangle */
+ luptr += nsupr * no_zeros + no_zeros;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#else
+ strsv_( "L", "N", "U", &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#endif
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ alpha = one;
+ beta = zero;
+#ifdef _CRAY
+ SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#else
+ sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#endif
+#else
+ slsolve ( nsupr, segsze, &lusup[luptr], tempv );
+
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
+#endif
+
+
+ /* Scatter tempv[] into SPA dense[] as a temporary storage */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ dense[irow] = tempv[i];
+ tempv[i] = zero;
+ ++isub;
+ }
+
+ /* Scatter tempv1[] into SPA dense[] */
+ for (i = 0; i < nrow; i++) {
+ irow = lsub[isub];
+ dense[irow] -= tempv1[i];
+ tempv1[i] = zero;
+ ++isub;
+ }
+ }
+
+ } /* if jsupno ... */
+
+ } /* for each segment... */
+
+ /*
+ * Process the supernodal portion of L\U[*,j]
+ */
+ nextlu = xlusup[jcol];
+ fsupc = xsup[jsupno];
+
+ /* Copy the SPA dense into L\U[*,j] */
+ new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
+ while ( new_next > nzlumax ) {
+ if ((mem_error = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)))
+ return (mem_error);
+ lusup = Glu->lusup;
+ lsub = Glu->lsub;
+ }
+
+ for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
+ irow = lsub[isub];
+ lusup[nextlu] = dense[irow];
+ dense[irow] = zero;
+ ++nextlu;
+ }
+
+ xlusup[jcolp1] = nextlu; /* Close L\U[*,jcol] */
+
+ /* For more updates within the panel (also within the current supernode),
+ * should start from the first column of the panel, or the first column
+ * of the supernode, whichever is bigger. There are 2 cases:
+ * 1) fsupc < fpanelc, then fst_col := fpanelc
+ * 2) fsupc >= fpanelc, then fst_col := fsupc
+ */
+ fst_col = SUPERLU_MAX ( fsupc, fpanelc );
+
+ if ( fst_col < jcol ) {
+
+ /* Distance between the current supernode and the current panel.
+ d_fsupc=0 if fsupc >= fpanelc. */
+ d_fsupc = fst_col - fsupc;
+
+ lptr = xlsub[fsupc] + d_fsupc;
+ luptr = xlusup[fst_col] + d_fsupc;
+ nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
+ nsupc = jcol - fst_col; /* Excluding jcol */
+ nrow = nsupr - d_fsupc - nsupc;
+
+ /* Points to the beginning of jcol in snode L\U(jsupno) */
+ ufirst = xlusup[jcol] + d_fsupc;
+
+ ops[TRSV] += nsupc * (nsupc - 1);
+ ops[GEMV] += 2 * nrow * nsupc;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr],
+ &nsupr, &lusup[ufirst], &incx );
+#else
+ strsv_( "L", "N", "U", &nsupc, &lusup[luptr],
+ &nsupr, &lusup[ufirst], &incx );
+#endif
+
+ alpha = none; beta = one; /* y := beta*y + alpha*A*x */
+
+#ifdef _CRAY
+ SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+ &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#else
+ sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+ &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#endif
+#else
+ slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
+
+ smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
+ &lusup[ufirst], tempv );
+
+ /* Copy updates from tempv[*] into lusup[*] */
+ isub = ufirst + nsupc;
+ for (i = 0; i < nrow; i++) {
+ lusup[isub] -= tempv[i];
+ tempv[i] = 0.0;
+ ++isub;
+ }
+
+#endif
+
+
+ } /* if fst_col < jcol ... */
+
+ return 0;
+}
diff --git a/intern/opennl/superlu/scolumn_dfs.c b/intern/opennl/superlu/scolumn_dfs.c
new file mode 100644
index 00000000000..ecfb5c3b839
--- /dev/null
+++ b/intern/opennl/superlu/scolumn_dfs.c
@@ -0,0 +1,270 @@
+
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+/* What type of supernodes we want */
+#define T2_SUPER
+
+int
+scolumn_dfs(
+ const int m, /* in - number of rows in the matrix */
+ const int jcol, /* in */
+ int *perm_r, /* in */
+ int *nseg, /* modified - with new segments appended */
+ int *lsub_col, /* in - defines the RHS vector to start the dfs */
+ int *segrep, /* modified - with new segments appended */
+ int *repfnz, /* modified */
+ int *xprune, /* modified */
+ int *marker, /* modified */
+ int *parent, /* working array */
+ int *xplore, /* working array */
+ GlobalLU_t *Glu /* modified */
+ )
+{
+/*
+ * Purpose
+ * =======
+ * "column_dfs" performs a symbolic factorization on column jcol, and
+ * decide the supernode boundary.
+ *
+ * This routine does not use numeric values, but only use the RHS
+ * row indices to start the dfs.
+ *
+ * A supernode representative is the last column of a supernode.
+ * The nonzeros in U[*,j] are segments that end at supernodal
+ * representatives. The routine returns a list of such supernodal
+ * representatives in topological order of the dfs that generates them.
+ * The location of the first nonzero in each such supernodal segment
+ * (supernodal entry location) is also returned.
+ *
+ * Local parameters
+ * ================
+ * nseg: no of segments in current U[*,j]
+ * jsuper: jsuper=EMPTY if column j does not belong to the same
+ * supernode as j-1. Otherwise, jsuper=nsuper.
+ *
+ * marker2: A-row --> A-row/col (0/1)
+ * repfnz: SuperA-col --> PA-row
+ * parent: SuperA-col --> SuperA-col
+ * xplore: SuperA-col --> index to L-structure
+ *
+ * Return value
+ * ============
+ * 0 success;
+ * > 0 number of bytes allocated when run out of space.
+ *
+ */
+ int jcolp1, jcolm1, jsuper, nsuper, nextl;
+ int k, krep, krow, kmark, kperm;
+ int *marker2; /* Used for small panel LU */
+ int fsupc; /* First column of a snode */
+ int myfnz; /* First nonz column of a U-segment */
+ int chperm, chmark, chrep, kchild;
+ int xdfs, maxdfs, kpar, oldrep;
+ int jptr, jm1ptr;
+ int ito, ifrom, istop; /* Used to compress row subscripts */
+ int mem_error;
+ int *xsup, *supno, *lsub, *xlsub;
+ int nzlmax;
+ static int first = 1, maxsuper;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ nzlmax = Glu->nzlmax;
+
+ if ( first ) {
+ maxsuper = sp_ienv(3);
+ first = 0;
+ }
+ jcolp1 = jcol + 1;
+ jcolm1 = jcol - 1;
+ nsuper = supno[jcol];
+ jsuper = nsuper;
+ nextl = xlsub[jcol];
+ marker2 = &marker[2*m];
+
+
+ /* For each nonzero in A[*,jcol] do dfs */
+ for (k = 0; lsub_col[k] != EMPTY; k++) {
+
+ krow = lsub_col[k];
+ lsub_col[k] = EMPTY;
+ kmark = marker2[krow];
+
+ /* krow was visited before, go to the next nonz */
+ if ( kmark == jcol ) continue;
+
+ /* For each unmarked nbr krow of jcol
+ * krow is in L: place it in structure of L[*,jcol]
+ */
+ marker2[krow] = jcol;
+ kperm = perm_r[krow];
+
+ if ( kperm == EMPTY ) {
+ lsub[nextl++] = krow; /* krow is indexed into A */
+ if ( nextl >= nzlmax ) {
+ if ((mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)))
+ return (mem_error);
+ lsub = Glu->lsub;
+ }
+ if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */
+ } else {
+ /* krow is in U: if its supernode-rep krep
+ * has been explored, update repfnz[*]
+ */
+ krep = xsup[supno[kperm]+1] - 1;
+ myfnz = repfnz[krep];
+
+ if ( myfnz != EMPTY ) { /* Visited before */
+ if ( myfnz > kperm ) repfnz[krep] = kperm;
+ /* continue; */
+ }
+ else {
+ /* Otherwise, perform dfs starting at krep */
+ oldrep = EMPTY;
+ parent[krep] = oldrep;
+ repfnz[krep] = kperm;
+ xdfs = xlsub[krep];
+ maxdfs = xprune[krep];
+
+ do {
+ /*
+ * For each unmarked kchild of krep
+ */
+ while ( xdfs < maxdfs ) {
+
+ kchild = lsub[xdfs];
+ xdfs++;
+ chmark = marker2[kchild];
+
+ if ( chmark != jcol ) { /* Not reached yet */
+ marker2[kchild] = jcol;
+ chperm = perm_r[kchild];
+
+ /* Case kchild is in L: place it in L[*,k] */
+ if ( chperm == EMPTY ) {
+ lsub[nextl++] = kchild;
+ if ( nextl >= nzlmax ) {
+ if ((mem_error =
+ sLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu)))
+ return (mem_error);
+ lsub = Glu->lsub;
+ }
+ if ( chmark != jcolm1 ) jsuper = EMPTY;
+ } else {
+ /* Case kchild is in U:
+ * chrep = its supernode-rep. If its rep has
+ * been explored, update its repfnz[*]
+ */
+ chrep = xsup[supno[chperm]+1] - 1;
+ myfnz = repfnz[chrep];
+ if ( myfnz != EMPTY ) { /* Visited before */
+ if ( myfnz > chperm )
+ repfnz[chrep] = chperm;
+ } else {
+ /* Continue dfs at super-rep of kchild */
+ xplore[krep] = xdfs;
+ oldrep = krep;
+ krep = chrep; /* Go deeper down G(L^t) */
+ parent[krep] = oldrep;
+ repfnz[krep] = chperm;
+ xdfs = xlsub[krep];
+ maxdfs = xprune[krep];
+ } /* else */
+
+ } /* else */
+
+ } /* if */
+
+ } /* while */
+
+ /* krow has no more unexplored nbrs;
+ * place supernode-rep krep in postorder DFS.
+ * backtrack dfs to its parent
+ */
+ segrep[*nseg] = krep;
+ ++(*nseg);
+ kpar = parent[krep]; /* Pop from stack, mimic recursion */
+ if ( kpar == EMPTY ) break; /* dfs done */
+ krep = kpar;
+ xdfs = xplore[krep];
+ maxdfs = xprune[krep];
+
+ } while ( kpar != EMPTY ); /* Until empty stack */
+
+ } /* else */
+
+ } /* else */
+
+ } /* for each nonzero ... */
+
+ /* Check to see if j belongs in the same supernode as j-1 */
+ if ( jcol == 0 ) { /* Do nothing for column 0 */
+ nsuper = supno[0] = 0;
+ } else {
+ fsupc = xsup[nsuper];
+ jptr = xlsub[jcol]; /* Not compressed yet */
+ jm1ptr = xlsub[jcolm1];
+
+#ifdef T2_SUPER
+ if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
+#endif
+ /* Make sure the number of columns in a supernode doesn't
+ exceed threshold. */
+ if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
+
+ /* If jcol starts a new supernode, reclaim storage space in
+ * lsub from the previous supernode. Note we only store
+ * the subscript set of the first and last columns of
+ * a supernode. (first for num values, last for pruning)
+ */
+ if ( jsuper == EMPTY ) { /* starts a new supernode */
+ if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
+#ifdef CHK_COMPRESS
+ printf(" Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
+#endif
+ ito = xlsub[fsupc+1];
+ xlsub[jcolm1] = ito;
+ istop = ito + jptr - jm1ptr;
+ xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
+ xlsub[jcol] = istop;
+ for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
+ lsub[ito] = lsub[ifrom];
+ nextl = ito; /* = istop + length(jcol) */
+ }
+ nsuper++;
+ supno[jcol] = nsuper;
+ } /* if a new supernode */
+
+ } /* else: jcol > 0 */
+
+ /* Tidy up the pointers before exit */
+ xsup[nsuper+1] = jcolp1;
+ supno[jcolp1] = nsuper;
+ xprune[jcol] = nextl; /* Initialize upper bound for pruning */
+ xlsub[jcolp1] = nextl;
+
+ return 0;
+}
diff --git a/intern/opennl/superlu/scopy_to_ucol.c b/intern/opennl/superlu/scopy_to_ucol.c
new file mode 100644
index 00000000000..fd97352923f
--- /dev/null
+++ b/intern/opennl/superlu/scopy_to_ucol.c
@@ -0,0 +1,105 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+scopy_to_ucol(
+ int jcol, /* in */
+ int nseg, /* in */
+ int *segrep, /* in */
+ int *repfnz, /* in */
+ int *perm_r, /* in */
+ float *dense, /* modified - reset to zero on return */
+ GlobalLU_t *Glu /* modified */
+ )
+{
+/*
+ * Gather from SPA dense[*] to global ucol[*].
+ */
+ int ksub, krep, ksupno;
+ int i, k, kfnz, segsze;
+ int fsupc, isub, irow;
+ int jsupno, nextu;
+ int new_next, mem_error;
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *ucol;
+ int *usub, *xusub;
+ int nzumax;
+
+ float zero = 0.0;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ ucol = Glu->ucol;
+ usub = Glu->usub;
+ xusub = Glu->xusub;
+ nzumax = Glu->nzumax;
+
+ jsupno = supno[jcol];
+ nextu = xusub[jcol];
+ k = nseg - 1;
+ for (ksub = 0; ksub < nseg; ksub++) {
+ krep = segrep[k--];
+ ksupno = supno[krep];
+
+ if ( ksupno != jsupno ) { /* Should go into ucol[] */
+ kfnz = repfnz[krep];
+ if ( kfnz != EMPTY ) { /* Nonzero U-segment */
+
+ fsupc = xsup[ksupno];
+ isub = xlsub[fsupc] + kfnz - fsupc;
+ segsze = krep - kfnz + 1;
+
+ new_next = nextu + segsze;
+ while ( new_next > nzumax ) {
+ if ((mem_error = sLUMemXpand(jcol, nextu, UCOL, &nzumax, Glu)))
+ return (mem_error);
+ ucol = Glu->ucol;
+ if ((mem_error = sLUMemXpand(jcol, nextu, USUB, &nzumax, Glu)))
+ return (mem_error);
+ usub = Glu->usub;
+ lsub = Glu->lsub;
+ }
+
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ usub[nextu] = perm_r[irow];
+ ucol[nextu] = dense[irow];
+ dense[irow] = zero;
+ nextu++;
+ isub++;
+ }
+
+ }
+
+ }
+
+ } /* for each segment... */
+
+ xusub[jcol + 1] = nextu; /* Close U[*,jcol] */
+ return 0;
+}
diff --git a/intern/opennl/superlu/sgssv.c b/intern/opennl/superlu/sgssv.c
new file mode 100644
index 00000000000..ede3dc83907
--- /dev/null
+++ b/intern/opennl/superlu/sgssv.c
@@ -0,0 +1,221 @@
+
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+#include "ssp_defs.h"
+
+void
+sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
+ SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
+ SuperLUStat_t *stat, int *info )
+{
+/*
+ * Purpose
+ * =======
+ *
+ * SGSSV solves the system of linear equations A*X=B, using the
+ * LU factorization from SGSTRF. It performs the following steps:
+ *
+ * 1. If A is stored column-wise (A->Stype = SLU_NC):
+ *
+ * 1.1. Permute the columns of A, forming A*Pc, where Pc
+ * is a permutation matrix. For more details of this step,
+ * see sp_preorder.c.
+ *
+ * 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
+ * by Gaussian elimination with partial pivoting.
+ * L is unit lower triangular with offdiagonal entries
+ * bounded by 1 in magnitude, and U is upper triangular.
+ *
+ * 1.3. Solve the system of equations A*X=B using the factored
+ * form of A.
+ *
+ * 2. If A is stored row-wise (A->Stype = SLU_NR), apply the
+ * above algorithm to the transpose of A:
+ *
+ * 2.1. Permute columns of transpose(A) (rows of A),
+ * forming transpose(A)*Pc, where Pc is a permutation matrix.
+ * For more details of this step, see sp_preorder.c.
+ *
+ * 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
+ * determined by Gaussian elimination with partial pivoting.
+ * L is unit lower triangular with offdiagonal entries
+ * bounded by 1 in magnitude, and U is upper triangular.
+ *
+ * 2.3. Solve the system of equations A*X=B using the factored
+ * form of A.
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ * The structure defines the input parameters to control
+ * how the LU decomposition will be performed and how the
+ * system will be solved.
+ *
+ * A (input) SuperMatrix*
+ * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ * of linear equations is A->nrow. Currently, the type of A can be:
+ * Stype = SLU_NC or SLU_NR; Dtype = SLU_S; Mtype = SLU_GE.
+ * In the future, more general A may be handled.
+ *
+ * perm_c (input/output) int*
+ * If A->Stype = SLU_NC, column permutation vector of size A->ncol
+ * which defines the permutation matrix Pc; perm_c[i] = j means
+ * column i of A is in position j in A*Pc.
+ * If A->Stype = SLU_NR, column permutation vector of size A->nrow
+ * which describes permutation of columns of transpose(A)
+ * (rows of A) as described above.
+ *
+ * If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
+ * options->Fact = SamePattern_SameRowPerm, it is an input argument.
+ * On exit, perm_c may be overwritten by the product of the input
+ * perm_c and a permutation that postorders the elimination tree
+ * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
+ * is already in postorder.
+ * Otherwise, it is an output argument.
+ *
+ * perm_r (input/output) int*
+ * If A->Stype = SLU_NC, row permutation vector of size A->nrow,
+ * which defines the permutation matrix Pr, and is determined
+ * by partial pivoting. perm_r[i] = j means row i of A is in
+ * position j in Pr*A.
+ * If A->Stype = SLU_NR, permutation vector of size A->ncol, which
+ * determines permutation of rows of transpose(A)
+ * (columns of A) as described above.
+ *
+ * If options->RowPerm = MY_PERMR or
+ * options->Fact = SamePattern_SameRowPerm, perm_r is an
+ * input argument.
+ * otherwise it is an output argument.
+ *
+ * L (output) SuperMatrix*
+ * The factor L from the factorization
+ * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or
+ * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
+ * Uses compressed row subscripts storage for supernodes, i.e.,
+ * L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ *
+ * U (output) SuperMatrix*
+ * The factor U from the factorization
+ * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or
+ * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
+ * Uses column-wise storage scheme, i.e., U has types:
+ * Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
+ *
+ * B (input/output) SuperMatrix*
+ * B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
+ * On entry, the right hand side matrix.
+ * On exit, the solution matrix if info = 0;
+ *
+ * stat (output) SuperLUStat_t*
+ * Record the statistics on runtime and floating-point operation count.
+ * See util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info (output) int*
+ * = 0: successful exit
+ * > 0: if info = i, and i is
+ * <= A->ncol: U(i,i) is exactly zero. The factorization has
+ * been completed, but the factor U is exactly singular,
+ * so the solution could not be computed.
+ * > A->ncol: number of bytes allocated when memory allocation
+ * failure occurred, plus A->ncol.
+ *
+ */
+ DNformat *Bstore;
+ SuperMatrix *AA = NULL;/* A in SLU_NC format used by the factorization routine.*/
+ SuperMatrix AC; /* Matrix postmultiplied by Pc */
+ int lwork = 0, *etree, i;
+
+ /* Set default values for some parameters */
+ int panel_size; /* panel size */
+ int relax; /* no of columns in a relaxed snodes */
+ int permc_spec;
+ trans_t trans = NOTRANS;
+ double *utime;
+ double t; /* Temporary time */
+
+ /* Test the input parameters ... */
+ *info = 0;
+ Bstore = B->Store;
+ if ( options->Fact != DOFACT ) *info = -1;
+ else if ( A->nrow != A->ncol || A->nrow < 0 ||
+ (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
+ A->Dtype != SLU_S || A->Mtype != SLU_GE )
+ *info = -2;
+ else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
+ B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
+ *info = -7;
+ if ( *info != 0 ) {
+ i = -(*info);
+ xerbla_("sgssv", &i);
+ return;
+ }
+
+ utime = stat->utime;
+
+ /* Convert A to SLU_NC format when necessary. */
+ if ( A->Stype == SLU_NR ) {
+ NRformat *Astore = A->Store;
+ AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
+ sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
+ Astore->nzval, Astore->colind, Astore->rowptr,
+ SLU_NC, A->Dtype, A->Mtype);
+ trans = TRANS;
+ } else {
+ if ( A->Stype == SLU_NC ) AA = A;
+ }
+
+ t = SuperLU_timer_();
+ /*
+ * Get column permutation vector perm_c[], according to permc_spec:
+ * permc_spec = NATURAL: natural ordering
+ * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
+ * permc_spec = MMD_ATA: minimum degree on structure of A'*A
+ * permc_spec = COLAMD: approximate minimum degree column ordering
+ * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
+ */
+ permc_spec = options->ColPerm;
+ if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
+ get_perm_c(permc_spec, AA, perm_c);
+ utime[COLPERM] = SuperLU_timer_() - t;
+
+ etree = intMalloc(A->ncol);
+
+ t = SuperLU_timer_();
+ sp_preorder(options, AA, perm_c, etree, &AC);
+ utime[ETREE] = SuperLU_timer_() - t;
+
+ panel_size = sp_ienv(1);
+ relax = sp_ienv(2);
+
+ /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n",
+ relax, panel_size, sp_ienv(3), sp_ienv(4));*/
+ t = SuperLU_timer_();
+ /* Compute the LU factorization of A. */
+ sgstrf(options, &AC, relax, panel_size,
+ etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
+ utime[FACT] = SuperLU_timer_() - t;
+
+ t = SuperLU_timer_();
+ if ( *info == 0 ) {
+ /* Solve the system A*X=B, overwriting B with X. */
+ sgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
+ }
+ utime[SOLVE] = SuperLU_timer_() - t;
+
+ SUPERLU_FREE (etree);
+ Destroy_CompCol_Permuted(&AC);
+ if ( A->Stype == SLU_NR ) {
+ Destroy_SuperMatrix_Store(AA);
+ SUPERLU_FREE(AA);
+ }
+
+}
diff --git a/intern/opennl/superlu/sgstrf.c b/intern/opennl/superlu/sgstrf.c
new file mode 100644
index 00000000000..42f8dc9d0ee
--- /dev/null
+++ b/intern/opennl/superlu/sgstrf.c
@@ -0,0 +1,433 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void
+sgstrf (superlu_options_t *options, SuperMatrix *A,
+ int relax, int panel_size, int *etree, void *work, int lwork,
+ int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
+ SuperLUStat_t *stat, int *info)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * SGSTRF computes an LU factorization of a general sparse m-by-n
+ * matrix A using partial pivoting with row interchanges.
+ * The factorization has the form
+ * Pr * A = L * U
+ * where Pr is a row permutation matrix, L is lower triangular with unit
+ * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
+ * triangular (upper trapezoidal if A->nrow < A->ncol).
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ * The structure defines the input parameters to control
+ * how the LU decomposition will be performed.
+ *
+ * A (input) SuperMatrix*
+ * Original matrix A, permuted by columns, of dimension
+ * (A->nrow, A->ncol). The type of A can be:
+ * Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
+ *
+ * drop_tol (input) float (NOT IMPLEMENTED)
+ * Drop tolerance parameter. At step j of the Gaussian elimination,
+ * if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
+ * 0 <= drop_tol <= 1. The default value of drop_tol is 0.
+ *
+ * relax (input) int
+ * To control degree of relaxing supernodes. If the number
+ * of nodes (columns) in a subtree of the elimination tree is less
+ * than relax, this subtree is considered as one supernode,
+ * regardless of the row structures of those columns.
+ *
+ * panel_size (input) int
+ * A panel consists of at most panel_size consecutive columns.
+ *
+ * etree (input) int*, dimension (A->ncol)
+ * Elimination tree of A'*A.
+ * Note: etree is a vector of parent pointers for a forest whose
+ * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ * On input, the columns of A should be permuted so that the
+ * etree is in a certain postorder.
+ *
+ * work (input/output) void*, size (lwork) (in bytes)
+ * User-supplied work space and space for the output data structures.
+ * Not referenced if lwork = 0;
+ *
+ * lwork (input) int
+ * Specifies the size of work array in bytes.
+ * = 0: allocate space internally by system malloc;
+ * > 0: use user-supplied work array of length lwork in bytes,
+ * returns error if space runs out.
+ * = -1: the routine guesses the amount of space needed without
+ * performing the factorization, and returns it in
+ * *info; no other side effects.
+ *
+ * perm_c (input) int*, dimension (A->ncol)
+ * Column permutation vector, which defines the
+ * permutation matrix Pc; perm_c[i] = j means column i of A is
+ * in position j in A*Pc.
+ * When searching for diagonal, perm_c[*] is applied to the
+ * row subscripts of A, so that diagonal threshold pivoting
+ * can find the diagonal of A, rather than that of A*Pc.
+ *
+ * perm_r (input/output) int*, dimension (A->nrow)
+ * Row permutation vector which defines the permutation matrix Pr,
+ * perm_r[i] = j means row i of A is in position j in Pr*A.
+ * If options->Fact = SamePattern_SameRowPerm, the pivoting routine
+ * will try to use the input perm_r, unless a certain threshold
+ * criterion is violated. In that case, perm_r is overwritten by
+ * a new permutation determined by partial pivoting or diagonal
+ * threshold pivoting.
+ * Otherwise, perm_r is output argument;
+ *
+ * L (output) SuperMatrix*
+ * The factor L from the factorization Pr*A=L*U; use compressed row
+ * subscripts storage for supernodes, i.e., L has type:
+ * Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ *
+ * U (output) SuperMatrix*
+ * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
+ * storage scheme, i.e., U has types: Stype = SLU_NC,
+ * Dtype = SLU_S, Mtype = SLU_TRU.
+ *
+ * stat (output) SuperLUStat_t*
+ * Record the statistics on runtime and floating-point operation count.
+ * See util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info (output) int*
+ * = 0: successful exit
+ * < 0: if info = -i, the i-th argument had an illegal value
+ * > 0: if info = i, and i is
+ * <= A->ncol: U(i,i) is exactly zero. The factorization has
+ * been completed, but the factor U is exactly singular,
+ * and division by zero will occur if it is used to solve a
+ * system of equations.
+ * > A->ncol: number of bytes allocated when memory allocation
+ * failure occurred, plus A->ncol. If lwork = -1, it is
+ * the estimated amount of space needed, plus A->ncol.
+ *
+ * ======================================================================
+ *
+ * Local Working Arrays:
+ * ======================
+ * m = number of rows in the matrix
+ * n = number of columns in the matrix
+ *
+ * xprune[0:n-1]: xprune[*] points to locations in subscript
+ * vector lsub[*]. For column i, xprune[i] denotes the point where
+ * structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need
+ * to be traversed for symbolic factorization.
+ *
+ * marker[0:3*m-1]: marker[i] = j means that node i has been
+ * reached when working on column j.
+ * Storage: relative to original row subscripts
+ * NOTE: There are 3 of them: marker/marker1 are used for panel dfs,
+ * see spanel_dfs.c; marker2 is used for inner-factorization,
+ * see scolumn_dfs.c.
+ *
+ * parent[0:m-1]: parent vector used during dfs
+ * Storage: relative to new row subscripts
+ *
+ * xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
+ * unexplored neighbor of i in lsub[*]
+ *
+ * segrep[0:nseg-1]: contains the list of supernodal representatives
+ * in topological order of the dfs. A supernode representative is the
+ * last column of a supernode.
+ * The maximum size of segrep[] is n.
+ *
+ * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
+ * supernodal representative r, repfnz[r] is the location of the first
+ * nonzero in this segment. It is also used during the dfs: repfnz[r]>0
+ * indicates the supernode r has been explored.
+ * NOTE: There are W of them, each used for one column of a panel.
+ *
+ * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
+ * the panel diagonal. These are filled in during spanel_dfs(), and are
+ * used later in the inner LU factorization within the panel.
+ * panel_lsub[]/dense[] pair forms the SPA data structure.
+ * NOTE: There are W of them.
+ *
+ * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
+ * NOTE: there are W of them.
+ *
+ * tempv[0:*]: real temporary used for dense numeric kernels;
+ * The size of this array is defined by NUM_TEMPV() in ssp_defs.h.
+ *
+ */
+ /* Local working arrays */
+ NCPformat *Astore;
+ int *iperm_r = NULL; /* inverse of perm_r;
+ used when options->Fact == SamePattern_SameRowPerm */
+ int *iperm_c; /* inverse of perm_c */
+ int *iwork;
+ float *swork;
+ int *segrep, *repfnz, *parent, *xplore;
+ int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
+ int *xprune;
+ int *marker;
+ float *dense, *tempv;
+ int *relax_end;
+ float *a;
+ int *asub;
+ int *xa_begin, *xa_end;
+ int *xsup, *supno;
+ int *xlsub, *xlusup, *xusub;
+ int nzlumax;
+ static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
+
+ /* Local scalars */
+ fact_t fact = options->Fact;
+ double diag_pivot_thresh = options->DiagPivotThresh;
+ int pivrow; /* pivotal row number in the original matrix A */
+ int nseg1; /* no of segments in U-column above panel row jcol */
+ int nseg; /* no of segments in each U-column */
+ register int jcol;
+ register int kcol; /* end column of a relaxed snode */
+ register int icol;
+ register int i, k, jj, new_next, iinfo;
+ int m, n, min_mn, jsupno, fsupc, nextlu, nextu;
+ int w_def; /* upper bound on panel width */
+ int usepr, iperm_r_allocated = 0;
+ int nnzL, nnzU;
+ int *panel_histo = stat->panel_histo;
+ flops_t *ops = stat->ops;
+
+ iinfo = 0;
+ m = A->nrow;
+ n = A->ncol;
+ min_mn = SUPERLU_MIN(m, n);
+ Astore = A->Store;
+ a = Astore->nzval;
+ asub = Astore->rowind;
+ xa_begin = Astore->colbeg;
+ xa_end = Astore->colend;
+
+ /* Allocate storage common to the factor routines */
+ *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz,
+ panel_size, L, U, &Glu, &iwork, &swork);
+ if ( *info ) return;
+
+ xsup = Glu.xsup;
+ supno = Glu.supno;
+ xlsub = Glu.xlsub;
+ xlusup = Glu.xlusup;
+ xusub = Glu.xusub;
+
+ SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
+ &repfnz, &panel_lsub, &xprune, &marker);
+ sSetRWork(m, panel_size, swork, &dense, &tempv);
+
+ usepr = (fact == SamePattern_SameRowPerm);
+ if ( usepr ) {
+ /* Compute the inverse of perm_r */
+ iperm_r = (int *) intMalloc(m);
+ for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
+ iperm_r_allocated = 1;
+ }
+ iperm_c = (int *) intMalloc(n);
+ for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
+
+ /* Identify relaxed snodes */
+ relax_end = (int *) intMalloc(n);
+ if ( options->SymmetricMode == YES ) {
+ heap_relax_snode(n, etree, relax, marker, relax_end);
+ } else {
+ relax_snode(n, etree, relax, marker, relax_end);
+ }
+
+ ifill (perm_r, m, EMPTY);
+ ifill (marker, m * NO_MARKER, EMPTY);
+ supno[0] = -1;
+ xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0;
+ w_def = panel_size;
+
+ /*
+ * Work on one "panel" at a time. A panel is one of the following:
+ * (a) a relaxed supernode at the bottom of the etree, or
+ * (b) panel_size contiguous columns, defined by the user
+ */
+ for (jcol = 0; jcol < min_mn; ) {
+
+ if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
+ kcol = relax_end[jcol]; /* end of the relaxed snode */
+ panel_histo[kcol-jcol+1]++;
+
+ /* --------------------------------------
+ * Factorize the relaxed supernode(jcol:kcol)
+ * -------------------------------------- */
+ /* Determine the union of the row structure of the snode */
+ if ( (*info = ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
+ xprune, marker, &Glu)) != 0 )
+ return;
+
+ nextu = xusub[jcol];
+ nextlu = xlusup[jcol];
+ jsupno = supno[jcol];
+ fsupc = xsup[jsupno];
+ new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
+ nzlumax = Glu.nzlumax;
+ while ( new_next > nzlumax ) {
+ if ( (*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) )
+ return;
+ }
+
+ for (icol = jcol; icol<= kcol; icol++) {
+ xusub[icol+1] = nextu;
+
+ /* Scatter into SPA dense[*] */
+ for (k = xa_begin[icol]; k < xa_end[icol]; k++)
+ dense[asub[k]] = a[k];
+
+ /* Numeric update within the snode */
+ ssnode_bmod(icol, fsupc, dense, tempv, &Glu, stat);
+
+ if ( (*info = spivotL(icol, diag_pivot_thresh, &usepr, perm_r,
+ iperm_r, iperm_c, &pivrow, &Glu, stat)) )
+ if ( iinfo == 0 ) iinfo = *info;
+
+#ifdef DEBUG
+ sprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
+#endif
+
+ }
+
+ jcol = icol;
+
+ } else { /* Work on one panel of panel_size columns */
+
+ /* Adjust panel_size so that a panel won't overlap with the next
+ * relaxed snode.
+ */
+ panel_size = w_def;
+ for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
+ if ( relax_end[k] != EMPTY ) {
+ panel_size = k - jcol;
+ break;
+ }
+ if ( k == min_mn ) panel_size = min_mn - jcol;
+ panel_histo[panel_size]++;
+
+ /* symbolic factor on a panel of columns */
+ spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
+ dense, panel_lsub, segrep, repfnz, xprune,
+ marker, parent, xplore, &Glu);
+
+ /* numeric sup-panel updates in topological order */
+ spanel_bmod(m, panel_size, jcol, nseg1, dense,
+ tempv, segrep, repfnz, &Glu, stat);
+
+ /* Sparse LU within the panel, and below panel diagonal */
+ for ( jj = jcol; jj < jcol + panel_size; jj++) {
+ k = (jj - jcol) * m; /* column index for w-wide arrays */
+
+ nseg = nseg1; /* Begin after all the panel segments */
+
+ if ((*info = scolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
+ segrep, &repfnz[k], xprune, marker,
+ parent, xplore, &Glu)) != 0) return;
+
+ /* Numeric updates */
+ if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
+ tempv, &segrep[nseg1], &repfnz[k],
+ jcol, &Glu, stat)) != 0) return;
+
+ /* Copy the U-segments to ucol[*] */
+ if ((*info = scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
+ perm_r, &dense[k], &Glu)) != 0)
+ return;
+
+ if ( (*info = spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
+ iperm_r, iperm_c, &pivrow, &Glu, stat)) )
+ if ( iinfo == 0 ) iinfo = *info;
+
+ /* Prune columns (0:jj-1) using column jj */
+ spruneL(jj, perm_r, pivrow, nseg, segrep,
+ &repfnz[k], xprune, &Glu);
+
+ /* Reset repfnz[] for this column */
+ resetrep_col (nseg, segrep, &repfnz[k]);
+
+#ifdef DEBUG
+ sprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
+#endif
+
+ }
+
+ jcol += panel_size; /* Move to the next panel */
+
+ } /* else */
+
+ } /* for */
+
+ *info = iinfo;
+
+ if ( m > n ) {
+ k = 0;
+ for (i = 0; i < m; ++i)
+ if ( perm_r[i] == EMPTY ) {
+ perm_r[i] = n + k;
+ ++k;
+ }
+ }
+
+ countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
+ fixupL(min_mn, perm_r, &Glu);
+
+ sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */
+
+ if ( fact == SamePattern_SameRowPerm ) {
+ /* L and U structures may have changed due to possibly different
+ pivoting, even though the storage is available.
+ There could also be memory expansions, so the array locations
+ may have changed, */
+ ((SCformat *)L->Store)->nnz = nnzL;
+ ((SCformat *)L->Store)->nsuper = Glu.supno[n];
+ ((SCformat *)L->Store)->nzval = Glu.lusup;
+ ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
+ ((SCformat *)L->Store)->rowind = Glu.lsub;
+ ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
+ ((NCformat *)U->Store)->nnz = nnzU;
+ ((NCformat *)U->Store)->nzval = Glu.ucol;
+ ((NCformat *)U->Store)->rowind = Glu.usub;
+ ((NCformat *)U->Store)->colptr = Glu.xusub;
+ } else {
+ sCreate_SuperNode_Matrix(L, A->nrow, A->ncol, nnzL, Glu.lusup,
+ Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
+ Glu.xsup, SLU_SC, SLU_S, SLU_TRLU);
+ sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
+ Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU);
+ }
+
+ ops[FACT] += ops[TRSV] + ops[GEMV];
+
+ if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
+ SUPERLU_FREE (iperm_c);
+ SUPERLU_FREE (relax_end);
+
+}
diff --git a/intern/opennl/superlu/sgstrs.c b/intern/opennl/superlu/sgstrs.c
new file mode 100644
index 00000000000..5f7b9b57195
--- /dev/null
+++ b/intern/opennl/superlu/sgstrs.c
@@ -0,0 +1,331 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+
+/*
+ * Function prototypes
+ */
+void susolve(int, int, float*, float*);
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+
+
+void
+sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
+ int *perm_c, int *perm_r, SuperMatrix *B,
+ SuperLUStat_t *stat, int *info)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * SGSTRS solves a system of linear equations A*X=B or A'*X=B
+ * with A sparse and B dense, using the LU factorization computed by
+ * SGSTRF.
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * trans (input) trans_t
+ * Specifies the form of the system of equations:
+ * = NOTRANS: A * X = B (No transpose)
+ * = TRANS: A'* X = B (Transpose)
+ * = CONJ: A**H * X = B (Conjugate transpose)
+ *
+ * L (input) SuperMatrix*
+ * The factor L from the factorization Pr*A*Pc=L*U as computed by
+ * sgstrf(). Use compressed row subscripts storage for supernodes,
+ * i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ *
+ * U (input) SuperMatrix*
+ * The factor U from the factorization Pr*A*Pc=L*U as computed by
+ * sgstrf(). Use column-wise storage scheme, i.e., U has types:
+ * Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
+ *
+ * perm_c (input) int*, dimension (L->ncol)
+ * Column permutation vector, which defines the
+ * permutation matrix Pc; perm_c[i] = j means column i of A is
+ * in position j in A*Pc.
+ *
+ * perm_r (input) int*, dimension (L->nrow)
+ * Row permutation vector, which defines the permutation matrix Pr;
+ * perm_r[i] = j means row i of A is in position j in Pr*A.
+ *
+ * B (input/output) SuperMatrix*
+ * B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
+ * On entry, the right hand side matrix.
+ * On exit, the solution matrix if info = 0;
+ *
+ * stat (output) SuperLUStat_t*
+ * Record the statistics on runtime and floating-point operation count.
+ * See util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info (output) int*
+ * = 0: successful exit
+ * < 0: if info = -i, the i-th argument had an illegal value
+ *
+ */
+#ifdef _CRAY
+ _fcd ftcs1, ftcs2, ftcs3, ftcs4;
+#endif
+#ifdef USE_VENDOR_BLAS
+ float alpha = 1.0, beta = 1.0;
+ float *work_col;
+#endif
+ DNformat *Bstore;
+ float *Bmat;
+ SCformat *Lstore;
+ NCformat *Ustore;
+ float *Lval, *Uval;
+ int fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
+ int i, j, k, iptr, jcol, n, ldb, nrhs;
+ float *work, *rhs_work, *soln;
+ flops_t solve_ops;
+ void sprint_soln();
+
+ /* Test input parameters ... */
+ *info = 0;
+ Bstore = B->Store;
+ ldb = Bstore->lda;
+ nrhs = B->ncol;
+ if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
+ else if ( L->nrow != L->ncol || L->nrow < 0 ||
+ L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU )
+ *info = -2;
+ else if ( U->nrow != U->ncol || U->nrow < 0 ||
+ U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU )
+ *info = -3;
+ else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
+ B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
+ *info = -6;
+ if ( *info ) {
+ i = -(*info);
+ xerbla_("sgstrs", &i);
+ return;
+ }
+
+ n = L->nrow;
+ work = floatCalloc(n * nrhs);
+ if ( !work ) ABORT("Malloc fails for local work[].");
+ soln = floatMalloc(n);
+ if ( !soln ) ABORT("Malloc fails for local soln[].");
+
+ Bmat = Bstore->nzval;
+ Lstore = L->Store;
+ Lval = Lstore->nzval;
+ Ustore = U->Store;
+ Uval = Ustore->nzval;
+ solve_ops = 0;
+
+ if ( trans == NOTRANS ) {
+ /* Permute right hand sides to form Pr*B */
+ for (i = 0; i < nrhs; i++) {
+ rhs_work = &Bmat[i*ldb];
+ for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
+ for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+ }
+
+ /* Forward solve PLy=Pb. */
+ for (k = 0; k <= Lstore->nsuper; k++) {
+ fsupc = L_FST_SUPC(k);
+ istart = L_SUB_START(fsupc);
+ nsupr = L_SUB_START(fsupc+1) - istart;
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ nrow = nsupr - nsupc;
+
+ solve_ops += nsupc * (nsupc - 1) * nrhs;
+ solve_ops += 2 * nrow * nsupc * nrhs;
+
+ if ( nsupc == 1 ) {
+ for (j = 0; j < nrhs; j++) {
+ rhs_work = &Bmat[j*ldb];
+ luptr = L_NZ_START(fsupc);
+ for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
+ irow = L_SUB(iptr);
+ ++luptr;
+ rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
+ }
+ }
+ } else {
+ luptr = L_NZ_START(fsupc);
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ ftcs1 = _cptofcd("L", strlen("L"));
+ ftcs2 = _cptofcd("N", strlen("N"));
+ ftcs3 = _cptofcd("U", strlen("U"));
+ STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
+ &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+
+ SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
+ &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
+ &beta, &work[0], &n );
+#else
+ strsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
+ &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+
+ sgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
+ &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
+ &beta, &work[0], &n );
+#endif
+ for (j = 0; j < nrhs; j++) {
+ rhs_work = &Bmat[j*ldb];
+ work_col = &work[j*n];
+ iptr = istart + nsupc;
+ for (i = 0; i < nrow; i++) {
+ irow = L_SUB(iptr);
+ rhs_work[irow] -= work_col[i]; /* Scatter */
+ work_col[i] = 0.0;
+ iptr++;
+ }
+ }
+#else
+ for (j = 0; j < nrhs; j++) {
+ rhs_work = &Bmat[j*ldb];
+ slsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
+ smatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
+ &rhs_work[fsupc], &work[0] );
+
+ iptr = istart + nsupc;
+ for (i = 0; i < nrow; i++) {
+ irow = L_SUB(iptr);
+ rhs_work[irow] -= work[i];
+ work[i] = 0.0;
+ iptr++;
+ }
+ }
+#endif
+ } /* else ... */
+ } /* for L-solve */
+
+#ifdef DEBUG
+ printf("After L-solve: y=\n");
+ sprint_soln(n, Bmat);
+#endif
+
+ /*
+ * Back solve Ux=y.
+ */
+ for (k = Lstore->nsuper; k >= 0; k--) {
+ fsupc = L_FST_SUPC(k);
+ istart = L_SUB_START(fsupc);
+ nsupr = L_SUB_START(fsupc+1) - istart;
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+
+ solve_ops += nsupc * (nsupc + 1) * nrhs;
+
+ if ( nsupc == 1 ) {
+ rhs_work = &Bmat[0];
+ for (j = 0; j < nrhs; j++) {
+ rhs_work[fsupc] /= Lval[luptr];
+ rhs_work += ldb;
+ }
+ } else {
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ ftcs1 = _cptofcd("L", strlen("L"));
+ ftcs2 = _cptofcd("U", strlen("U"));
+ ftcs3 = _cptofcd("N", strlen("N"));
+ STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
+ &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+#else
+ strsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
+ &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+#endif
+#else
+ for (j = 0; j < nrhs; j++)
+ susolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
+#endif
+ }
+
+ for (j = 0; j < nrhs; ++j) {
+ rhs_work = &Bmat[j*ldb];
+ for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
+ solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
+ for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
+ irow = U_SUB(i);
+ rhs_work[irow] -= rhs_work[jcol] * Uval[i];
+ }
+ }
+ }
+
+ } /* for U-solve */
+
+#ifdef DEBUG
+ printf("After U-solve: x=\n");
+ sprint_soln(n, Bmat);
+#endif
+
+ /* Compute the final solution X := Pc*X. */
+ for (i = 0; i < nrhs; i++) {
+ rhs_work = &Bmat[i*ldb];
+ for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
+ for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+ }
+
+ stat->ops[SOLVE] = solve_ops;
+
+ } else { /* Solve A'*X=B or CONJ(A)*X=B */
+ /* Permute right hand sides to form Pc'*B. */
+ for (i = 0; i < nrhs; i++) {
+ rhs_work = &Bmat[i*ldb];
+ for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
+ for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+ }
+
+ stat->ops[SOLVE] = 0;
+ for (k = 0; k < nrhs; ++k) {
+
+ /* Multiply by inv(U'). */
+ sp_strsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
+
+ /* Multiply by inv(L'). */
+ sp_strsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
+
+ }
+ /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
+ for (i = 0; i < nrhs; i++) {
+ rhs_work = &Bmat[i*ldb];
+ for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
+ for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+ }
+
+ }
+
+ SUPERLU_FREE(work);
+ SUPERLU_FREE(soln);
+}
+
+/*
+ * Diagnostic print of the solution vector
+ */
+void
+sprint_soln(int n, float *soln)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ printf("\t%d: %.4f\n", i, soln[i]);
+}
diff --git a/intern/opennl/superlu/smemory.c b/intern/opennl/superlu/smemory.c
new file mode 100644
index 00000000000..79da748671a
--- /dev/null
+++ b/intern/opennl/superlu/smemory.c
@@ -0,0 +1,676 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+#include "ssp_defs.h"
+
+/* Constants */
+#define NO_MEMTYPE 4 /* 0: lusup;
+ 1: ucol;
+ 2: lsub;
+ 3: usub */
+#define GluIntArray(n) (5 * (n) + 5)
+
+/* Internal prototypes */
+void *sexpand (int *, MemType,int, int, GlobalLU_t *);
+int sLUWorkInit (int, int, int, int **, float **, LU_space_t);
+void copy_mem_float (int, void *, void *);
+void sStackCompress (GlobalLU_t *);
+void sSetupSpace (void *, int, LU_space_t *);
+void *suser_malloc (int, int);
+void suser_free (int, int);
+
+/* External prototypes (in memory.c - prec-indep) */
+extern void copy_mem_int (int, void *, void *);
+extern void user_bcopy (char *, char *, int);
+
+/* Headers for 4 types of dynamatically managed memory */
+typedef struct e_node {
+ int size; /* length of the memory that has been used */
+ void *mem; /* pointer to the new malloc'd store */
+} ExpHeader;
+
+typedef struct {
+ int size;
+ int used;
+ int top1; /* grow upward, relative to &array[0] */
+ int top2; /* grow downward */
+ void *array;
+} LU_stack_t;
+
+/* Variables local to this file */
+static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
+static LU_stack_t stack;
+static int no_expand;
+
+/* Macros to manipulate stack */
+#define StackFull(x) ( x + stack.used >= stack.size )
+#define NotDoubleAlign(addr) ( (long int)addr & 7 )
+#define DoubleAlign(addr) ( ((long int)addr + 7) & ~7L )
+#define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
+ (w + 1) * m * sizeof(float) )
+#define Reduce(alpha) ((alpha + 1) / 2) /* i.e. (alpha-1)/2 + 1 */
+
+
+
+
+/*
+ * Setup the memory model to be used for factorization.
+ * lwork = 0: use system malloc;
+ * lwork > 0: use user-supplied work[] space.
+ */
+void sSetupSpace(void *work, int lwork, LU_space_t *MemModel)
+{
+ if ( lwork == 0 ) {
+ *MemModel = SYSTEM; /* malloc/free */
+ } else if ( lwork > 0 ) {
+ *MemModel = USER; /* user provided space */
+ stack.used = 0;
+ stack.top1 = 0;
+ stack.top2 = (lwork/4)*4; /* must be word addressable */
+ stack.size = stack.top2;
+ stack.array = (void *) work;
+ }
+}
+
+
+
+void *suser_malloc(int bytes, int which_end)
+{
+ void *buf;
+
+ if ( StackFull(bytes) ) return (NULL);
+
+ if ( which_end == HEAD ) {
+ buf = (char*) stack.array + stack.top1;
+ stack.top1 += bytes;
+ } else {
+ stack.top2 -= bytes;
+ buf = (char*) stack.array + stack.top2;
+ }
+
+ stack.used += bytes;
+ return buf;
+}
+
+
+void suser_free(int bytes, int which_end)
+{
+ if ( which_end == HEAD ) {
+ stack.top1 -= bytes;
+ } else {
+ stack.top2 += bytes;
+ }
+ stack.used -= bytes;
+}
+
+
+
+/*
+ * mem_usage consists of the following fields:
+ * - for_lu (float)
+ * The amount of space used in bytes for the L\U data structures.
+ * - total_needed (float)
+ * The amount of space needed in bytes to perform factorization.
+ * - expansions (int)
+ * Number of memory expansions during the LU factorization.
+ */
+int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
+{
+ SCformat *Lstore;
+ NCformat *Ustore;
+ register int n, iword, dword, panel_size = sp_ienv(1);
+
+ Lstore = L->Store;
+ Ustore = U->Store;
+ n = L->ncol;
+ iword = sizeof(int);
+ dword = sizeof(float);
+
+ /* For LU factors */
+ mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
+ dword + Lstore->rowind_colptr[n] * iword );
+ mem_usage->for_lu += (float)( (n + 1) * iword +
+ Ustore->colptr[n] * (dword + iword) );
+
+ /* Working storage to support factorization */
+ mem_usage->total_needed = mem_usage->for_lu +
+ (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
+ (panel_size + 1) * n * dword );
+
+ mem_usage->expansions = --no_expand;
+
+ return 0;
+} /* sQuerySpace */
+
+/*
+ * Allocate storage for the data structures common to all factor routines.
+ * For those unpredictable size, make a guess as FILL * nnz(A).
+ * Return value:
+ * If lwork = -1, return the estimated amount of space required, plus n;
+ * otherwise, return the amount of space actually allocated when
+ * memory allocation failure occurred.
+ */
+int
+sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
+ int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
+ int **iwork, float **dwork)
+{
+ int info, iword, dword;
+ SCformat *Lstore;
+ NCformat *Ustore;
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ float *ucol;
+ int *usub, *xusub;
+ int nzlmax, nzumax, nzlumax;
+ int FILL = sp_ienv(6);
+
+ Glu->n = n;
+ no_expand = 0;
+ iword = sizeof(int);
+ dword = sizeof(float);
+
+ if ( !expanders )
+ expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
+ if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
+
+ if ( fact != SamePattern_SameRowPerm ) {
+ /* Guess for L\U factors */
+ nzumax = nzlumax = FILL * annz;
+ nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
+
+ if ( lwork == -1 ) {
+ return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
+ + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
+ } else {
+ sSetupSpace(work, lwork, &Glu->MemModel);
+ }
+
+#ifdef DEBUG
+ printf("sLUMemInit() called: annz %d, MemModel %d\n",
+ annz, Glu->MemModel);
+#endif
+
+ /* Integer pointers for L\U factors */
+ if ( Glu->MemModel == SYSTEM ) {
+ xsup = intMalloc(n+1);
+ supno = intMalloc(n+1);
+ xlsub = intMalloc(n+1);
+ xlusup = intMalloc(n+1);
+ xusub = intMalloc(n+1);
+ } else {
+ xsup = (int *)suser_malloc((n+1) * iword, HEAD);
+ supno = (int *)suser_malloc((n+1) * iword, HEAD);
+ xlsub = (int *)suser_malloc((n+1) * iword, HEAD);
+ xlusup = (int *)suser_malloc((n+1) * iword, HEAD);
+ xusub = (int *)suser_malloc((n+1) * iword, HEAD);
+ }
+
+ lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
+ ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
+ lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
+ usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
+
+ while ( !lusup || !ucol || !lsub || !usub ) {
+ if ( Glu->MemModel == SYSTEM ) {
+ SUPERLU_FREE(lusup);
+ SUPERLU_FREE(ucol);
+ SUPERLU_FREE(lsub);
+ SUPERLU_FREE(usub);
+ } else {
+ suser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
+ }
+ nzlumax /= 2;
+ nzumax /= 2;
+ nzlmax /= 2;
+ if ( nzlumax < annz ) {
+ printf("Not enough memory to perform factorization.\n");
+ return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
+ }
+ lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
+ ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
+ lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
+ usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
+ }
+
+ } else {
+ /* fact == SamePattern_SameRowPerm */
+ Lstore = L->Store;
+ Ustore = U->Store;
+ xsup = Lstore->sup_to_col;
+ supno = Lstore->col_to_sup;
+ xlsub = Lstore->rowind_colptr;
+ xlusup = Lstore->nzval_colptr;
+ xusub = Ustore->colptr;
+ nzlmax = Glu->nzlmax; /* max from previous factorization */
+ nzumax = Glu->nzumax;
+ nzlumax = Glu->nzlumax;
+
+ if ( lwork == -1 ) {
+ return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
+ + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
+ } else if ( lwork == 0 ) {
+ Glu->MemModel = SYSTEM;
+ } else {
+ Glu->MemModel = USER;
+ stack.top2 = (lwork/4)*4; /* must be word-addressable */
+ stack.size = stack.top2;
+ }
+
+ lsub = expanders[LSUB].mem = Lstore->rowind;
+ lusup = expanders[LUSUP].mem = Lstore->nzval;
+ usub = expanders[USUB].mem = Ustore->rowind;
+ ucol = expanders[UCOL].mem = Ustore->nzval;;
+ expanders[LSUB].size = nzlmax;
+ expanders[LUSUP].size = nzlumax;
+ expanders[USUB].size = nzumax;
+ expanders[UCOL].size = nzumax;
+ }
+
+ Glu->xsup = xsup;
+ Glu->supno = supno;
+ Glu->lsub = lsub;
+ Glu->xlsub = xlsub;
+ Glu->lusup = lusup;
+ Glu->xlusup = xlusup;
+ Glu->ucol = ucol;
+ Glu->usub = usub;
+ Glu->xusub = xusub;
+ Glu->nzlmax = nzlmax;
+ Glu->nzumax = nzumax;
+ Glu->nzlumax = nzlumax;
+
+ info = sLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
+ if ( info )
+ return ( info + smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
+
+ ++no_expand;
+ return 0;
+
+} /* sLUMemInit */
+
+/* Allocate known working storage. Returns 0 if success, otherwise
+ returns the number of bytes allocated so far when failure occurred. */
+int
+sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
+ float **dworkptr, LU_space_t MemModel)
+{
+ int isize, dsize, extra;
+ float *old_ptr;
+ int maxsuper = sp_ienv(3),
+ rowblk = sp_ienv(4);
+
+ isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
+ dsize = (m * panel_size +
+ NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float);
+
+ if ( MemModel == SYSTEM )
+ *iworkptr = (int *) intCalloc(isize/sizeof(int));
+ else
+ *iworkptr = (int *) suser_malloc(isize, TAIL);
+ if ( ! *iworkptr ) {
+ fprintf(stderr, "sLUWorkInit: malloc fails for local iworkptr[]\n");
+ return (isize + n);
+ }
+
+ if ( MemModel == SYSTEM )
+ *dworkptr = (float *) SUPERLU_MALLOC(dsize);
+ else {
+ *dworkptr = (float *) suser_malloc(dsize, TAIL);
+ if ( NotDoubleAlign(*dworkptr) ) {
+ old_ptr = *dworkptr;
+ *dworkptr = (float*) DoubleAlign(*dworkptr);
+ *dworkptr = (float*) ((double*)*dworkptr - 1);
+ extra = (char*)old_ptr - (char*)*dworkptr;
+#ifdef DEBUG
+ printf("sLUWorkInit: not aligned, extra %d\n", extra);
+#endif
+ stack.top2 -= extra;
+ stack.used += extra;
+ }
+ }
+ if ( ! *dworkptr ) {
+ fprintf(stderr, "malloc fails for local dworkptr[].");
+ return (isize + dsize + n);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Set up pointers for real working arrays.
+ */
+void
+sSetRWork(int m, int panel_size, float *dworkptr,
+ float **dense, float **tempv)
+{
+ float zero = 0.0;
+
+ int maxsuper = sp_ienv(3),
+ rowblk = sp_ienv(4);
+ *dense = dworkptr;
+ *tempv = *dense + panel_size*m;
+ sfill (*dense, m * panel_size, zero);
+ sfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);
+}
+
+/*
+ * Free the working storage used by factor routines.
+ */
+void sLUWorkFree(int *iwork, float *dwork, GlobalLU_t *Glu)
+{
+ if ( Glu->MemModel == SYSTEM ) {
+ SUPERLU_FREE (iwork);
+ SUPERLU_FREE (dwork);
+ } else {
+ stack.used -= (stack.size - stack.top2);
+ stack.top2 = stack.size;
+/* sStackCompress(Glu); */
+ }
+
+ SUPERLU_FREE (expanders);
+ expanders = 0;
+}
+
+/* Expand the data structures for L and U during the factorization.
+ * Return value: 0 - successful return
+ * > 0 - number of bytes allocated when run out of space
+ */
+int
+sLUMemXpand(int jcol,
+ int next, /* number of elements currently in the factors */
+ MemType mem_type, /* which type of memory to expand */
+ int *maxlen, /* modified - maximum length of a data structure */
+ GlobalLU_t *Glu /* modified - global LU data structures */
+ )
+{
+ void *new_mem;
+
+#ifdef DEBUG
+ printf("sLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
+ jcol, next, *maxlen, mem_type);
+#endif
+
+ if (mem_type == USUB)
+ new_mem = sexpand(maxlen, mem_type, next, 1, Glu);
+ else
+ new_mem = sexpand(maxlen, mem_type, next, 0, Glu);
+
+ if ( !new_mem ) {
+ int nzlmax = Glu->nzlmax;
+ int nzumax = Glu->nzumax;
+ int nzlumax = Glu->nzlumax;
+ fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
+ return (smemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
+ }
+
+ switch ( mem_type ) {
+ case LUSUP:
+ Glu->lusup = (float *) new_mem;
+ Glu->nzlumax = *maxlen;
+ break;
+ case UCOL:
+ Glu->ucol = (float *) new_mem;
+ Glu->nzumax = *maxlen;
+ break;
+ case LSUB:
+ Glu->lsub = (int *) new_mem;
+ Glu->nzlmax = *maxlen;
+ break;
+ case USUB:
+ Glu->usub = (int *) new_mem;
+ Glu->nzumax = *maxlen;
+ break;
+ }
+
+ return 0;
+
+}
+
+
+
+void
+copy_mem_float(int howmany, void *old, void *new)
+{
+ register int i;
+ float *dold = old;
+ float *dnew = new;
+ for (i = 0; i < howmany; i++) dnew[i] = dold[i];
+}
+
+/*
+ * Expand the existing storage to accommodate more fill-ins.
+ */
+void
+*sexpand (
+ int *prev_len, /* length used from previous call */
+ MemType type, /* which part of the memory to expand */
+ int len_to_copy, /* size of the memory to be copied to new store */
+ int keep_prev, /* = 1: use prev_len;
+ = 0: compute new_len to expand */
+ GlobalLU_t *Glu /* modified - global LU data structures */
+ )
+{
+ float EXPAND = 1.5;
+ float alpha;
+ void *new_mem, *old_mem;
+ int new_len, tries, lword, extra, bytes_to_copy;
+
+ alpha = EXPAND;
+
+ if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
+ new_len = *prev_len;
+ else {
+ new_len = alpha * *prev_len;
+ }
+
+ if ( type == LSUB || type == USUB ) lword = sizeof(int);
+ else lword = sizeof(float);
+
+ if ( Glu->MemModel == SYSTEM ) {
+ new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
+/* new_mem = (void *) calloc(new_len, lword); */
+ if ( no_expand != 0 ) {
+ tries = 0;
+ if ( keep_prev ) {
+ if ( !new_mem ) return (NULL);
+ } else {
+ while ( !new_mem ) {
+ if ( ++tries > 10 ) return (NULL);
+ alpha = Reduce(alpha);
+ new_len = alpha * *prev_len;
+ new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
+/* new_mem = (void *) calloc(new_len, lword); */
+ }
+ }
+ if ( type == LSUB || type == USUB ) {
+ copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
+ } else {
+ copy_mem_float(len_to_copy, expanders[type].mem, new_mem);
+ }
+ SUPERLU_FREE (expanders[type].mem);
+ }
+ expanders[type].mem = (void *) new_mem;
+
+ } else { /* MemModel == USER */
+ if ( no_expand == 0 ) {
+ new_mem = suser_malloc(new_len * lword, HEAD);
+ if ( NotDoubleAlign(new_mem) &&
+ (type == LUSUP || type == UCOL) ) {
+ old_mem = new_mem;
+ new_mem = (void *)DoubleAlign(new_mem);
+ extra = (char*)new_mem - (char*)old_mem;
+#ifdef DEBUG
+ printf("expand(): not aligned, extra %d\n", extra);
+#endif
+ stack.top1 += extra;
+ stack.used += extra;
+ }
+ expanders[type].mem = (void *) new_mem;
+ }
+ else {
+ tries = 0;
+ extra = (new_len - *prev_len) * lword;
+ if ( keep_prev ) {
+ if ( StackFull(extra) ) return (NULL);
+ } else {
+ while ( StackFull(extra) ) {
+ if ( ++tries > 10 ) return (NULL);
+ alpha = Reduce(alpha);
+ new_len = alpha * *prev_len;
+ extra = (new_len - *prev_len) * lword;
+ }
+ }
+
+ if ( type != USUB ) {
+ new_mem = (void*)((char*)expanders[type + 1].mem + extra);
+ bytes_to_copy = (char*)stack.array + stack.top1
+ - (char*)expanders[type + 1].mem;
+ user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
+
+ if ( type < USUB ) {
+ Glu->usub = expanders[USUB].mem =
+ (void*)((char*)expanders[USUB].mem + extra);
+ }
+ if ( type < LSUB ) {
+ Glu->lsub = expanders[LSUB].mem =
+ (void*)((char*)expanders[LSUB].mem + extra);
+ }
+ if ( type < UCOL ) {
+ Glu->ucol = expanders[UCOL].mem =
+ (void*)((char*)expanders[UCOL].mem + extra);
+ }
+ stack.top1 += extra;
+ stack.used += extra;
+ if ( type == UCOL ) {
+ stack.top1 += extra; /* Add same amount for USUB */
+ stack.used += extra;
+ }
+
+ } /* if ... */
+
+ } /* else ... */
+ }
+
+ expanders[type].size = new_len;
+ *prev_len = new_len;
+ if ( no_expand ) ++no_expand;
+
+ return (void *) expanders[type].mem;
+
+} /* sexpand */
+
+
+/*
+ * Compress the work[] array to remove fragmentation.
+ */
+void
+sStackCompress(GlobalLU_t *Glu)
+{
+ register int iword, dword, ndim;
+ char *last, *fragment;
+ int *ifrom, *ito;
+ float *dfrom, *dto;
+ int *xlsub, *lsub, *xusub, *usub, *xlusup;
+ float *ucol, *lusup;
+
+ iword = sizeof(int);
+ dword = sizeof(float);
+ ndim = Glu->n;
+
+ xlsub = Glu->xlsub;
+ lsub = Glu->lsub;
+ xusub = Glu->xusub;
+ usub = Glu->usub;
+ xlusup = Glu->xlusup;
+ ucol = Glu->ucol;
+ lusup = Glu->lusup;
+
+ dfrom = ucol;
+ dto = (float *)((char*)lusup + xlusup[ndim] * dword);
+ copy_mem_float(xusub[ndim], dfrom, dto);
+ ucol = dto;
+
+ ifrom = lsub;
+ ito = (int *) ((char*)ucol + xusub[ndim] * iword);
+ copy_mem_int(xlsub[ndim], ifrom, ito);
+ lsub = ito;
+
+ ifrom = usub;
+ ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
+ copy_mem_int(xusub[ndim], ifrom, ito);
+ usub = ito;
+
+ last = (char*)usub + xusub[ndim] * iword;
+ fragment = (char*) (((char*)stack.array + stack.top1) - last);
+ stack.used -= (long int) fragment;
+ stack.top1 -= (long int) fragment;
+
+ Glu->ucol = ucol;
+ Glu->lsub = lsub;
+ Glu->usub = usub;
+
+#ifdef DEBUG
+ printf("sStackCompress: fragment %d\n", fragment);
+ /* for (last = 0; last < ndim; ++last)
+ print_lu_col("After compress:", last, 0);*/
+#endif
+
+}
+
+/*
+ * Allocate storage for original matrix A
+ */
+void
+sallocateA(int n, int nnz, float **a, int **asub, int **xa)
+{
+ *a = (float *) floatMalloc(nnz);
+ *asub = (int *) intMalloc(nnz);
+ *xa = (int *) intMalloc(n+1);
+}
+
+
+float *floatMalloc(int n)
+{
+ float *buf;
+ buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
+ if ( !buf ) {
+ ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
+ }
+ return (buf);
+}
+
+float *floatCalloc(int n)
+{
+ float *buf;
+ register int i;
+ float zero = 0.0;
+ buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
+ if ( !buf ) {
+ ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
+ }
+ for (i = 0; i < n; ++i) buf[i] = zero;
+ return (buf);
+}
+
+
+int smemory_usage(const int nzlmax, const int nzumax,
+ const int nzlumax, const int n)
+{
+ register int iword, dword;
+
+ iword = sizeof(int);
+ dword = sizeof(float);
+
+ return (10 * n * iword +
+ nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
+
+}
diff --git a/intern/opennl/superlu/smyblas2.c b/intern/opennl/superlu/smyblas2.c
new file mode 100644
index 00000000000..729e17f7674
--- /dev/null
+++ b/intern/opennl/superlu/smyblas2.c
@@ -0,0 +1,225 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ * File name: smyblas2.c
+ * Purpose:
+ * Level 2 BLAS operations: solves and matvec, written in C.
+ * Note:
+ * This is only used when the system lacks an efficient BLAS library.
+ */
+
+/*
+ * Solves a dense UNIT lower triangular system. The unit lower
+ * triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
+ * The solution will be returned in the rhs vector.
+ */
+void slsolve ( int ldm, int ncol, float *M, float *rhs )
+{
+ int k;
+ float x0, x1, x2, x3, x4, x5, x6, x7;
+ float *M0;
+ register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
+ register int firstcol = 0;
+
+ M0 = &M[0];
+
+ while ( firstcol < ncol - 7 ) { /* Do 8 columns */
+ Mki0 = M0 + 1;
+ Mki1 = Mki0 + ldm + 1;
+ Mki2 = Mki1 + ldm + 1;
+ Mki3 = Mki2 + ldm + 1;
+ Mki4 = Mki3 + ldm + 1;
+ Mki5 = Mki4 + ldm + 1;
+ Mki6 = Mki5 + ldm + 1;
+ Mki7 = Mki6 + ldm + 1;
+
+ x0 = rhs[firstcol];
+ x1 = rhs[firstcol+1] - x0 * *Mki0++;
+ x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
+ x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
+ x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++;
+ x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++ - x4 * *Mki4++;
+ x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
+ x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
+ - x6 * *Mki6++;
+
+ rhs[++firstcol] = x1;
+ rhs[++firstcol] = x2;
+ rhs[++firstcol] = x3;
+ rhs[++firstcol] = x4;
+ rhs[++firstcol] = x5;
+ rhs[++firstcol] = x6;
+ rhs[++firstcol] = x7;
+ ++firstcol;
+
+ for (k = firstcol; k < ncol; k++)
+ rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
+ - x2 * *Mki2++ - x3 * *Mki3++
+ - x4 * *Mki4++ - x5 * *Mki5++
+ - x6 * *Mki6++ - x7 * *Mki7++;
+
+ M0 += 8 * ldm + 8;
+ }
+
+ while ( firstcol < ncol - 3 ) { /* Do 4 columns */
+ Mki0 = M0 + 1;
+ Mki1 = Mki0 + ldm + 1;
+ Mki2 = Mki1 + ldm + 1;
+ Mki3 = Mki2 + ldm + 1;
+
+ x0 = rhs[firstcol];
+ x1 = rhs[firstcol+1] - x0 * *Mki0++;
+ x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
+ x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
+
+ rhs[++firstcol] = x1;
+ rhs[++firstcol] = x2;
+ rhs[++firstcol] = x3;
+ ++firstcol;
+
+ for (k = firstcol; k < ncol; k++)
+ rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
+ - x2 * *Mki2++ - x3 * *Mki3++;
+
+ M0 += 4 * ldm + 4;
+ }
+
+ if ( firstcol < ncol - 1 ) { /* Do 2 columns */
+ Mki0 = M0 + 1;
+ Mki1 = Mki0 + ldm + 1;
+
+ x0 = rhs[firstcol];
+ x1 = rhs[firstcol+1] - x0 * *Mki0++;
+
+ rhs[++firstcol] = x1;
+ ++firstcol;
+
+ for (k = firstcol; k < ncol; k++)
+ rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
+
+ }
+
+}
+
+/*
+ * Solves a dense upper triangular system. The upper triangular matrix is
+ * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
+ * in the rhs vector.
+ */
+void
+susolve ( ldm, ncol, M, rhs )
+int ldm; /* in */
+int ncol; /* in */
+float *M; /* in */
+float *rhs; /* modified */
+{
+ float xj;
+ int jcol, j, irow;
+
+ jcol = ncol - 1;
+
+ for (j = 0; j < ncol; j++) {
+
+ xj = rhs[jcol] / M[jcol + jcol*ldm]; /* M(jcol, jcol) */
+ rhs[jcol] = xj;
+
+ for (irow = 0; irow < jcol; irow++)
+ rhs[irow] -= xj * M[irow + jcol*ldm]; /* M(irow, jcol) */
+
+ jcol--;
+
+ }
+}
+
+
+/*
+ * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
+ * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
+ */
+void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
+
+int ldm; /* in -- leading dimension of M */
+int nrow; /* in */
+int ncol; /* in */
+float *M; /* in */
+float *vec; /* in */
+float *Mxvec; /* in/out */
+
+{
+ float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
+ float *M0;
+ register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
+ register int firstcol = 0;
+ int k;
+
+ M0 = &M[0];
+ while ( firstcol < ncol - 7 ) { /* Do 8 columns */
+
+ Mki0 = M0;
+ Mki1 = Mki0 + ldm;
+ Mki2 = Mki1 + ldm;
+ Mki3 = Mki2 + ldm;
+ Mki4 = Mki3 + ldm;
+ Mki5 = Mki4 + ldm;
+ Mki6 = Mki5 + ldm;
+ Mki7 = Mki6 + ldm;
+
+ vi0 = vec[firstcol++];
+ vi1 = vec[firstcol++];
+ vi2 = vec[firstcol++];
+ vi3 = vec[firstcol++];
+ vi4 = vec[firstcol++];
+ vi5 = vec[firstcol++];
+ vi6 = vec[firstcol++];
+ vi7 = vec[firstcol++];
+
+ for (k = 0; k < nrow; k++)
+ Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
+ + vi2 * *Mki2++ + vi3 * *Mki3++
+ + vi4 * *Mki4++ + vi5 * *Mki5++
+ + vi6 * *Mki6++ + vi7 * *Mki7++;
+
+ M0 += 8 * ldm;
+ }
+
+ while ( firstcol < ncol - 3 ) { /* Do 4 columns */
+
+ Mki0 = M0;
+ Mki1 = Mki0 + ldm;
+ Mki2 = Mki1 + ldm;
+ Mki3 = Mki2 + ldm;
+
+ vi0 = vec[firstcol++];
+ vi1 = vec[firstcol++];
+ vi2 = vec[firstcol++];
+ vi3 = vec[firstcol++];
+ for (k = 0; k < nrow; k++)
+ Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
+ + vi2 * *Mki2++ + vi3 * *Mki3++ ;
+
+ M0 += 4 * ldm;
+ }
+
+ while ( firstcol < ncol ) { /* Do 1 column */
+
+ Mki0 = M0;
+ vi0 = vec[firstcol++];
+ for (k = 0; k < nrow; k++)
+ Mxvec[k] += vi0 * *Mki0++;
+
+ M0 += ldm;
+ }
+
+}
+
diff --git a/intern/opennl/superlu/sp_coletree.c b/intern/opennl/superlu/sp_coletree.c
new file mode 100644
index 00000000000..d49919167f5
--- /dev/null
+++ b/intern/opennl/superlu/sp_coletree.c
@@ -0,0 +1,332 @@
+
+/* Elimination tree computation and layout routines */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+/*
+ * Implementation of disjoint set union routines.
+ * Elements are integers in 0..n-1, and the
+ * names of the sets themselves are of type int.
+ *
+ * Calls are:
+ * initialize_disjoint_sets (n) initial call.
+ * s = make_set (i) returns a set containing only i.
+ * s = link (t, u) returns s = t union u, destroying t and u.
+ * s = find (i) return name of set containing i.
+ * finalize_disjoint_sets final call.
+ *
+ * This implementation uses path compression but not weighted union.
+ * See Tarjan's book for details.
+ * John Gilbert, CMI, 1987.
+ *
+ * Implemented path-halving by XSL 07/05/95.
+ */
+
+static int *pp; /* parent array for sets */
+
+static
+int *mxCallocInt(int n)
+{
+ register int i;
+ int *buf;
+
+ buf = (int *) SUPERLU_MALLOC( n * sizeof(int) );
+ if ( !buf ) {
+ ABORT("SUPERLU_MALLOC fails for buf in mxCallocInt()");
+ }
+ for (i = 0; i < n; i++) buf[i] = 0;
+ return (buf);
+}
+
+static
+void initialize_disjoint_sets (
+ int n
+ )
+{
+ pp = mxCallocInt(n);
+}
+
+
+static
+int make_set (
+ int i
+ )
+{
+ pp[i] = i;
+ return i;
+}
+
+
+static
+int link (
+ int s,
+ int t
+ )
+{
+ pp[s] = t;
+ return t;
+}
+
+
+/* PATH HALVING */
+static
+int find (int i)
+{
+ register int p, gp;
+
+ p = pp[i];
+ gp = pp[p];
+ while (gp != p) {
+ pp[i] = gp;
+ i = gp;
+ p = pp[i];
+ gp = pp[p];
+ }
+ return (p);
+}
+
+#if 0
+/* PATH COMPRESSION */
+static
+int find (
+ int i
+ )
+{
+ if (pp[i] != i)
+ pp[i] = find (pp[i]);
+ return pp[i];
+}
+#endif
+
+static
+void finalize_disjoint_sets (
+ void
+ )
+{
+ SUPERLU_FREE(pp);
+}
+
+
+/*
+ * Find the elimination tree for A'*A.
+ * This uses something similar to Liu's algorithm.
+ * It runs in time O(nz(A)*log n) and does not form A'*A.
+ *
+ * Input:
+ * Sparse matrix A. Numeric values are ignored, so any
+ * explicit zeros are treated as nonzero.
+ * Output:
+ * Integer array of parents representing the elimination
+ * tree of the symbolic product A'*A. Each vertex is a
+ * column of A, and nc means a root of the elimination forest.
+ *
+ * John R. Gilbert, Xerox, 10 Dec 1990
+ * Based on code by JRG dated 1987, 1988, and 1990.
+ */
+
+/*
+ * Nonsymmetric elimination tree
+ */
+int
+sp_coletree(
+ int *acolst, int *acolend, /* column start and end past 1 */
+ int *arow, /* row indices of A */
+ int nr, int nc, /* dimension of A */
+ int *parent /* parent in elim tree */
+ )
+{
+ int *root; /* root of subtee of etree */
+ int *firstcol; /* first nonzero col in each row*/
+ int rset, cset;
+ int row, col;
+ int rroot;
+ int p;
+
+ root = mxCallocInt (nc);
+ initialize_disjoint_sets (nc);
+
+ /* Compute firstcol[row] = first nonzero column in row */
+
+ firstcol = mxCallocInt (nr);
+ for (row = 0; row < nr; firstcol[row++] = nc);
+ for (col = 0; col < nc; col++)
+ for (p = acolst[col]; p < acolend[col]; p++) {
+ row = arow[p];
+ firstcol[row] = SUPERLU_MIN(firstcol[row], col);
+ }
+
+ /* Compute etree by Liu's algorithm for symmetric matrices,
+ except use (firstcol[r],c) in place of an edge (r,c) of A.
+ Thus each row clique in A'*A is replaced by a star
+ centered at its first vertex, which has the same fill. */
+
+ for (col = 0; col < nc; col++) {
+ cset = make_set (col);
+ root[cset] = col;
+ parent[col] = nc; /* Matlab */
+ for (p = acolst[col]; p < acolend[col]; p++) {
+ row = firstcol[arow[p]];
+ if (row >= col) continue;
+ rset = find (row);
+ rroot = root[rset];
+ if (rroot != col) {
+ parent[rroot] = col;
+ cset = link (cset, rset);
+ root[cset] = col;
+ }
+ }
+ }
+
+ SUPERLU_FREE (root);
+ SUPERLU_FREE (firstcol);
+ finalize_disjoint_sets ();
+ return 0;
+}
+
+/*
+ * q = TreePostorder (n, p);
+ *
+ * Postorder a tree.
+ * Input:
+ * p is a vector of parent pointers for a forest whose
+ * vertices are the integers 0 to n-1; p[root]==n.
+ * Output:
+ * q is a vector indexed by 0..n-1 such that q[i] is the
+ * i-th vertex in a postorder numbering of the tree.
+ *
+ * ( 2/7/95 modified by X.Li:
+ * q is a vector indexed by 0:n-1 such that vertex i is the
+ * q[i]-th vertex in a postorder numbering of the tree.
+ * That is, this is the inverse of the previous q. )
+ *
+ * In the child structure, lower-numbered children are represented
+ * first, so that a tree which is already numbered in postorder
+ * will not have its order changed.
+ *
+ * Written by John Gilbert, Xerox, 10 Dec 1990.
+ * Based on code written by John Gilbert at CMI in 1987.
+ */
+
+static int *first_kid, *next_kid; /* Linked list of children. */
+static int *post, postnum;
+
+static
+/*
+ * Depth-first search from vertex v.
+ */
+void etdfs (
+ int v
+ )
+{
+ int w;
+
+ for (w = first_kid[v]; w != -1; w = next_kid[w]) {
+ etdfs (w);
+ }
+ /* post[postnum++] = v; in Matlab */
+ post[v] = postnum++; /* Modified by X.Li on 2/14/95 */
+}
+
+
+/*
+ * Post order a tree
+ */
+int *TreePostorder(
+ int n,
+ int *parent
+)
+{
+ int v, dad;
+
+ /* Allocate storage for working arrays and results */
+ first_kid = mxCallocInt (n+1);
+ next_kid = mxCallocInt (n+1);
+ post = mxCallocInt (n+1);
+
+ /* Set up structure describing children */
+ for (v = 0; v <= n; first_kid[v++] = -1);
+ for (v = n-1; v >= 0; v--) {
+ dad = parent[v];
+ next_kid[v] = first_kid[dad];
+ first_kid[dad] = v;
+ }
+
+ /* Depth-first search from dummy root vertex #n */
+ postnum = 0;
+ etdfs (n);
+
+ SUPERLU_FREE (first_kid);
+ SUPERLU_FREE (next_kid);
+ return post;
+}
+
+
+/*
+ * p = spsymetree (A);
+ *
+ * Find the elimination tree for symmetric matrix A.
+ * This uses Liu's algorithm, and runs in time O(nz*log n).
+ *
+ * Input:
+ * Square sparse matrix A. No check is made for symmetry;
+ * elements below and on the diagonal are ignored.
+ * Numeric values are ignored, so any explicit zeros are
+ * treated as nonzero.
+ * Output:
+ * Integer array of parents representing the etree, with n
+ * meaning a root of the elimination forest.
+ * Note:
+ * This routine uses only the upper triangle, while sparse
+ * Cholesky (as in spchol.c) uses only the lower. Matlab's
+ * dense Cholesky uses only the upper. This routine could
+ * be modified to use the lower triangle either by transposing
+ * the matrix or by traversing it by rows with auxiliary
+ * pointer and link arrays.
+ *
+ * John R. Gilbert, Xerox, 10 Dec 1990
+ * Based on code by JRG dated 1987, 1988, and 1990.
+ * Modified by X.S. Li, November 1999.
+ */
+
+/*
+ * Symmetric elimination tree
+ */
+int
+sp_symetree(
+ int *acolst, int *acolend, /* column starts and ends past 1 */
+ int *arow, /* row indices of A */
+ int n, /* dimension of A */
+ int *parent /* parent in elim tree */
+ )
+{
+ int *root; /* root of subtree of etree */
+ int rset, cset;
+ int row, col;
+ int rroot;
+ int p;
+
+ root = mxCallocInt (n);
+ initialize_disjoint_sets (n);
+
+ for (col = 0; col < n; col++) {
+ cset = make_set (col);
+ root[cset] = col;
+ parent[col] = n; /* Matlab */
+ for (p = acolst[col]; p < acolend[col]; p++) {
+ row = arow[p];
+ if (row >= col) continue;
+ rset = find (row);
+ rroot = root[rset];
+ if (rroot != col) {
+ parent[rroot] = col;
+ cset = link (cset, rset);
+ root[cset] = col;
+ }
+ }
+ }
+ SUPERLU_FREE (root);
+ finalize_disjoint_sets ();
+ return 0;
+} /* SP_SYMETREE */
diff --git a/intern/opennl/superlu/sp_ienv.c b/intern/opennl/superlu/sp_ienv.c
new file mode 100644
index 00000000000..5b0ba7b2151
--- /dev/null
+++ b/intern/opennl/superlu/sp_ienv.c
@@ -0,0 +1,65 @@
+/*
+ * File name: sp_ienv.c
+ * History: Modified from lapack routine ILAENV
+ */
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+sp_ienv(int ispec)
+{
+/*
+ Purpose
+ =======
+
+ sp_ienv() is inquired to choose machine-dependent parameters for the
+ local environment. See ISPEC for a description of the parameters.
+
+ This version provides a set of parameters which should give good,
+ but not optimal, performance on many of the currently available
+ computers. Users are encouraged to modify this subroutine to set
+ the tuning parameters for their particular machine using the option
+ and problem size information in the arguments.
+
+ Arguments
+ =========
+
+ ISPEC (input) int
+ Specifies the parameter to be returned as the value of SP_IENV.
+ = 1: the panel size w; a panel consists of w consecutive
+ columns of matrix A in the process of Gaussian elimination.
+ The best value depends on machine's cache characters.
+ = 2: the relaxation parameter relax; if the number of
+ nodes (columns) in a subtree of the elimination tree is less
+ than relax, this subtree is considered as one supernode,
+ regardless of their row structures.
+ = 3: the maximum size for a supernode;
+ = 4: the minimum row dimension for 2-D blocking to be used;
+ = 5: the minimum column dimension for 2-D blocking to be used;
+ = 6: the estimated fills factor for L and U, compared with A;
+
+ (SP_IENV) (output) int
+ >= 0: the value of the parameter specified by ISPEC
+ < 0: if SP_IENV = -k, the k-th argument had an illegal value.
+
+ =====================================================================
+*/
+ int i;
+
+ switch (ispec) {
+ case 1: return (10);
+ case 2: return (5);
+ case 3: return (100);
+ case 4: return (200);
+ case 5: return (40);
+ case 6: return (20);
+ }
+
+ /* Invalid value for ISPEC */
+ i = 1;
+ xerbla_("sp_ienv", &i);
+ return 0;
+
+} /* sp_ienv_ */
+
diff --git a/intern/opennl/superlu/sp_preorder.c b/intern/opennl/superlu/sp_preorder.c
new file mode 100644
index 00000000000..f82da2de1aa
--- /dev/null
+++ b/intern/opennl/superlu/sp_preorder.c
@@ -0,0 +1,203 @@
+#include "ssp_defs.h"
+
+void
+sp_preorder(superlu_options_t *options, SuperMatrix *A, int *perm_c,
+ int *etree, SuperMatrix *AC)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * sp_preorder() permutes the columns of the original matrix. It performs
+ * the following steps:
+ *
+ * 1. Apply column permutation perm_c[] to A's column pointers to form AC;
+ *
+ * 2. If options->Fact = DOFACT, then
+ * (1) Compute column elimination tree etree[] of AC'AC;
+ * (2) Post order etree[] to get a postordered elimination tree etree[],
+ * and a postorder permutation post[];
+ * (3) Apply post[] permutation to columns of AC;
+ * (4) Overwrite perm_c[] with the product perm_c * post.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ * Specifies whether or not the elimination tree will be re-used.
+ * If options->Fact == DOFACT, this means first time factor A,
+ * etree is computed, postered, and output.
+ * Otherwise, re-factor A, etree is input, unchanged on exit.
+ *
+ * A (input) SuperMatrix*
+ * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ * of the linear equations is A->nrow. Currently, the type of A can be:
+ * Stype = NC or SLU_NCP; Mtype = SLU_GE.
+ * In the future, more general A may be handled.
+ *
+ * perm_c (input/output) int*
+ * Column permutation vector of size A->ncol, which defines the
+ * permutation matrix Pc; perm_c[i] = j means column i of A is
+ * in position j in A*Pc.
+ * If options->Fact == DOFACT, perm_c is both input and output.
+ * On output, it is changed according to a postorder of etree.
+ * Otherwise, perm_c is input.
+ *
+ * etree (input/output) int*
+ * Elimination tree of Pc'*A'*A*Pc, dimension A->ncol.
+ * If options->Fact == DOFACT, etree is an output argument,
+ * otherwise it is an input argument.
+ * Note: etree is a vector of parent pointers for a forest whose
+ * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *
+ * AC (output) SuperMatrix*
+ * The resulting matrix after applied the column permutation
+ * perm_c[] to matrix A. The type of AC can be:
+ * Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
+ *
+ */
+
+ NCformat *Astore;
+ NCPformat *ACstore;
+ int *iwork, *post;
+ register int n, i;
+
+ n = A->ncol;
+
+ /* Apply column permutation perm_c to A's column pointers so to
+ obtain NCP format in AC = A*Pc. */
+ AC->Stype = SLU_NCP;
+ AC->Dtype = A->Dtype;
+ AC->Mtype = A->Mtype;
+ AC->nrow = A->nrow;
+ AC->ncol = A->ncol;
+ Astore = A->Store;
+ ACstore = AC->Store = (void *) SUPERLU_MALLOC( sizeof(NCPformat) );
+ if ( !ACstore ) ABORT("SUPERLU_MALLOC fails for ACstore");
+ ACstore->nnz = Astore->nnz;
+ ACstore->nzval = Astore->nzval;
+ ACstore->rowind = Astore->rowind;
+ ACstore->colbeg = (int*) SUPERLU_MALLOC(n*sizeof(int));
+ if ( !(ACstore->colbeg) ) ABORT("SUPERLU_MALLOC fails for ACstore->colbeg");
+ ACstore->colend = (int*) SUPERLU_MALLOC(n*sizeof(int));
+ if ( !(ACstore->colend) ) ABORT("SUPERLU_MALLOC fails for ACstore->colend");
+
+#ifdef DEBUG
+ print_int_vec("pre_order:", n, perm_c);
+ check_perm("Initial perm_c", n, perm_c);
+#endif
+
+ for (i = 0; i < n; i++) {
+ ACstore->colbeg[perm_c[i]] = Astore->colptr[i];
+ ACstore->colend[perm_c[i]] = Astore->colptr[i+1];
+ }
+
+ if ( options->Fact == DOFACT ) {
+#undef ETREE_ATplusA
+#ifdef ETREE_ATplusA
+ /*--------------------------------------------
+ COMPUTE THE ETREE OF Pc*(A'+A)*Pc'.
+ --------------------------------------------*/
+ int *b_colptr, *b_rowind, bnz, j;
+ int *c_colbeg, *c_colend;
+
+ /*printf("Use etree(A'+A)\n");*/
+
+ /* Form B = A + A'. */
+ at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
+ &bnz, &b_colptr, &b_rowind);
+
+ /* Form C = Pc*B*Pc'. */
+ c_colbeg = (int*) SUPERLU_MALLOC(2*n*sizeof(int));
+ c_colend = c_colbeg + n;
+ if (!c_colbeg ) ABORT("SUPERLU_MALLOC fails for c_colbeg/c_colend");
+ for (i = 0; i < n; i++) {
+ c_colbeg[perm_c[i]] = b_colptr[i];
+ c_colend[perm_c[i]] = b_colptr[i+1];
+ }
+ for (j = 0; j < n; ++j) {
+ for (i = c_colbeg[j]; i < c_colend[j]; ++i) {
+ b_rowind[i] = perm_c[b_rowind[i]];
+ }
+ }
+
+ /* Compute etree of C. */
+ sp_symetree(c_colbeg, c_colend, b_rowind, n, etree);
+
+ SUPERLU_FREE(b_colptr);
+ if ( bnz ) SUPERLU_FREE(b_rowind);
+ SUPERLU_FREE(c_colbeg);
+
+#else
+ /*--------------------------------------------
+ COMPUTE THE COLUMN ELIMINATION TREE.
+ --------------------------------------------*/
+ sp_coletree(ACstore->colbeg, ACstore->colend, ACstore->rowind,
+ A->nrow, A->ncol, etree);
+#endif
+#ifdef DEBUG
+ print_int_vec("etree:", n, etree);
+#endif
+
+ /* In symmetric mode, do not do postorder here. */
+ if ( options->SymmetricMode == NO ) {
+ /* Post order etree */
+ post = (int *) TreePostorder(n, etree);
+ /* for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
+ iwork = post; */
+
+#ifdef DEBUG
+ print_int_vec("post:", n+1, post);
+ check_perm("post", n, post);
+#endif
+ iwork = (int*) SUPERLU_MALLOC((n+1)*sizeof(int));
+ if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
+
+ /* Renumber etree in postorder */
+ for (i = 0; i < n; ++i) iwork[post[i]] = post[etree[i]];
+ for (i = 0; i < n; ++i) etree[i] = iwork[i];
+
+#ifdef DEBUG
+ print_int_vec("postorder etree:", n, etree);
+#endif
+
+ /* Postmultiply A*Pc by post[] */
+ for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colbeg[i];
+ for (i = 0; i < n; ++i) ACstore->colbeg[i] = iwork[i];
+ for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colend[i];
+ for (i = 0; i < n; ++i) ACstore->colend[i] = iwork[i];
+
+ for (i = 0; i < n; ++i)
+ iwork[i] = post[perm_c[i]]; /* product of perm_c and post */
+ for (i = 0; i < n; ++i) perm_c[i] = iwork[i];
+
+#ifdef DEBUG
+ print_int_vec("Pc*post:", n, perm_c);
+ check_perm("final perm_c", n, perm_c);
+#endif
+ SUPERLU_FREE (post);
+ SUPERLU_FREE (iwork);
+ } /* end postordering */
+
+ } /* if options->Fact == DOFACT ... */
+
+}
+
+int check_perm(char *what, int n, int *perm)
+{
+ register int i;
+ int *marker;
+ marker = (int *) calloc(n, sizeof(int));
+
+ for (i = 0; i < n; ++i) {
+ if ( marker[perm[i]] == 1 || perm[i] >= n ) {
+ printf("%s: Not a valid PERM[%d] = %d\n", what, i, perm[i]);
+ ABORT("check_perm");
+ } else {
+ marker[perm[i]] = 1;
+ }
+ }
+
+ SUPERLU_FREE(marker);
+ return 0;
+}
diff --git a/intern/opennl/superlu/spanel_bmod.c b/intern/opennl/superlu/spanel_bmod.c
new file mode 100644
index 00000000000..a59a9086df1
--- /dev/null
+++ b/intern/opennl/superlu/spanel_bmod.c
@@ -0,0 +1,449 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+/*
+ * Function prototypes
+ */
+void slsolve(int, int, float *, float *);
+void smatvec(int, int, int, float *, float *, float *);
+extern void scheck_tempv();
+
+void
+spanel_bmod (
+ const int m, /* in - number of rows in the matrix */
+ const int w, /* in */
+ const int jcol, /* in */
+ const int nseg, /* in */
+ float *dense, /* out, of size n by w */
+ float *tempv, /* working array */
+ int *segrep, /* in */
+ int *repfnz, /* in, of size n by w */
+ GlobalLU_t *Glu, /* modified */
+ SuperLUStat_t *stat /* output */
+ )
+{
+/*
+ * Purpose
+ * =======
+ *
+ * Performs numeric block updates (sup-panel) in topological order.
+ * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ * Special processing on the supernodal portion of L\U[*,j]
+ *
+ * Before entering this routine, the original nonzeros in the panel
+ * were already copied into the spa[m,w].
+ *
+ * Updated/Output parameters-
+ * dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned
+ * collectively in the m-by-w vector dense[*].
+ *
+ */
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ _fcd ftcs1 = _cptofcd("L", strlen("L")),
+ ftcs2 = _cptofcd("N", strlen("N")),
+ ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+ int incx = 1, incy = 1;
+ float alpha, beta;
+#endif
+
+ register int k, ksub;
+ int fsupc, nsupc, nsupr, nrow;
+ int krep, krep_ind;
+ float ukj, ukj1, ukj2;
+ int luptr, luptr1, luptr2;
+ int segsze;
+ int block_nrow; /* no of rows in a block row */
+ register int lptr; /* Points to the row subscripts of a supernode */
+ int kfnz, irow, no_zeros;
+ register int isub, isub1, i;
+ register int jj; /* Index through each column in the panel */
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ int *repfnz_col; /* repfnz[] for a column in the panel */
+ float *dense_col; /* dense[] for a column in the panel */
+ float *tempv1; /* Used in 1-D update */
+ float *TriTmp, *MatvecTmp; /* used in 2-D update */
+ float zero = 0.0;
+ register int ldaTmp;
+ register int r_ind, r_hi;
+ static int first = 1, maxsuper, rowblk, colblk;
+ flops_t *ops = stat->ops;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+
+ if ( first ) {
+ maxsuper = sp_ienv(3);
+ rowblk = sp_ienv(4);
+ colblk = sp_ienv(5);
+ first = 0;
+ }
+ ldaTmp = maxsuper + rowblk;
+
+ /*
+ * For each nonz supernode segment of U[*,j] in topological order
+ */
+ k = nseg - 1;
+ for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
+
+ /* krep = representative of current k-th supernode
+ * fsupc = first supernodal column
+ * nsupc = no of columns in a supernode
+ * nsupr = no of rows in a supernode
+ */
+ krep = segrep[k--];
+ fsupc = xsup[supno[krep]];
+ nsupc = krep - fsupc + 1;
+ nsupr = xlsub[fsupc+1] - xlsub[fsupc];
+ nrow = nsupr - nsupc;
+ lptr = xlsub[fsupc];
+ krep_ind = lptr + nsupc - 1;
+
+ repfnz_col = repfnz;
+ dense_col = dense;
+
+ if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
+
+ TriTmp = tempv;
+
+ /* Sequence through each column in panel -- triangular solves */
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
+
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ luptr = xlusup[fsupc];
+
+ ops[TRSV] += segsze * (segsze - 1);
+ ops[GEMV] += 2 * nrow * segsze;
+
+ /* Case 1: Update U-segment of size 1 -- col-col update */
+ if ( segsze == 1 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc;
+
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
+ irow = lsub[i];
+ dense_col[irow] -= ukj * lusup[luptr];
+ ++luptr;
+ }
+
+ } else if ( segsze <= 3 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ ukj1 = dense_col[lsub[krep_ind - 1]];
+ luptr += nsupr*(nsupc-1) + nsupc-1;
+ luptr1 = luptr - nsupr;
+
+ if ( segsze == 2 ) {
+ ukj -= ukj1 * lusup[luptr1];
+ dense_col[lsub[krep_ind]] = ukj;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++; luptr1++;
+ dense_col[irow] -= (ukj*lusup[luptr]
+ + ukj1*lusup[luptr1]);
+ }
+ } else {
+ ukj2 = dense_col[lsub[krep_ind - 2]];
+ luptr2 = luptr1 - nsupr;
+ ukj1 -= ukj2 * lusup[luptr2-1];
+ ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+ dense_col[lsub[krep_ind]] = ukj;
+ dense_col[lsub[krep_ind-1]] = ukj1;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++; luptr1++; luptr2++;
+ dense_col[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+ }
+ }
+
+ } else { /* segsze >= 4 */
+
+ /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
+ holds the result of triangular solves. */
+ no_zeros = kfnz - fsupc;
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; ++i) {
+ irow = lsub[isub];
+ TriTmp[i] = dense_col[irow]; /* Gather */
+ ++isub;
+ }
+
+ /* start effective triangle */
+ luptr += nsupr * no_zeros + no_zeros;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
+ &nsupr, TriTmp, &incx );
+#else
+ strsv_( "L", "N", "U", &segsze, &lusup[luptr],
+ &nsupr, TriTmp, &incx );
+#endif
+#else
+ slsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
+#endif
+
+
+ } /* else ... */
+
+ } /* for jj ... end tri-solves */
+
+ /* Block row updates; push all the way into dense[*] block */
+ for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
+
+ r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
+ block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
+ luptr = xlusup[fsupc] + nsupc + r_ind;
+ isub1 = lptr + nsupc + r_ind;
+
+ repfnz_col = repfnz;
+ TriTmp = tempv;
+ dense_col = dense;
+
+ /* Sequence through each column in panel -- matrix-vector */
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
+
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ if ( segsze <= 3 ) continue; /* skip unrolled cases */
+
+ /* Perform a block update, and scatter the result of
+ matrix-vector to dense[]. */
+ no_zeros = kfnz - fsupc;
+ luptr1 = luptr + nsupr * no_zeros;
+ MatvecTmp = &TriTmp[maxsuper];
+
+#ifdef USE_VENDOR_BLAS
+ alpha = one;
+ beta = zero;
+#ifdef _CRAY
+ SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1],
+ &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
+#else
+ sgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1],
+ &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
+#endif
+#else
+ smatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
+ TriTmp, MatvecTmp);
+#endif
+
+ /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
+ * such that MatvecTmp[*] can be re-used for the
+ * the next blok row update. dense[] will be copied into
+ * global store after the whole panel has been finished.
+ */
+ isub = isub1;
+ for (i = 0; i < block_nrow; i++) {
+ irow = lsub[isub];
+ dense_col[irow] -= MatvecTmp[i];
+ MatvecTmp[i] = zero;
+ ++isub;
+ }
+
+ } /* for jj ... */
+
+ } /* for each block row ... */
+
+ /* Scatter the triangular solves into SPA dense[*] */
+ repfnz_col = repfnz;
+ TriTmp = tempv;
+ dense_col = dense;
+
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ if ( segsze <= 3 ) continue; /* skip unrolled cases */
+
+ no_zeros = kfnz - fsupc;
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ dense_col[irow] = TriTmp[i];
+ TriTmp[i] = zero;
+ ++isub;
+ }
+
+ } /* for jj ... */
+
+ } else { /* 1-D block modification */
+
+
+ /* Sequence through each column in the panel */
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m) {
+
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ luptr = xlusup[fsupc];
+
+ ops[TRSV] += segsze * (segsze - 1);
+ ops[GEMV] += 2 * nrow * segsze;
+
+ /* Case 1: Update U-segment of size 1 -- col-col update */
+ if ( segsze == 1 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc;
+
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
+ irow = lsub[i];
+ dense_col[irow] -= ukj * lusup[luptr];
+ ++luptr;
+ }
+
+ } else if ( segsze <= 3 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc-1;
+ ukj1 = dense_col[lsub[krep_ind - 1]];
+ luptr1 = luptr - nsupr;
+
+ if ( segsze == 2 ) {
+ ukj -= ukj1 * lusup[luptr1];
+ dense_col[lsub[krep_ind]] = ukj;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ ++luptr; ++luptr1;
+ dense_col[irow] -= (ukj*lusup[luptr]
+ + ukj1*lusup[luptr1]);
+ }
+ } else {
+ ukj2 = dense_col[lsub[krep_ind - 2]];
+ luptr2 = luptr1 - nsupr;
+ ukj1 -= ukj2 * lusup[luptr2-1];
+ ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+ dense_col[lsub[krep_ind]] = ukj;
+ dense_col[lsub[krep_ind-1]] = ukj1;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ ++luptr; ++luptr1; ++luptr2;
+ dense_col[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+ }
+ }
+
+ } else { /* segsze >= 4 */
+ /*
+ * Perform a triangular solve and block update,
+ * then scatter the result of sup-col update to dense[].
+ */
+ no_zeros = kfnz - fsupc;
+
+ /* Copy U[*,j] segment from dense[*] to tempv[*]:
+ * The result of triangular solve is in tempv[*];
+ * The result of matrix vector update is in dense_col[*]
+ */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; ++i) {
+ irow = lsub[isub];
+ tempv[i] = dense_col[irow]; /* Gather */
+ ++isub;
+ }
+
+ /* start effective triangle */
+ luptr += nsupr * no_zeros + no_zeros;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#else
+ strsv_( "L", "N", "U", &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#endif
+
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ alpha = one;
+ beta = zero;
+#ifdef _CRAY
+ SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#else
+ sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#endif
+#else
+ slsolve ( nsupr, segsze, &lusup[luptr], tempv );
+
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ smatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
+#endif
+
+ /* Scatter tempv[*] into SPA dense[*] temporarily, such
+ * that tempv[*] can be used for the triangular solve of
+ * the next column of the panel. They will be copied into
+ * ucol[*] after the whole panel has been finished.
+ */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ dense_col[irow] = tempv[i];
+ tempv[i] = zero;
+ isub++;
+ }
+
+ /* Scatter the update from tempv1[*] into SPA dense[*] */
+ /* Start dense rectangular L */
+ for (i = 0; i < nrow; i++) {
+ irow = lsub[isub];
+ dense_col[irow] -= tempv1[i];
+ tempv1[i] = zero;
+ ++isub;
+ }
+
+ } /* else segsze>=4 ... */
+
+ } /* for each column in the panel... */
+
+ } /* else 1-D update ... */
+
+ } /* for each updating supernode ... */
+
+}
+
+
+
diff --git a/intern/opennl/superlu/spanel_dfs.c b/intern/opennl/superlu/spanel_dfs.c
new file mode 100644
index 00000000000..7f5f3c7532a
--- /dev/null
+++ b/intern/opennl/superlu/spanel_dfs.c
@@ -0,0 +1,249 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+void
+spanel_dfs (
+ const int m, /* in - number of rows in the matrix */
+ const int w, /* in */
+ const int jcol, /* in */
+ SuperMatrix *A, /* in - original matrix */
+ int *perm_r, /* in */
+ int *nseg, /* out */
+ float *dense, /* out */
+ int *panel_lsub, /* out */
+ int *segrep, /* out */
+ int *repfnz, /* out */
+ int *xprune, /* out */
+ int *marker, /* out */
+ int *parent, /* working array */
+ int *xplore, /* working array */
+ GlobalLU_t *Glu /* modified */
+ )
+{
+/*
+ * Purpose
+ * =======
+ *
+ * Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
+ *
+ * A supernode representative is the last column of a supernode.
+ * The nonzeros in U[*,j] are segments that end at supernodal
+ * representatives.
+ *
+ * The routine returns one list of the supernodal representatives
+ * in topological order of the dfs that generates them. This list is
+ * a superset of the topological order of each individual column within
+ * the panel.
+ * The location of the first nonzero in each supernodal segment
+ * (supernodal entry location) is also returned. Each column has a
+ * separate list for this purpose.
+ *
+ * Two marker arrays are used for dfs:
+ * marker[i] == jj, if i was visited during dfs of current column jj;
+ * marker1[i] >= jcol, if i was visited by earlier columns in this panel;
+ *
+ * marker: A-row --> A-row/col (0/1)
+ * repfnz: SuperA-col --> PA-row
+ * parent: SuperA-col --> SuperA-col
+ * xplore: SuperA-col --> index to L-structure
+ *
+ */
+ NCPformat *Astore;
+ float *a;
+ int *asub;
+ int *xa_begin, *xa_end;
+ int krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
+ int k, krow, kmark, kperm;
+ int xdfs, maxdfs, kpar;
+ int jj; /* index through each column in the panel */
+ int *marker1; /* marker1[jj] >= jcol if vertex jj was visited
+ by a previous column within this panel. */
+ int *repfnz_col; /* start of each column in the panel */
+ float *dense_col; /* start of each column in the panel */
+ int nextl_col; /* next available position in panel_lsub[*,jj] */
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+
+ /* Initialize pointers */
+ Astore = A->Store;
+ a = Astore->nzval;
+ asub = Astore->rowind;
+ xa_begin = Astore->colbeg;
+ xa_end = Astore->colend;
+ marker1 = marker + m;
+ repfnz_col = repfnz;
+ dense_col = dense;
+ *nseg = 0;
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+
+ /* For each column in the panel */
+ for (jj = jcol; jj < jcol + w; jj++) {
+ nextl_col = (jj - jcol) * m;
+
+#ifdef CHK_DFS
+ printf("\npanel col %d: ", jj);
+#endif
+
+ /* For each nonz in A[*,jj] do dfs */
+ for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
+ krow = asub[k];
+ dense_col[krow] = a[k];
+ kmark = marker[krow];
+ if ( kmark == jj )
+ continue; /* krow visited before, go to the next nonzero */
+
+ /* For each unmarked nbr krow of jj
+ * krow is in L: place it in structure of L[*,jj]
+ */
+ marker[krow] = jj;
+ kperm = perm_r[krow];
+
+ if ( kperm == EMPTY ) {
+ panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
+ }
+ /*
+ * krow is in U: if its supernode-rep krep
+ * has been explored, update repfnz[*]
+ */
+ else {
+
+ krep = xsup[supno[kperm]+1] - 1;
+ myfnz = repfnz_col[krep];
+
+#ifdef CHK_DFS
+ printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm);
+#endif
+ if ( myfnz != EMPTY ) { /* Representative visited before */
+ if ( myfnz > kperm ) repfnz_col[krep] = kperm;
+ /* continue; */
+ }
+ else {
+ /* Otherwise, perform dfs starting at krep */
+ oldrep = EMPTY;
+ parent[krep] = oldrep;
+ repfnz_col[krep] = kperm;
+ xdfs = xlsub[krep];
+ maxdfs = xprune[krep];
+
+#ifdef CHK_DFS
+ printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs);
+ for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
+ printf("\n");
+#endif
+ do {
+ /*
+ * For each unmarked kchild of krep
+ */
+ while ( xdfs < maxdfs ) {
+
+ kchild = lsub[xdfs];
+ xdfs++;
+ chmark = marker[kchild];
+
+ if ( chmark != jj ) { /* Not reached yet */
+ marker[kchild] = jj;
+ chperm = perm_r[kchild];
+
+ /* Case kchild is in L: place it in L[*,j] */
+ if ( chperm == EMPTY ) {
+ panel_lsub[nextl_col++] = kchild;
+ }
+ /* Case kchild is in U:
+ * chrep = its supernode-rep. If its rep has
+ * been explored, update its repfnz[*]
+ */
+ else {
+
+ chrep = xsup[supno[chperm]+1] - 1;
+ myfnz = repfnz_col[chrep];
+#ifdef CHK_DFS
+ printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm);
+#endif
+ if ( myfnz != EMPTY ) { /* Visited before */
+ if ( myfnz > chperm )
+ repfnz_col[chrep] = chperm;
+ }
+ else {
+ /* Cont. dfs at snode-rep of kchild */
+ xplore[krep] = xdfs;
+ oldrep = krep;
+ krep = chrep; /* Go deeper down G(L) */
+ parent[krep] = oldrep;
+ repfnz_col[krep] = chperm;
+ xdfs = xlsub[krep];
+ maxdfs = xprune[krep];
+#ifdef CHK_DFS
+ printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs);
+ for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
+ printf("\n");
+#endif
+ } /* else */
+
+ } /* else */
+
+ } /* if... */
+
+ } /* while xdfs < maxdfs */
+
+ /* krow has no more unexplored nbrs:
+ * Place snode-rep krep in postorder DFS, if this
+ * segment is seen for the first time. (Note that
+ * "repfnz[krep]" may change later.)
+ * Backtrack dfs to its parent.
+ */
+ if ( marker1[krep] < jcol ) {
+ segrep[*nseg] = krep;
+ ++(*nseg);
+ marker1[krep] = jj;
+ }
+
+ kpar = parent[krep]; /* Pop stack, mimic recursion */
+ if ( kpar == EMPTY ) break; /* dfs done */
+ krep = kpar;
+ xdfs = xplore[krep];
+ maxdfs = xprune[krep];
+
+#ifdef CHK_DFS
+ printf(" pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs);
+ for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
+ printf("\n");
+#endif
+ } while ( kpar != EMPTY ); /* do-while - until empty stack */
+
+ } /* else */
+
+ } /* else */
+
+ } /* for each nonz in A[*,jj] */
+
+ repfnz_col += m; /* Move to next column */
+ dense_col += m;
+
+ } /* for jj ... */
+
+}
diff --git a/intern/opennl/superlu/spivotL.c b/intern/opennl/superlu/spivotL.c
new file mode 100644
index 00000000000..6243065bb5b
--- /dev/null
+++ b/intern/opennl/superlu/spivotL.c
@@ -0,0 +1,173 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+#undef DEBUG
+
+int
+spivotL(
+ const int jcol, /* in */
+ const float u, /* in - diagonal pivoting threshold */
+ int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */
+ int *perm_r, /* may be modified */
+ int *iperm_r, /* in - inverse of perm_r */
+ int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
+ int *pivrow, /* out */
+ GlobalLU_t *Glu, /* modified - global LU data structures */
+ SuperLUStat_t *stat /* output */
+ )
+{
+/*
+ * Purpose
+ * =======
+ * Performs the numerical pivoting on the current column of L,
+ * and the CDIV operation.
+ *
+ * Pivot policy:
+ * (1) Compute thresh = u * max_(i>=j) abs(A_ij);
+ * (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
+ * pivot row = k;
+ * ELSE IF abs(A_jj) >= thresh THEN
+ * pivot row = j;
+ * ELSE
+ * pivot row = m;
+ *
+ * Note: If you absolutely want to use a given pivot order, then set u=0.0.
+ *
+ * Return value: 0 success;
+ * i > 0 U(i,i) is exactly zero.
+ *
+ */
+ int fsupc; /* first column in the supernode */
+ int nsupc; /* no of columns in the supernode */
+ int nsupr; /* no of rows in the supernode */
+ int lptr; /* points to the starting subscript of the supernode */
+ int pivptr, old_pivptr, diag, diagind;
+ float pivmax, rtemp, thresh;
+ float temp;
+ float *lu_sup_ptr;
+ float *lu_col_ptr;
+ int *lsub_ptr;
+ int isub, icol, k, itemp;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ flops_t *ops = stat->ops;
+
+ /* Initialize pointers */
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+ fsupc = (Glu->xsup)[(Glu->supno)[jcol]];
+ nsupc = jcol - fsupc; /* excluding jcol; nsupc >= 0 */
+ lptr = xlsub[fsupc];
+ nsupr = xlsub[fsupc+1] - lptr;
+ lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */
+ lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
+ lsub_ptr = &lsub[lptr]; /* start of row indices of the supernode */
+
+#ifdef DEBUG
+if ( jcol == MIN_COL ) {
+ printf("Before cdiv: col %d\n", jcol);
+ for (k = nsupc; k < nsupr; k++)
+ printf(" lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
+}
+#endif
+
+ /* Determine the largest abs numerical value for partial pivoting;
+ Also search for user-specified pivot, and diagonal element. */
+ if ( *usepr ) *pivrow = iperm_r[jcol];
+ diagind = iperm_c[jcol];
+ pivmax = 0.0;
+ pivptr = nsupc;
+ diag = EMPTY;
+ old_pivptr = nsupc;
+ for (isub = nsupc; isub < nsupr; ++isub) {
+ rtemp = fabs (lu_col_ptr[isub]);
+ if ( rtemp > pivmax ) {
+ pivmax = rtemp;
+ pivptr = isub;
+ }
+ if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
+ if ( lsub_ptr[isub] == diagind ) diag = isub;
+ }
+
+ /* Test for singularity */
+ if ( pivmax == 0.0 ) {
+ *pivrow = lsub_ptr[pivptr];
+ perm_r[*pivrow] = jcol;
+ *usepr = 0;
+ return (jcol+1);
+ }
+
+ thresh = u * pivmax;
+
+ /* Choose appropriate pivotal element by our policy. */
+ if ( *usepr ) {
+ rtemp = fabs (lu_col_ptr[old_pivptr]);
+ if ( rtemp != 0.0 && rtemp >= thresh )
+ pivptr = old_pivptr;
+ else
+ *usepr = 0;
+ }
+ if ( *usepr == 0 ) {
+ /* Use diagonal pivot? */
+ if ( diag >= 0 ) { /* diagonal exists */
+ rtemp = fabs (lu_col_ptr[diag]);
+ if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
+ }
+ *pivrow = lsub_ptr[pivptr];
+ }
+
+ /* Record pivot row */
+ perm_r[*pivrow] = jcol;
+
+ /* Interchange row subscripts */
+ if ( pivptr != nsupc ) {
+ itemp = lsub_ptr[pivptr];
+ lsub_ptr[pivptr] = lsub_ptr[nsupc];
+ lsub_ptr[nsupc] = itemp;
+
+ /* Interchange numerical values as well, for the whole snode, such
+ * that L is indexed the same way as A.
+ */
+ for (icol = 0; icol <= nsupc; icol++) {
+ itemp = pivptr + icol * nsupr;
+ temp = lu_sup_ptr[itemp];
+ lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
+ lu_sup_ptr[nsupc + icol*nsupr] = temp;
+ }
+ } /* if */
+
+ /* cdiv operation */
+ ops[FACT] += nsupr - nsupc;
+
+ temp = 1.0 / lu_col_ptr[nsupc];
+ for (k = nsupc+1; k < nsupr; k++)
+ lu_col_ptr[k] *= temp;
+
+ return 0;
+}
+
diff --git a/intern/opennl/superlu/spruneL.c b/intern/opennl/superlu/spruneL.c
new file mode 100644
index 00000000000..59702706375
--- /dev/null
+++ b/intern/opennl/superlu/spruneL.c
@@ -0,0 +1,149 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+void
+spruneL(
+ const int jcol, /* in */
+ const int *perm_r, /* in */
+ const int pivrow, /* in */
+ const int nseg, /* in */
+ const int *segrep, /* in */
+ const int *repfnz, /* in */
+ int *xprune, /* out */
+ GlobalLU_t *Glu /* modified - global LU data structures */
+ )
+{
+/*
+ * Purpose
+ * =======
+ * Prunes the L-structure of supernodes whose L-structure
+ * contains the current pivot row "pivrow"
+ *
+ */
+ float utemp;
+ int jsupno, irep, irep1, kmin, kmax, krow, movnum;
+ int i, ktemp, minloc, maxloc;
+ int do_prune; /* logical variable */
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+
+ /*
+ * For each supernode-rep irep in U[*,j]
+ */
+ jsupno = supno[jcol];
+ for (i = 0; i < nseg; i++) {
+
+ irep = segrep[i];
+ irep1 = irep + 1;
+ do_prune = FALSE;
+
+ /* Don't prune with a zero U-segment */
+ if ( repfnz[irep] == EMPTY )
+ continue;
+
+ /* If a snode overlaps with the next panel, then the U-segment
+ * is fragmented into two parts -- irep and irep1. We should let
+ * pruning occur at the rep-column in irep1's snode.
+ */
+ if ( supno[irep] == supno[irep1] ) /* Don't prune */
+ continue;
+
+ /*
+ * If it has not been pruned & it has a nonz in row L[pivrow,i]
+ */
+ if ( supno[irep] != jsupno ) {
+ if ( xprune[irep] >= xlsub[irep1] ) {
+ kmin = xlsub[irep];
+ kmax = xlsub[irep1] - 1;
+ for (krow = kmin; krow <= kmax; krow++)
+ if ( lsub[krow] == pivrow ) {
+ do_prune = TRUE;
+ break;
+ }
+ }
+
+ if ( do_prune ) {
+
+ /* Do a quicksort-type partition
+ * movnum=TRUE means that the num values have to be exchanged.
+ */
+ movnum = FALSE;
+ if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
+ movnum = TRUE;
+
+ while ( kmin <= kmax ) {
+
+ if ( perm_r[lsub[kmax]] == EMPTY )
+ kmax--;
+ else if ( perm_r[lsub[kmin]] != EMPTY )
+ kmin++;
+ else { /* kmin below pivrow, and kmax above pivrow:
+ * interchange the two subscripts
+ */
+ ktemp = lsub[kmin];
+ lsub[kmin] = lsub[kmax];
+ lsub[kmax] = ktemp;
+
+ /* If the supernode has only one column, then we
+ * only keep one set of subscripts. For any subscript
+ * interchange performed, similar interchange must be
+ * done on the numerical values.
+ */
+ if ( movnum ) {
+ minloc = xlusup[irep] + (kmin - xlsub[irep]);
+ maxloc = xlusup[irep] + (kmax - xlsub[irep]);
+ utemp = lusup[minloc];
+ lusup[minloc] = lusup[maxloc];
+ lusup[maxloc] = utemp;
+ }
+
+ kmin++;
+ kmax--;
+
+ }
+
+ } /* while */
+
+ xprune[irep] = kmin; /* Pruning */
+
+#ifdef CHK_PRUNE
+ printf(" After spruneL(),using col %d: xprune[%d] = %d\n",
+ jcol, irep, kmin);
+#endif
+ } /* if do_prune */
+
+ } /* if */
+
+ } /* for each U-segment... */
+}
diff --git a/intern/opennl/superlu/ssnode_bmod.c b/intern/opennl/superlu/ssnode_bmod.c
new file mode 100644
index 00000000000..fe97abd9ff6
--- /dev/null
+++ b/intern/opennl/superlu/ssnode_bmod.c
@@ -0,0 +1,117 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+
+/*
+ * Performs numeric block updates within the relaxed snode.
+ */
+int
+ssnode_bmod (
+ const int jcol, /* in */
+ const int fsupc, /* in */
+ float *dense, /* in */
+ float *tempv, /* working array */
+ GlobalLU_t *Glu, /* modified */
+ SuperLUStat_t *stat /* output */
+ )
+{
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ _fcd ftcs1 = _cptofcd("L", strlen("L")),
+ ftcs2 = _cptofcd("N", strlen("N")),
+ ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+ int incx = 1, incy = 1;
+ float alpha = -1.0, beta = 1.0;
+#endif
+
+ int luptr, nsupc, nsupr, nrow;
+ int isub, irow, i, iptr;
+ register int ufirst, nextlu;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ flops_t *ops = stat->ops;
+
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+
+ nextlu = xlusup[jcol];
+
+ /*
+ * Process the supernodal portion of L\U[*,j]
+ */
+ for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
+ irow = lsub[isub];
+ lusup[nextlu] = dense[irow];
+ dense[irow] = 0;
+ ++nextlu;
+ }
+
+ xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
+
+ if ( fsupc < jcol ) {
+
+ luptr = xlusup[fsupc];
+ nsupr = xlsub[fsupc+1] - xlsub[fsupc];
+ nsupc = jcol - fsupc; /* Excluding jcol */
+ ufirst = xlusup[jcol]; /* Points to the beginning of column
+ jcol in supernode L\U(jsupno). */
+ nrow = nsupr - nsupc;
+
+ ops[TRSV] += nsupc * (nsupc - 1);
+ ops[GEMV] += 2 * nrow * nsupc;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr,
+ &lusup[ufirst], &incx );
+ SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+ &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#else
+ strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr,
+ &lusup[ufirst], &incx );
+ sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+ &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#endif
+#else
+ slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
+ smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
+ &lusup[ufirst], &tempv[0] );
+
+ /* Scatter tempv[*] into lusup[*] */
+ iptr = ufirst + nsupc;
+ for (i = 0; i < nrow; i++) {
+ lusup[iptr++] -= tempv[i];
+ tempv[i] = 0.0;
+ }
+#endif
+
+ }
+
+ return 0;
+}
diff --git a/intern/opennl/superlu/ssnode_dfs.c b/intern/opennl/superlu/ssnode_dfs.c
new file mode 100644
index 00000000000..c8974237a9a
--- /dev/null
+++ b/intern/opennl/superlu/ssnode_dfs.c
@@ -0,0 +1,106 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+ssnode_dfs (
+ const int jcol, /* in - start of the supernode */
+ const int kcol, /* in - end of the supernode */
+ const int *asub, /* in */
+ const int *xa_begin, /* in */
+ const int *xa_end, /* in */
+ int *xprune, /* out */
+ int *marker, /* modified */
+ GlobalLU_t *Glu /* modified */
+ )
+{
+/* Purpose
+ * =======
+ * ssnode_dfs() - Determine the union of the row structures of those
+ * columns within the relaxed snode.
+ * Note: The relaxed snodes are leaves of the supernodal etree, therefore,
+ * the portion outside the rectangular supernode must be zero.
+ *
+ * Return value
+ * ============
+ * 0 success;
+ * >0 number of bytes allocated when run out of memory.
+ *
+ */
+ register int i, k, ifrom, ito, nextl, new_next;
+ int nsuper, krow, kmark, mem_error;
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ int nzlmax;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ nzlmax = Glu->nzlmax;
+
+ nsuper = ++supno[jcol]; /* Next available supernode number */
+ nextl = xlsub[jcol];
+
+ for (i = jcol; i <= kcol; i++) {
+ /* For each nonzero in A[*,i] */
+ for (k = xa_begin[i]; k < xa_end[i]; k++) {
+ krow = asub[k];
+ kmark = marker[krow];
+ if ( kmark != kcol ) { /* First time visit krow */
+ marker[krow] = kcol;
+ lsub[nextl++] = krow;
+ if ( nextl >= nzlmax ) {
+ if ( (mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)) )
+ return (mem_error);
+ lsub = Glu->lsub;
+ }
+ }
+ }
+ supno[i] = nsuper;
+ }
+
+ /* Supernode > 1, then make a copy of the subscripts for pruning */
+ if ( jcol < kcol ) {
+ new_next = nextl + (nextl - xlsub[jcol]);
+ while ( new_next > nzlmax ) {
+ if ( (mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)) )
+ return (mem_error);
+ lsub = Glu->lsub;
+ }
+ ito = nextl;
+ for (ifrom = xlsub[jcol]; ifrom < nextl; )
+ lsub[ito++] = lsub[ifrom++];
+ for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
+ nextl = ito;
+ }
+
+ xsup[nsuper+1] = kcol + 1;
+ supno[kcol+1] = nsuper;
+ xprune[kcol] = nextl;
+ xlsub[kcol+1] = nextl;
+
+ return 0;
+}
+
diff --git a/intern/opennl/superlu/ssp_blas2.c b/intern/opennl/superlu/ssp_blas2.c
new file mode 100644
index 00000000000..347f9ab5fd4
--- /dev/null
+++ b/intern/opennl/superlu/ssp_blas2.c
@@ -0,0 +1,469 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ * File name: ssp_blas2.c
+ * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
+ */
+
+#include "ssp_defs.h"
+
+/*
+ * Function prototypes
+ */
+void susolve(int, int, float*, float*);
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+int strsv_(char*, char*, char*, int*, float*, int*, float*, int*);
+
+int
+sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
+ SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * sp_strsv() solves one of the systems of equations
+ * A*x = b, or A'*x = b,
+ * where b and x are n element vectors and A is a sparse unit , or
+ * non-unit, upper or lower triangular matrix.
+ * No test for singularity or near-singularity is included in this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Parameters
+ * ==========
+ *
+ * uplo - (input) char*
+ * On entry, uplo specifies whether the matrix is an upper or
+ * lower triangular matrix as follows:
+ * uplo = 'U' or 'u' A is an upper triangular matrix.
+ * uplo = 'L' or 'l' A is a lower triangular matrix.
+ *
+ * trans - (input) char*
+ * On entry, trans specifies the equations to be solved as
+ * follows:
+ * trans = 'N' or 'n' A*x = b.
+ * trans = 'T' or 't' A'*x = b.
+ * trans = 'C' or 'c' A'*x = b.
+ *
+ * diag - (input) char*
+ * On entry, diag specifies whether or not A is unit
+ * triangular as follows:
+ * diag = 'U' or 'u' A is assumed to be unit triangular.
+ * diag = 'N' or 'n' A is not assumed to be unit
+ * triangular.
+ *
+ * L - (input) SuperMatrix*
+ * The factor L from the factorization Pr*A*Pc=L*U. Use
+ * compressed row subscripts storage for supernodes,
+ * i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.
+ *
+ * U - (input) SuperMatrix*
+ * The factor U from the factorization Pr*A*Pc=L*U.
+ * U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
+ *
+ * x - (input/output) float*
+ * Before entry, the incremented array X must contain the n
+ * element right-hand side vector b. On exit, X is overwritten
+ * with the solution vector x.
+ *
+ * info - (output) int*
+ * If *info = -i, the i-th argument had an illegal value.
+ *
+ */
+#ifdef _CRAY
+ _fcd ftcs1 = _cptofcd("L", strlen("L")),
+ ftcs2 = _cptofcd("N", strlen("N")),
+ ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+ SCformat *Lstore;
+ NCformat *Ustore;
+ float *Lval, *Uval;
+ int incx = 1;
+ int nrow;
+ int fsupc, nsupr, nsupc, luptr, istart, irow;
+ int i, k, iptr, jcol;
+ float *work;
+ flops_t solve_ops;
+
+ /* Test the input parameters */
+ *info = 0;
+ if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
+ else if ( !lsame_(trans, "N") && !lsame_(trans, "T") &&
+ !lsame_(trans, "C")) *info = -2;
+ else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
+ else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
+ else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
+ if ( *info ) {
+ i = -(*info);
+ xerbla_("sp_strsv", &i);
+ return 0;
+ }
+
+ Lstore = L->Store;
+ Lval = Lstore->nzval;
+ Ustore = U->Store;
+ Uval = Ustore->nzval;
+ solve_ops = 0;
+
+ if ( !(work = floatCalloc(L->nrow)) )
+ ABORT("Malloc fails for work in sp_strsv().");
+
+ if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
+
+ if ( lsame_(uplo, "L") ) {
+ /* Form x := inv(L)*x */
+ if ( L->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = 0; k <= Lstore->nsuper; k++) {
+ fsupc = L_FST_SUPC(k);
+ istart = L_SUB_START(fsupc);
+ nsupr = L_SUB_START(fsupc+1) - istart;
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+ nrow = nsupr - nsupc;
+
+ solve_ops += nsupc * (nsupc - 1);
+ solve_ops += 2 * nrow * nsupc;
+
+ if ( nsupc == 1 ) {
+ for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
+ irow = L_SUB(iptr);
+ ++luptr;
+ x[irow] -= x[fsupc] * Lval[luptr];
+ }
+ } else {
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+
+ SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
+ &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
+#else
+ strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+
+ sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
+ &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
+#endif
+#else
+ slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
+
+ smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
+ &x[fsupc], &work[0] );
+#endif
+
+ iptr = istart + nsupc;
+ for (i = 0; i < nrow; ++i, ++iptr) {
+ irow = L_SUB(iptr);
+ x[irow] -= work[i]; /* Scatter */
+ work[i] = 0.0;
+
+ }
+ }
+ } /* for k ... */
+
+ } else {
+ /* Form x := inv(U)*x */
+
+ if ( U->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = Lstore->nsuper; k >= 0; k--) {
+ fsupc = L_FST_SUPC(k);
+ nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+
+ solve_ops += nsupc * (nsupc + 1);
+
+ if ( nsupc == 1 ) {
+ x[fsupc] /= Lval[luptr];
+ for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
+ irow = U_SUB(i);
+ x[irow] -= x[fsupc] * Uval[i];
+ }
+ } else {
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#else
+ strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#endif
+#else
+ susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
+#endif
+
+ for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+ solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
+ for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
+ i++) {
+ irow = U_SUB(i);
+ x[irow] -= x[jcol] * Uval[i];
+ }
+ }
+ }
+ } /* for k ... */
+
+ }
+ } else { /* Form x := inv(A')*x */
+
+ if ( lsame_(uplo, "L") ) {
+ /* Form x := inv(L')*x */
+ if ( L->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = Lstore->nsuper; k >= 0; --k) {
+ fsupc = L_FST_SUPC(k);
+ istart = L_SUB_START(fsupc);
+ nsupr = L_SUB_START(fsupc+1) - istart;
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+
+ solve_ops += 2 * (nsupr - nsupc) * nsupc;
+
+ for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+ iptr = istart + nsupc;
+ for (i = L_NZ_START(jcol) + nsupc;
+ i < L_NZ_START(jcol+1); i++) {
+ irow = L_SUB(iptr);
+ x[jcol] -= x[irow] * Lval[i];
+ iptr++;
+ }
+ }
+
+ if ( nsupc > 1 ) {
+ solve_ops += nsupc * (nsupc - 1);
+#ifdef _CRAY
+ ftcs1 = _cptofcd("L", strlen("L"));
+ ftcs2 = _cptofcd("T", strlen("T"));
+ ftcs3 = _cptofcd("U", strlen("U"));
+ STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#else
+ strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#endif
+ }
+ }
+ } else {
+ /* Form x := inv(U')*x */
+ if ( U->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = 0; k <= Lstore->nsuper; k++) {
+ fsupc = L_FST_SUPC(k);
+ nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+
+ for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+ solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
+ for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
+ irow = U_SUB(i);
+ x[jcol] -= x[irow] * Uval[i];
+ }
+ }
+
+ solve_ops += nsupc * (nsupc + 1);
+
+ if ( nsupc == 1 ) {
+ x[fsupc] /= Lval[luptr];
+ } else {
+#ifdef _CRAY
+ ftcs1 = _cptofcd("U", strlen("U"));
+ ftcs2 = _cptofcd("T", strlen("T"));
+ ftcs3 = _cptofcd("N", strlen("N"));
+ STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#else
+ strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#endif
+ }
+ } /* for k ... */
+ }
+ }
+
+ stat->ops[SOLVE] += solve_ops;
+ SUPERLU_FREE(work);
+ return 0;
+}
+
+
+
+
+int
+sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
+ int incx, float beta, float *y, int incy)
+{
+/* Purpose
+ =======
+
+ sp_sgemv() performs one of the matrix-vector operations
+ y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
+ where alpha and beta are scalars, x and y are vectors and A is a
+ sparse A->nrow by A->ncol matrix.
+
+ Parameters
+ ==========
+
+ TRANS - (input) char*
+ On entry, TRANS specifies the operation to be performed as
+ follows:
+ TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
+ TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
+ TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
+
+ ALPHA - (input) float
+ On entry, ALPHA specifies the scalar alpha.
+
+ A - (input) SuperMatrix*
+ Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+ Currently, the type of A can be:
+ Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
+ In the future, more general A can be handled.
+
+ X - (input) float*, array of DIMENSION at least
+ ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+ and at least
+ ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+ Before entry, the incremented array X must contain the
+ vector x.
+
+ INCX - (input) int
+ On entry, INCX specifies the increment for the elements of
+ X. INCX must not be zero.
+
+ BETA - (input) float
+ On entry, BETA specifies the scalar beta. When BETA is
+ supplied as zero then Y need not be set on input.
+
+ Y - (output) float*, array of DIMENSION at least
+ ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+ and at least
+ ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+ Before entry with BETA non-zero, the incremented array Y
+ must contain the vector y. On exit, Y is overwritten by the
+ updated vector y.
+
+ INCY - (input) int
+ On entry, INCY specifies the increment for the elements of
+ Y. INCY must not be zero.
+
+ ==== Sparse Level 2 Blas routine.
+*/
+
+ /* Local variables */
+ NCformat *Astore;
+ float *Aval;
+ int info;
+ float temp;
+ int lenx, leny, i, j, irow;
+ int iy, jx, jy, kx, ky;
+ int notran;
+
+ notran = lsame_(trans, "N");
+ Astore = A->Store;
+ Aval = Astore->nzval;
+
+ /* Test the input parameters */
+ info = 0;
+ if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
+ else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
+ else if (incx == 0) info = 5;
+ else if (incy == 0) info = 8;
+ if (info != 0) {
+ xerbla_("sp_sgemv ", &info);
+ return 0;
+ }
+
+ /* Quick return if possible. */
+ if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
+ return 0;
+
+ /* Set LENX and LENY, the lengths of the vectors x and y, and set
+ up the start points in X and Y. */
+ if (lsame_(trans, "N")) {
+ lenx = A->ncol;
+ leny = A->nrow;
+ } else {
+ lenx = A->nrow;
+ leny = A->ncol;
+ }
+ if (incx > 0) kx = 0;
+ else kx = - (lenx - 1) * incx;
+ if (incy > 0) ky = 0;
+ else ky = - (leny - 1) * incy;
+
+ /* Start the operations. In this version the elements of A are
+ accessed sequentially with one pass through A. */
+ /* First form y := beta*y. */
+ if (beta != 1.) {
+ if (incy == 1) {
+ if (beta == 0.)
+ for (i = 0; i < leny; ++i) y[i] = 0.;
+ else
+ for (i = 0; i < leny; ++i) y[i] = beta * y[i];
+ } else {
+ iy = ky;
+ if (beta == 0.)
+ for (i = 0; i < leny; ++i) {
+ y[iy] = 0.;
+ iy += incy;
+ }
+ else
+ for (i = 0; i < leny; ++i) {
+ y[iy] = beta * y[iy];
+ iy += incy;
+ }
+ }
+ }
+
+ if (alpha == 0.) return 0;
+
+ if ( notran ) {
+ /* Form y := alpha*A*x + y. */
+ jx = kx;
+ if (incy == 1) {
+ for (j = 0; j < A->ncol; ++j) {
+ if (x[jx] != 0.) {
+ temp = alpha * x[jx];
+ for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
+ irow = Astore->rowind[i];
+ y[irow] += temp * Aval[i];
+ }
+ }
+ jx += incx;
+ }
+ } else {
+ ABORT("Not implemented.");
+ }
+ } else {
+ /* Form y := alpha*A'*x + y. */
+ jy = ky;
+ if (incx == 1) {
+ for (j = 0; j < A->ncol; ++j) {
+ temp = 0.;
+ for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
+ irow = Astore->rowind[i];
+ temp += Aval[i] * x[irow];
+ }
+ y[jy] += alpha * temp;
+ jy += incy;
+ }
+ } else {
+ ABORT("Not implemented.");
+ }
+ }
+ return 0;
+} /* sp_sgemv */
+
+
+
diff --git a/intern/opennl/superlu/ssp_blas3.c b/intern/opennl/superlu/ssp_blas3.c
new file mode 100644
index 00000000000..19086077c4c
--- /dev/null
+++ b/intern/opennl/superlu/ssp_blas3.c
@@ -0,0 +1,121 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ * File name: sp_blas3.c
+ * Purpose: Sparse BLAS3, using some dense BLAS3 operations.
+ */
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+sp_sgemm(char *transa, int n,
+ float alpha, SuperMatrix *A, float *b, int ldb,
+ float beta, float *c, int ldc)
+{
+/* Purpose
+ =======
+
+ sp_s performs one of the matrix-matrix operations
+
+ C := alpha*op( A )*op( B ) + beta*C,
+
+ where op( X ) is one of
+
+ op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ),
+
+ alpha and beta are scalars, and A, B and C are matrices, with op( A )
+ an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
+
+
+ Parameters
+ ==========
+
+ TRANSA - (input) char*
+ On entry, TRANSA specifies the form of op( A ) to be used in
+ the matrix multiplication as follows:
+ TRANSA = 'N' or 'n', op( A ) = A.
+ TRANSA = 'T' or 't', op( A ) = A'.
+ TRANSA = 'C' or 'c', op( A ) = conjg( A' ).
+ Unchanged on exit.
+
+ TRANSB - (input) char*
+ On entry, TRANSB specifies the form of op( B ) to be used in
+ the matrix multiplication as follows:
+ TRANSB = 'N' or 'n', op( B ) = B.
+ TRANSB = 'T' or 't', op( B ) = B'.
+ TRANSB = 'C' or 'c', op( B ) = conjg( B' ).
+ Unchanged on exit.
+
+ M - (input) int
+ On entry, M specifies the number of rows of the matrix
+ op( A ) and of the matrix C. M must be at least zero.
+ Unchanged on exit.
+
+ N - (input) int
+ On entry, N specifies the number of columns of the matrix
+ op( B ) and the number of columns of the matrix C. N must be
+ at least zero.
+ Unchanged on exit.
+
+ K - (input) int
+ On entry, K specifies the number of columns of the matrix
+ op( A ) and the number of rows of the matrix op( B ). K must
+ be at least zero.
+ Unchanged on exit.
+
+ ALPHA - (input) float
+ On entry, ALPHA specifies the scalar alpha.
+
+ A - (input) SuperMatrix*
+ Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+ Currently, the type of A can be:
+ Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
+ In the future, more general A can be handled.
+
+ B - FLOAT PRECISION array of DIMENSION ( LDB, kb ), where kb is
+ n when TRANSB = 'N' or 'n', and is k otherwise.
+ Before entry with TRANSB = 'N' or 'n', the leading k by n
+ part of the array B must contain the matrix B, otherwise
+ the leading n by k part of the array B must contain the
+ matrix B.
+ Unchanged on exit.
+
+ LDB - (input) int
+ On entry, LDB specifies the first dimension of B as declared
+ in the calling (sub) program. LDB must be at least max( 1, n ).
+ Unchanged on exit.
+
+ BETA - (input) float
+ On entry, BETA specifies the scalar beta. When BETA is
+ supplied as zero then C need not be set on input.
+
+ C - FLOAT PRECISION array of DIMENSION ( LDC, n ).
+ Before entry, the leading m by n part of the array C must
+ contain the matrix C, except when beta is zero, in which
+ case C need not be set on entry.
+ On exit, the array C is overwritten by the m by n matrix
+ ( alpha*op( A )*B + beta*C ).
+
+ LDC - (input) int
+ On entry, LDC specifies the first dimension of C as declared
+ in the calling (sub)program. LDC must be at least max(1,m).
+ Unchanged on exit.
+
+ ==== Sparse Level 3 Blas routine.
+*/
+ int incx = 1, incy = 1;
+ int j;
+
+ for (j = 0; j < n; ++j) {
+ sp_sgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
+ }
+ return 0;
+}
diff --git a/intern/opennl/superlu/ssp_defs.h b/intern/opennl/superlu/ssp_defs.h
new file mode 100644
index 00000000000..5b4e86b175b
--- /dev/null
+++ b/intern/opennl/superlu/ssp_defs.h
@@ -0,0 +1,234 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+#ifndef __SUPERLU_sSP_DEFS /* allow multiple inclusions */
+#define __SUPERLU_sSP_DEFS
+
+/*
+ * File name: ssp_defs.h
+ * Purpose: Sparse matrix types and function prototypes
+ * History:
+ */
+
+#ifdef _CRAY
+#include <fortran.h>
+#include <string.h>
+#endif
+
+/* Define my integer type int_t */
+typedef int int_t; /* default */
+
+#include "Cnames.h"
+#include "supermatrix.h"
+#include "util.h"
+
+
+/*
+ * Global data structures used in LU factorization -
+ *
+ * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
+ * (xsup,supno): supno[i] is the supernode no to which i belongs;
+ * xsup(s) points to the beginning of the s-th supernode.
+ * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12)
+ * xsup 0 1 2 4 7 12
+ * Note: dfs will be performed on supernode rep. relative to the new
+ * row pivoting ordering
+ *
+ * (xlsub,lsub): lsub[*] contains the compressed subscript of
+ * rectangular supernodes; xlsub[j] points to the starting
+ * location of the j-th column in lsub[*]. Note that xlsub
+ * is indexed by column.
+ * Storage: original row subscripts
+ *
+ * During the course of sparse LU factorization, we also use
+ * (xlsub,lsub) for the purpose of symmetric pruning. For each
+ * supernode {s,s+1,...,t=s+r} with first column s and last
+ * column t, the subscript set
+ * lsub[j], j=xlsub[s], .., xlsub[s+1]-1
+ * is the structure of column s (i.e. structure of this supernode).
+ * It is used for the storage of numerical values.
+ * Furthermore,
+ * lsub[j], j=xlsub[t], .., xlsub[t+1]-1
+ * is the structure of the last column t of this supernode.
+ * It is for the purpose of symmetric pruning. Therefore, the
+ * structural subscripts can be rearranged without making physical
+ * interchanges among the numerical values.
+ *
+ * However, if the supernode has only one column, then we
+ * only keep one set of subscripts. For any subscript interchange
+ * performed, similar interchange must be done on the numerical
+ * values.
+ *
+ * The last column structures (for pruning) will be removed
+ * after the numercial LU factorization phase.
+ *
+ * (xlusup,lusup): lusup[*] contains the numerical values of the
+ * rectangular supernodes; xlusup[j] points to the starting
+ * location of the j-th column in storage vector lusup[*]
+ * Note: xlusup is indexed by column.
+ * Each rectangular supernode is stored by column-major
+ * scheme, consistent with Fortran 2-dim array storage.
+ *
+ * (xusub,ucol,usub): ucol[*] stores the numerical values of
+ * U-columns outside the rectangular supernodes. The row
+ * subscript of nonzero ucol[k] is stored in usub[k].
+ * xusub[i] points to the starting location of column i in ucol.
+ * Storage: new row subscripts; that is subscripts of PA.
+ */
+typedef struct {
+ int *xsup; /* supernode and column mapping */
+ int *supno;
+ int *lsub; /* compressed L subscripts */
+ int *xlsub;
+ float *lusup; /* L supernodes */
+ int *xlusup;
+ float *ucol; /* U columns */
+ int *usub;
+ int *xusub;
+ int nzlmax; /* current max size of lsub */
+ int nzumax; /* " " " ucol */
+ int nzlumax; /* " " " lusup */
+ int n; /* number of columns in the matrix */
+ LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
+} GlobalLU_t;
+
+typedef struct {
+ float for_lu;
+ float total_needed;
+ int expansions;
+} mem_usage_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Driver routines */
+extern void
+sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
+ SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
+extern void
+sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
+ char *, float *, float *, SuperMatrix *, SuperMatrix *,
+ void *, int, SuperMatrix *, SuperMatrix *,
+ float *, float *, float *, float *,
+ mem_usage_t *, SuperLUStat_t *, int *);
+
+/* Supernodal LU factor related */
+extern void
+sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, float *,
+ int *, int *, Stype_t, Dtype_t, Mtype_t);
+extern void
+sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, float *,
+ int *, int *, Stype_t, Dtype_t, Mtype_t);
+extern void
+sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
+extern void
+sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int,
+ Stype_t, Dtype_t, Mtype_t);
+extern void
+sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, float *,
+ int *, int *, int *, int *, int *,
+ Stype_t, Dtype_t, Mtype_t);
+extern void
+sCopy_Dense_Matrix(int, int, float *, int, float *, int);
+
+extern void countnz (const int, int *, int *, int *, GlobalLU_t *);
+extern void fixupL (const int, const int *, GlobalLU_t *);
+
+extern void sallocateA (int, int, float **, int **, int **);
+extern void sgstrf (superlu_options_t*, SuperMatrix*,
+ int, int, int*, void *, int, int *, int *,
+ SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
+extern int ssnode_dfs (const int, const int, const int *, const int *,
+ const int *, int *, int *, GlobalLU_t *);
+extern int ssnode_bmod (const int, const int, float *,
+ float *, GlobalLU_t *, SuperLUStat_t*);
+extern void spanel_dfs (const int, const int, const int, SuperMatrix *,
+ int *, int *, float *, int *, int *, int *,
+ int *, int *, int *, int *, GlobalLU_t *);
+extern void spanel_bmod (const int, const int, const int, const int,
+ float *, float *, int *, int *,
+ GlobalLU_t *, SuperLUStat_t*);
+extern int scolumn_dfs (const int, const int, int *, int *, int *, int *,
+ int *, int *, int *, int *, int *, GlobalLU_t *);
+extern int scolumn_bmod (const int, const int, float *,
+ float *, int *, int *, int,
+ GlobalLU_t *, SuperLUStat_t*);
+extern int scopy_to_ucol (int, int, int *, int *, int *,
+ float *, GlobalLU_t *);
+extern int spivotL (const int, const float, int *, int *,
+ int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
+extern void spruneL (const int, const int *, const int, const int,
+ const int *, const int *, int *, GlobalLU_t *);
+extern void sreadmt (int *, int *, int *, float **, int **, int **);
+extern void sGenXtrue (int, int, float *, int);
+extern void sFillRHS (trans_t, int, float *, int, SuperMatrix *,
+ SuperMatrix *);
+extern void sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
+ SuperMatrix *, SuperLUStat_t*, int *);
+
+
+/* Driver related */
+
+extern void sgsequ (SuperMatrix *, float *, float *, float *,
+ float *, float *, int *);
+extern void slaqgs (SuperMatrix *, float *, float *, float,
+ float, float, char *);
+extern void sgscon (char *, SuperMatrix *, SuperMatrix *,
+ float, float *, SuperLUStat_t*, int *);
+extern float sPivotGrowth(int, SuperMatrix *, int *,
+ SuperMatrix *, SuperMatrix *);
+extern void sgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
+ SuperMatrix *, int *, int *, char *, float *,
+ float *, SuperMatrix *, SuperMatrix *,
+ float *, float *, SuperLUStat_t*, int *);
+
+extern int sp_strsv (char *, char *, char *, SuperMatrix *,
+ SuperMatrix *, float *, SuperLUStat_t*, int *);
+extern int sp_sgemv (char *, float, SuperMatrix *, float *,
+ int, float, float *, int);
+
+extern int sp_sgemm (char *, int, float,
+ SuperMatrix *, float *, int, float,
+ float *, int);
+
+/* Memory-related */
+extern int sLUMemInit (fact_t, void *, int, int, int, int, int,
+ SuperMatrix *, SuperMatrix *,
+ GlobalLU_t *, int **, float **);
+extern void sSetRWork (int, int, float *, float **, float **);
+extern void sLUWorkFree (int *, float *, GlobalLU_t *);
+extern int sLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
+
+extern float *floatMalloc(int);
+extern float *floatCalloc(int);
+extern int smemory_usage(const int, const int, const int, const int);
+extern int sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
+
+/* Auxiliary routines */
+extern void sreadhb(int *, int *, int *, float **, int **, int **);
+extern void sCompRow_to_CompCol(int, int, int, float*, int*, int*,
+ float **, int **, int **);
+extern void sfill (float *, int, float);
+extern void sinf_norm_error (int, SuperMatrix *, float *);
+extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
+ float, float, float *, float *, char *);
+
+/* Routines for debugging */
+extern void sPrint_CompCol_Matrix(char *, SuperMatrix *);
+extern void sPrint_SuperNode_Matrix(char *, SuperMatrix *);
+extern void sPrint_Dense_Matrix(char *, SuperMatrix *);
+extern void print_lu_col(char *, int, int, int *, GlobalLU_t *);
+extern void check_tempv(int, float *);
+
+#ifdef __cplusplus
+ }
+#endif
+
+#endif /* __SUPERLU_sSP_DEFS */
+
diff --git a/intern/opennl/superlu/strsv.c b/intern/opennl/superlu/strsv.c
new file mode 100644
index 00000000000..2f6a92c0d0d
--- /dev/null
+++ b/intern/opennl/superlu/strsv.c
@@ -0,0 +1,331 @@
+
+/* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, int *n,
+ float *a, int *lda, float *x, int *incx)
+{
+
+
+ /* System generated locals */
+ int i__1, i__2;
+
+ /* Local variables */
+ static int info;
+ static float temp;
+ static int i, j;
+ extern int lsame_(char *, char *);
+ static int ix, jx, kx;
+ extern /* Subroutine */ int xerbla_(char *, int *);
+ static int nounit;
+
+
+/* Purpose
+ =======
+
+ STRSV solves one of the systems of equations
+
+ A*x = b, or A'*x = b,
+
+ where b and x are n element vectors and A is an n by n unit, or
+ non-unit, upper or lower triangular matrix.
+
+ No test for singularity or near-singularity is included in this
+ routine. Such tests must be performed before calling this routine.
+
+ Parameters
+ ==========
+
+ UPLO - CHARACTER*1.
+ On entry, UPLO specifies whether the matrix is an upper or
+ lower triangular matrix as follows:
+
+ UPLO = 'U' or 'u' A is an upper triangular matrix.
+
+ UPLO = 'L' or 'l' A is a lower triangular matrix.
+
+ Unchanged on exit.
+
+ TRANS - CHARACTER*1.
+ On entry, TRANS specifies the equations to be solved as
+ follows:
+
+ TRANS = 'N' or 'n' A*x = b.
+
+ TRANS = 'T' or 't' A'*x = b.
+
+ TRANS = 'C' or 'c' A'*x = b.
+
+ Unchanged on exit.
+
+ DIAG - CHARACTER*1.
+ On entry, DIAG specifies whether or not A is unit
+ triangular as follows:
+
+ DIAG = 'U' or 'u' A is assumed to be unit triangular.
+
+ DIAG = 'N' or 'n' A is not assumed to be unit
+ triangular.
+
+ Unchanged on exit.
+
+ N - INTEGER.
+ On entry, N specifies the order of the matrix A.
+ N must be at least zero.
+ Unchanged on exit.
+
+ A - REAL array of DIMENSION ( LDA, n ).
+ Before entry with UPLO = 'U' or 'u', the leading n by n
+ upper triangular part of the array A must contain the upper
+
+ triangular matrix and the strictly lower triangular part of
+
+ A is not referenced.
+ Before entry with UPLO = 'L' or 'l', the leading n by n
+ lower triangular part of the array A must contain the lower
+
+ triangular matrix and the strictly upper triangular part of
+
+ A is not referenced.
+ Note that when DIAG = 'U' or 'u', the diagonal elements of
+
+ A are not referenced either, but are assumed to be unity.
+ Unchanged on exit.
+
+ LDA - INTEGER.
+ On entry, LDA specifies the first dimension of A as declared
+
+ in the calling (sub) program. LDA must be at least
+ max( 1, n ).
+ Unchanged on exit.
+
+ X - REAL array of dimension at least
+ ( 1 + ( n - 1 )*abs( INCX ) ).
+ Before entry, the incremented array X must contain the n
+ element right-hand side vector b. On exit, X is overwritten
+
+ with the solution vector x.
+
+ INCX - INTEGER.
+ On entry, INCX specifies the increment for the elements of
+ X. INCX must not be zero.
+ Unchanged on exit.
+
+
+ Level 2 Blas routine.
+
+ -- Written on 22-October-1986.
+ Jack Dongarra, Argonne National Lab.
+ Jeremy Du Croz, Nag Central Office.
+ Sven Hammarling, Nag Central Office.
+ Richard Hanson, Sandia National Labs.
+
+
+
+ Test the input parameters.
+
+
+ Parameter adjustments
+ Function Body */
+#define X(I) x[(I)-1]
+
+#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
+
+ info = 0;
+ if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+ info = 1;
+ } else if (! lsame_(trans, "N") && ! lsame_(trans, "T") &&
+ ! lsame_(trans, "C")) {
+ info = 2;
+ } else if (! lsame_(diag, "U") && ! lsame_(diag, "N")) {
+ info = 3;
+ } else if (*n < 0) {
+ info = 4;
+ } else if (*lda < ((1 > *n)? 1: *n)) {
+ info = 6;
+ } else if (*incx == 0) {
+ info = 8;
+ }
+ if (info != 0) {
+ xerbla_("STRSV ", &info);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0) {
+ return 0;
+ }
+
+ nounit = lsame_(diag, "N");
+
+/* Set up the start point in X if the increment is not unity. This
+ will be ( N - 1 )*INCX too small for descending loops. */
+
+ if (*incx <= 0) {
+ kx = 1 - (*n - 1) * *incx;
+ } else if (*incx != 1) {
+ kx = 1;
+ }
+
+/* Start the operations. In this version the elements of A are
+ accessed sequentially with one pass through A. */
+
+ if (lsame_(trans, "N")) {
+
+/* Form x := inv( A )*x. */
+
+ if (lsame_(uplo, "U")) {
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ if (X(j) != 0.f) {
+ if (nounit) {
+ X(j) /= A(j,j);
+ }
+ temp = X(j);
+ for (i = j - 1; i >= 1; --i) {
+ X(i) -= temp * A(i,j);
+/* L10: */
+ }
+ }
+/* L20: */
+ }
+ } else {
+ jx = kx + (*n - 1) * *incx;
+ for (j = *n; j >= 1; --j) {
+ if (X(jx) != 0.f) {
+ if (nounit) {
+ X(jx) /= A(j,j);
+ }
+ temp = X(jx);
+ ix = jx;
+ for (i = j - 1; i >= 1; --i) {
+ ix -= *incx;
+ X(ix) -= temp * A(i,j);
+/* L30: */
+ }
+ }
+ jx -= *incx;
+/* L40: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ i__1 = *n;
+ for (j = 1; j <= *n; ++j) {
+ if (X(j) != 0.f) {
+ if (nounit) {
+ X(j) /= A(j,j);
+ }
+ temp = X(j);
+ i__2 = *n;
+ for (i = j + 1; i <= *n; ++i) {
+ X(i) -= temp * A(i,j);
+/* L50: */
+ }
+ }
+/* L60: */
+ }
+ } else {
+ jx = kx;
+ i__1 = *n;
+ for (j = 1; j <= *n; ++j) {
+ if (X(jx) != 0.f) {
+ if (nounit) {
+ X(jx) /= A(j,j);
+ }
+ temp = X(jx);
+ ix = jx;
+ i__2 = *n;
+ for (i = j + 1; i <= *n; ++i) {
+ ix += *incx;
+ X(ix) -= temp * A(i,j);
+/* L70: */
+ }
+ }
+ jx += *incx;
+/* L80: */
+ }
+ }
+ }
+ } else {
+
+/* Form x := inv( A' )*x. */
+
+ if (lsame_(uplo, "U")) {
+ if (*incx == 1) {
+ i__1 = *n;
+ for (j = 1; j <= *n; ++j) {
+ temp = X(j);
+ i__2 = j - 1;
+ for (i = 1; i <= j-1; ++i) {
+ temp -= A(i,j) * X(i);
+/* L90: */
+ }
+ if (nounit) {
+ temp /= A(j,j);
+ }
+ X(j) = temp;
+/* L100: */
+ }
+ } else {
+ jx = kx;
+ i__1 = *n;
+ for (j = 1; j <= *n; ++j) {
+ temp = X(jx);
+ ix = kx;
+ i__2 = j - 1;
+ for (i = 1; i <= j-1; ++i) {
+ temp -= A(i,j) * X(ix);
+ ix += *incx;
+/* L110: */
+ }
+ if (nounit) {
+ temp /= A(j,j);
+ }
+ X(jx) = temp;
+ jx += *incx;
+/* L120: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ temp = X(j);
+ i__1 = j + 1;
+ for (i = *n; i >= j+1; --i) {
+ temp -= A(i,j) * X(i);
+/* L130: */
+ }
+ if (nounit) {
+ temp /= A(j,j);
+ }
+ X(j) = temp;
+/* L140: */
+ }
+ } else {
+ kx += (*n - 1) * *incx;
+ jx = kx;
+ for (j = *n; j >= 1; --j) {
+ temp = X(jx);
+ ix = kx;
+ i__1 = j + 1;
+ for (i = *n; i >= j+1; --i) {
+ temp -= A(i,j) * X(ix);
+ ix -= *incx;
+/* L150: */
+ }
+ if (nounit) {
+ temp /= A(j,j);
+ }
+ X(jx) = temp;
+ jx -= *incx;
+/* L160: */
+ }
+ }
+ }
+ }
+
+ return 0;
+
+/* End of STRSV . */
+
+} /* strsv_ */
+
diff --git a/intern/opennl/superlu/superlu_timer.c b/intern/opennl/superlu/superlu_timer.c
new file mode 100644
index 00000000000..798fd59d4ea
--- /dev/null
+++ b/intern/opennl/superlu/superlu_timer.c
@@ -0,0 +1,55 @@
+/*
+ * Purpose
+ * =======
+ * Returns the time in seconds used by the process.
+ *
+ * Note: the timer function call is machine dependent. Use conditional
+ * compilation to choose the appropriate function.
+ *
+ */
+
+/* We want this flag, safer than putting in build system */
+#define NO_TIMER
+
+#ifdef SUN
+/*
+ * It uses the system call gethrtime(3C), which is accurate to
+ * nanoseconds.
+*/
+#include <sys/time.h>
+
+double SuperLU_timer_() {
+ return ( (double)gethrtime() / 1e9 );
+}
+
+#else
+
+#ifndef NO_TIMER
+#include <sys/types.h>
+#include <sys/times.h>
+#include <time.h>
+#include <sys/time.h>
+#endif
+
+#ifndef CLK_TCK
+#define CLK_TCK 60
+#endif
+
+double SuperLU_timer_()
+{
+#ifdef NO_TIMER
+ /* no sys/times.h on WIN32 */
+ double tmp;
+ tmp = 0.0;
+#else
+ struct tms use;
+ double tmp;
+ times(&use);
+ tmp = use.tms_utime;
+ tmp += use.tms_stime;
+#endif
+ return (double)(tmp) / CLK_TCK;
+}
+
+#endif
+
diff --git a/intern/opennl/superlu/supermatrix.h b/intern/opennl/superlu/supermatrix.h
new file mode 100644
index 00000000000..665e22dc91f
--- /dev/null
+++ b/intern/opennl/superlu/supermatrix.h
@@ -0,0 +1,140 @@
+#ifndef __SUPERLU_SUPERMATRIX /* allow multiple inclusions */
+#define __SUPERLU_SUPERMATRIX
+
+/********************************************
+ * The matrix types are defined as follows. *
+ ********************************************/
+typedef enum {
+ SLU_NC, /* column-wise, no supernode */
+ SLU_NR, /* row-wize, no supernode */
+ SLU_SC, /* column-wise, supernode */
+ SLU_SR, /* row-wise, supernode */
+ SLU_NCP, /* column-wise, column-permuted, no supernode
+ (The consecutive columns of nonzeros, after permutation,
+ may not be stored contiguously.) */
+ SLU_DN /* Fortran style column-wise storage for dense matrix */
+} Stype_t;
+
+typedef enum {
+ SLU_S, /* single */
+ SLU_D, /* double */
+ SLU_C, /* single complex */
+ SLU_Z /* double complex */
+} Dtype_t;
+
+typedef enum {
+ SLU_GE, /* general */
+ SLU_TRLU, /* lower triangular, unit diagonal */
+ SLU_TRUU, /* upper triangular, unit diagonal */
+ SLU_TRL, /* lower triangular */
+ SLU_TRU, /* upper triangular */
+ SLU_SYL, /* symmetric, store lower half */
+ SLU_SYU, /* symmetric, store upper half */
+ SLU_HEL, /* Hermitian, store lower half */
+ SLU_HEU /* Hermitian, store upper half */
+} Mtype_t;
+
+typedef struct {
+ Stype_t Stype; /* Storage type: interprets the storage structure
+ pointed to by *Store. */
+ Dtype_t Dtype; /* Data type. */
+ Mtype_t Mtype; /* Matrix type: describes the mathematical property of
+ the matrix. */
+ int_t nrow; /* number of rows */
+ int_t ncol; /* number of columns */
+ void *Store; /* pointer to the actual storage of the matrix */
+} SuperMatrix;
+
+/***********************************************
+ * The storage schemes are defined as follows. *
+ ***********************************************/
+
+/* Stype == NC (Also known as Harwell-Boeing sparse matrix format) */
+typedef struct {
+ int_t nnz; /* number of nonzeros in the matrix */
+ void *nzval; /* pointer to array of nonzero values, packed by column */
+ int_t *rowind; /* pointer to array of row indices of the nonzeros */
+ int_t *colptr; /* pointer to array of beginning of columns in nzval[]
+ and rowind[] */
+ /* Note:
+ Zero-based indexing is used;
+ colptr[] has ncol+1 entries, the last one pointing
+ beyond the last column, so that colptr[ncol] = nnz. */
+} NCformat;
+
+/* Stype == NR (Also known as row compressed storage (RCS). */
+typedef struct {
+ int_t nnz; /* number of nonzeros in the matrix */
+ void *nzval; /* pointer to array of nonzero values, packed by row */
+ int_t *colind; /* pointer to array of column indices of the nonzeros */
+ int_t *rowptr; /* pointer to array of beginning of rows in nzval[]
+ and colind[] */
+ /* Note:
+ Zero-based indexing is used;
+ nzval[] and colind[] are of the same length, nnz;
+ rowptr[] has nrow+1 entries, the last one pointing
+ beyond the last column, so that rowptr[nrow] = nnz. */
+} NRformat;
+
+/* Stype == SC */
+typedef struct {
+ int_t nnz; /* number of nonzeros in the matrix */
+ int_t nsuper; /* number of supernodes, minus 1 */
+ void *nzval; /* pointer to array of nonzero values, packed by column */
+ int_t *nzval_colptr;/* pointer to array of beginning of columns in nzval[] */
+ int_t *rowind; /* pointer to array of compressed row indices of
+ rectangular supernodes */
+ int_t *rowind_colptr;/* pointer to array of beginning of columns in rowind[] */
+ int_t *col_to_sup; /* col_to_sup[j] is the supernode number to which column
+ j belongs; mapping from column to supernode number. */
+ int_t *sup_to_col; /* sup_to_col[s] points to the start of the s-th
+ supernode; mapping from supernode number to column.
+ e.g.: col_to_sup: 0 1 2 2 3 3 3 4 4 4 4 4 4 (ncol=12)
+ sup_to_col: 0 1 2 4 7 12 (nsuper=4) */
+ /* Note:
+ Zero-based indexing is used;
+ nzval_colptr[], rowind_colptr[], col_to_sup and
+ sup_to_col[] have ncol+1 entries, the last one
+ pointing beyond the last column.
+ For col_to_sup[], only the first ncol entries are
+ defined. For sup_to_col[], only the first nsuper+2
+ entries are defined. */
+} SCformat;
+
+/* Stype == NCP */
+typedef struct {
+ int_t nnz; /* number of nonzeros in the matrix */
+ void *nzval; /* pointer to array of nonzero values, packed by column */
+ int_t *rowind;/* pointer to array of row indices of the nonzeros */
+ /* Note: nzval[]/rowind[] always have the same length */
+ int_t *colbeg;/* colbeg[j] points to the beginning of column j in nzval[]
+ and rowind[] */
+ int_t *colend;/* colend[j] points to one past the last element of column
+ j in nzval[] and rowind[] */
+ /* Note:
+ Zero-based indexing is used;
+ The consecutive columns of the nonzeros may not be
+ contiguous in storage, because the matrix has been
+ postmultiplied by a column permutation matrix. */
+} NCPformat;
+
+/* Stype == DN */
+typedef struct {
+ int_t lda; /* leading dimension */
+ void *nzval; /* array of size lda*ncol to represent a dense matrix */
+} DNformat;
+
+
+
+/*********************************************************
+ * Macros used for easy access of sparse matrix entries. *
+ *********************************************************/
+#define L_SUB_START(col) ( Lstore->rowind_colptr[col] )
+#define L_SUB(ptr) ( Lstore->rowind[ptr] )
+#define L_NZ_START(col) ( Lstore->nzval_colptr[col] )
+#define L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] )
+#define U_NZ_START(col) ( Ustore->colptr[col] )
+#define U_SUB(ptr) ( Ustore->rowind[ptr] )
+
+
+#endif /* __SUPERLU_SUPERMATRIX */
diff --git a/intern/opennl/superlu/sutil.c b/intern/opennl/superlu/sutil.c
new file mode 100644
index 00000000000..4689f34968a
--- /dev/null
+++ b/intern/opennl/superlu/sutil.c
@@ -0,0 +1,478 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <math.h>
+#include "ssp_defs.h"
+
+void
+sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
+ float *nzval, int *rowind, int *colptr,
+ Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+ NCformat *Astore;
+
+ A->Stype = stype;
+ A->Dtype = dtype;
+ A->Mtype = mtype;
+ A->nrow = m;
+ A->ncol = n;
+ A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
+ if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
+ Astore = A->Store;
+ Astore->nnz = nnz;
+ Astore->nzval = nzval;
+ Astore->rowind = rowind;
+ Astore->colptr = colptr;
+}
+
+void
+sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz,
+ float *nzval, int *colind, int *rowptr,
+ Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+ NRformat *Astore;
+
+ A->Stype = stype;
+ A->Dtype = dtype;
+ A->Mtype = mtype;
+ A->nrow = m;
+ A->ncol = n;
+ A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
+ if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
+ Astore = A->Store;
+ Astore->nnz = nnz;
+ Astore->nzval = nzval;
+ Astore->colind = colind;
+ Astore->rowptr = rowptr;
+}
+
+/* Copy matrix A into matrix B. */
+void
+sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
+{
+ NCformat *Astore, *Bstore;
+ int ncol, nnz, i;
+
+ B->Stype = A->Stype;
+ B->Dtype = A->Dtype;
+ B->Mtype = A->Mtype;
+ B->nrow = A->nrow;;
+ B->ncol = ncol = A->ncol;
+ Astore = (NCformat *) A->Store;
+ Bstore = (NCformat *) B->Store;
+ Bstore->nnz = nnz = Astore->nnz;
+ for (i = 0; i < nnz; ++i)
+ ((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i];
+ for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
+ for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
+}
+
+
+void
+sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
+ Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+ DNformat *Xstore;
+
+ X->Stype = stype;
+ X->Dtype = dtype;
+ X->Mtype = mtype;
+ X->nrow = m;
+ X->ncol = n;
+ X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
+ if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
+ Xstore = (DNformat *) X->Store;
+ Xstore->lda = ldx;
+ Xstore->nzval = (float *) x;
+}
+
+void
+sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
+ float *Y, int ldy)
+{
+/*
+ *
+ * Purpose
+ * =======
+ *
+ * Copies a two-dimensional matrix X to another matrix Y.
+ */
+ int i, j;
+
+ for (j = 0; j < N; ++j)
+ for (i = 0; i < M; ++i)
+ Y[i + j*ldy] = X[i + j*ldx];
+}
+
+void
+sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
+ float *nzval, int *nzval_colptr, int *rowind,
+ int *rowind_colptr, int *col_to_sup, int *sup_to_col,
+ Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+ SCformat *Lstore;
+
+ L->Stype = stype;
+ L->Dtype = dtype;
+ L->Mtype = mtype;
+ L->nrow = m;
+ L->ncol = n;
+ L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
+ if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
+ Lstore = L->Store;
+ Lstore->nnz = nnz;
+ Lstore->nsuper = col_to_sup[n];
+ Lstore->nzval = nzval;
+ Lstore->nzval_colptr = nzval_colptr;
+ Lstore->rowind = rowind;
+ Lstore->rowind_colptr = rowind_colptr;
+ Lstore->col_to_sup = col_to_sup;
+ Lstore->sup_to_col = sup_to_col;
+
+}
+
+
+/*
+ * Convert a row compressed storage into a column compressed storage.
+ */
+void
+sCompRow_to_CompCol(int m, int n, int nnz,
+ float *a, int *colind, int *rowptr,
+ float **at, int **rowind, int **colptr)
+{
+ register int i, j, col, relpos;
+ int *marker;
+
+ /* Allocate storage for another copy of the matrix. */
+ *at = (float *) floatMalloc(nnz);
+ *rowind = (int *) intMalloc(nnz);
+ *colptr = (int *) intMalloc(n+1);
+ marker = (int *) intCalloc(n);
+
+ /* Get counts of each column of A, and set up column pointers */
+ for (i = 0; i < m; ++i)
+ for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
+ (*colptr)[0] = 0;
+ for (j = 0; j < n; ++j) {
+ (*colptr)[j+1] = (*colptr)[j] + marker[j];
+ marker[j] = (*colptr)[j];
+ }
+
+ /* Transfer the matrix into the compressed column storage. */
+ for (i = 0; i < m; ++i) {
+ for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
+ col = colind[j];
+ relpos = marker[col];
+ (*rowind)[relpos] = i;
+ (*at)[relpos] = a[j];
+ ++marker[col];
+ }
+ }
+
+ SUPERLU_FREE(marker);
+}
+
+
+void
+sPrint_CompCol_Matrix(char *what, SuperMatrix *A)
+{
+ NCformat *Astore;
+ register int i,n;
+ float *dp;
+
+ printf("\nCompCol matrix %s:\n", what);
+ printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
+ n = A->ncol;
+ Astore = (NCformat *) A->Store;
+ dp = (float *) Astore->nzval;
+ printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
+ printf("nzval: ");
+ for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]);
+ printf("\nrowind: ");
+ for (i = 0; i < Astore->colptr[n]; ++i) printf("%d ", Astore->rowind[i]);
+ printf("\ncolptr: ");
+ for (i = 0; i <= n; ++i) printf("%d ", Astore->colptr[i]);
+ printf("\n");
+ fflush(stdout);
+}
+
+void
+sPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
+{
+ SCformat *Astore;
+ register int i, j, k, c, d, n, nsup;
+ float *dp;
+ int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
+
+ printf("\nSuperNode matrix %s:\n", what);
+ printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
+ n = A->ncol;
+ Astore = (SCformat *) A->Store;
+ dp = (float *) Astore->nzval;
+ col_to_sup = Astore->col_to_sup;
+ sup_to_col = Astore->sup_to_col;
+ rowind_colptr = Astore->rowind_colptr;
+ rowind = Astore->rowind;
+ printf("nrow %d, ncol %d, nnz %d, nsuper %d\n",
+ A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
+ printf("nzval:\n");
+ for (k = 0; k <= Astore->nsuper; ++k) {
+ c = sup_to_col[k];
+ nsup = sup_to_col[k+1] - c;
+ for (j = c; j < c + nsup; ++j) {
+ d = Astore->nzval_colptr[j];
+ for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
+ printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
+ }
+ }
+ }
+#if 0
+ for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f ", dp[i]);
+#endif
+ printf("\nnzval_colptr: ");
+ for (i = 0; i <= n; ++i) printf("%d ", Astore->nzval_colptr[i]);
+ printf("\nrowind: ");
+ for (i = 0; i < Astore->rowind_colptr[n]; ++i)
+ printf("%d ", Astore->rowind[i]);
+ printf("\nrowind_colptr: ");
+ for (i = 0; i <= n; ++i) printf("%d ", Astore->rowind_colptr[i]);
+ printf("\ncol_to_sup: ");
+ for (i = 0; i < n; ++i) printf("%d ", col_to_sup[i]);
+ printf("\nsup_to_col: ");
+ for (i = 0; i <= Astore->nsuper+1; ++i)
+ printf("%d ", sup_to_col[i]);
+ printf("\n");
+ fflush(stdout);
+}
+
+void
+sPrint_Dense_Matrix(char *what, SuperMatrix *A)
+{
+ DNformat *Astore;
+ register int i;
+ float *dp;
+
+ printf("\nDense matrix %s:\n", what);
+ printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
+ Astore = (DNformat *) A->Store;
+ dp = (float *) Astore->nzval;
+ printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
+ printf("\nnzval: ");
+ for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]);
+ printf("\n");
+ fflush(stdout);
+}
+
+/*
+ * Diagnostic print of column "jcol" in the U/L factor.
+ */
+void
+sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
+{
+ int i, k, fsupc;
+ int *xsup, *supno;
+ int *xlsub, *lsub;
+ float *lusup;
+ int *xlusup;
+ float *ucol;
+ int *usub, *xusub;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+ ucol = Glu->ucol;
+ usub = Glu->usub;
+ xusub = Glu->xusub;
+
+ printf("%s", msg);
+ printf("col %d: pivrow %d, supno %d, xprune %d\n",
+ jcol, pivrow, supno[jcol], xprune[jcol]);
+
+ printf("\tU-col:\n");
+ for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
+ printf("\t%d%10.4f\n", usub[i], ucol[i]);
+ printf("\tL-col in rectangular snode:\n");
+ fsupc = xsup[supno[jcol]]; /* first col of the snode */
+ i = xlsub[fsupc];
+ k = xlusup[jcol];
+ while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
+ printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
+ i++; k++;
+ }
+ fflush(stdout);
+}
+
+
+/*
+ * Check whether tempv[] == 0. This should be true before and after
+ * calling any numeric routines, i.e., "panel_bmod" and "column_bmod".
+ */
+void scheck_tempv(int n, float *tempv)
+{
+ int i;
+
+ for (i = 0; i < n; i++) {
+ if (tempv[i] != 0.0)
+ {
+ fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
+ ABORT("scheck_tempv");
+ }
+ }
+}
+
+
+void
+sGenXtrue(int n, int nrhs, float *x, int ldx)
+{
+ int i, j;
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < n; ++i) {
+ x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/
+ }
+}
+
+/*
+ * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
+ */
+void
+sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
+ SuperMatrix *A, SuperMatrix *B)
+{
+ NCformat *Astore;
+ float *Aval;
+ DNformat *Bstore;
+ float *rhs;
+ float one = 1.0;
+ float zero = 0.0;
+ int ldc;
+ char transc[1];
+
+ Astore = A->Store;
+ Aval = (float *) Astore->nzval;
+ Bstore = B->Store;
+ rhs = Bstore->nzval;
+ ldc = Bstore->lda;
+
+ if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
+ else *(unsigned char *)transc = 'T';
+
+ sp_sgemm(transc, nrhs, one, A,
+ x, ldx, zero, rhs, ldc);
+
+}
+
+/*
+ * Fills a float precision array with a given value.
+ */
+void
+sfill(float *a, int alen, float dval)
+{
+ register int i;
+ for (i = 0; i < alen; i++) a[i] = dval;
+}
+
+
+
+/*
+ * Check the inf-norm of the error vector
+ */
+void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
+{
+ DNformat *Xstore;
+ float err, xnorm;
+ float *Xmat, *soln_work;
+ int i, j;
+
+ Xstore = X->Store;
+ Xmat = Xstore->nzval;
+
+ for (j = 0; j < nrhs; j++) {
+ soln_work = &Xmat[j*Xstore->lda];
+ err = xnorm = 0.0;
+ for (i = 0; i < X->nrow; i++) {
+ err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
+ xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
+ }
+ err = err / xnorm;
+ printf("||X - Xtrue||/||X|| = %e\n", err);
+ }
+}
+
+
+
+/* Print performance of the code. */
+void
+sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
+ float rpg, float rcond, float *ferr,
+ float *berr, char *equed, SuperLUStat_t *stat)
+{
+ SCformat *Lstore;
+ NCformat *Ustore;
+ double *utime;
+ flops_t *ops;
+
+ utime = stat->utime;
+ ops = stat->ops;
+
+ if ( utime[FACT] != 0. )
+ printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
+ ops[FACT]*1e-6/utime[FACT]);
+ printf("Identify relaxed snodes = %8.2f\n", utime[RELAX]);
+ if ( utime[SOLVE] != 0. )
+ printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
+ ops[SOLVE]*1e-6/utime[SOLVE]);
+
+ Lstore = (SCformat *) L->Store;
+ Ustore = (NCformat *) U->Store;
+ printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
+ printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
+ printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
+
+ printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
+ mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
+ mem_usage->expansions);
+
+ printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
+ printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
+ utime[FACT], ops[FACT]*1e-6/utime[FACT],
+ utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
+ utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
+
+ printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
+ printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
+ rpg, rcond, ferr[0], berr[0], equed);
+
+}
+
+
+
+
+int print_float_vec(char *what, int n, float *vec)
+{
+ int i;
+ printf("%s: n %d\n", what, n);
+ for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);
+ return 0;
+}
+
diff --git a/intern/opennl/superlu/util.c b/intern/opennl/superlu/util.c
new file mode 100644
index 00000000000..3c49d714d1c
--- /dev/null
+++ b/intern/opennl/superlu/util.c
@@ -0,0 +1,391 @@
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <math.h>
+#include "ssp_defs.h"
+#include "util.h"
+
+/*
+ * Global statistics variale
+ */
+
+void superlu_abort_and_exit(char* msg)
+{
+ fprintf(stderr, msg);
+ exit (-1);
+}
+
+/*
+ * Set the default values for the options argument.
+ */
+void set_default_options(superlu_options_t *options)
+{
+ options->Fact = DOFACT;
+ options->Equil = YES;
+ options->ColPerm = COLAMD;
+ options->DiagPivotThresh = 1.0;
+ options->Trans = NOTRANS;
+ options->IterRefine = NOREFINE;
+ options->SymmetricMode = NO;
+ options->PivotGrowth = NO;
+ options->ConditionNumber = NO;
+ options->PrintStat = YES;
+}
+
+/* Deallocate the structure pointing to the actual storage of the matrix. */
+void
+Destroy_SuperMatrix_Store(SuperMatrix *A)
+{
+ SUPERLU_FREE ( A->Store );
+}
+
+void
+Destroy_CompCol_Matrix(SuperMatrix *A)
+{
+ SUPERLU_FREE( ((NCformat *)A->Store)->rowind );
+ SUPERLU_FREE( ((NCformat *)A->Store)->colptr );
+ SUPERLU_FREE( ((NCformat *)A->Store)->nzval );
+ SUPERLU_FREE( A->Store );
+}
+
+void
+Destroy_CompRow_Matrix(SuperMatrix *A)
+{
+ SUPERLU_FREE( ((NRformat *)A->Store)->colind );
+ SUPERLU_FREE( ((NRformat *)A->Store)->rowptr );
+ SUPERLU_FREE( ((NRformat *)A->Store)->nzval );
+ SUPERLU_FREE( A->Store );
+}
+
+void
+Destroy_SuperNode_Matrix(SuperMatrix *A)
+{
+ SUPERLU_FREE ( ((SCformat *)A->Store)->rowind );
+ SUPERLU_FREE ( ((SCformat *)A->Store)->rowind_colptr );
+ SUPERLU_FREE ( ((SCformat *)A->Store)->nzval );
+ SUPERLU_FREE ( ((SCformat *)A->Store)->nzval_colptr );
+ SUPERLU_FREE ( ((SCformat *)A->Store)->col_to_sup );
+ SUPERLU_FREE ( ((SCformat *)A->Store)->sup_to_col );
+ SUPERLU_FREE ( A->Store );
+}
+
+/* A is of type Stype==NCP */
+void
+Destroy_CompCol_Permuted(SuperMatrix *A)
+{
+ SUPERLU_FREE ( ((NCPformat *)A->Store)->colbeg );
+ SUPERLU_FREE ( ((NCPformat *)A->Store)->colend );
+ SUPERLU_FREE ( A->Store );
+}
+
+/* A is of type Stype==DN */
+void
+Destroy_Dense_Matrix(SuperMatrix *A)
+{
+ DNformat* Astore = A->Store;
+ SUPERLU_FREE (Astore->nzval);
+ SUPERLU_FREE ( A->Store );
+}
+
+/*
+ * Reset repfnz[] for the current column
+ */
+void
+resetrep_col (const int nseg, const int *segrep, int *repfnz)
+{
+ int i, irep;
+
+ for (i = 0; i < nseg; i++) {
+ irep = segrep[i];
+ repfnz[irep] = EMPTY;
+ }
+}
+
+
+/*
+ * Count the total number of nonzeros in factors L and U, and in the
+ * symmetrically reduced L.
+ */
+void
+countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu)
+{
+ int nsuper, fsupc, i, j;
+ int nnzL0, jlen, irep;
+ int *xsup, *xlsub;
+
+ xsup = Glu->xsup;
+ xlsub = Glu->xlsub;
+ *nnzL = 0;
+ *nnzU = (Glu->xusub)[n];
+ nnzL0 = 0;
+ nsuper = (Glu->supno)[n];
+
+ if ( n <= 0 ) return;
+
+ /*
+ * For each supernode
+ */
+ for (i = 0; i <= nsuper; i++) {
+ fsupc = xsup[i];
+ jlen = xlsub[fsupc+1] - xlsub[fsupc];
+
+ for (j = fsupc; j < xsup[i+1]; j++) {
+ *nnzL += jlen;
+ *nnzU += j - fsupc + 1;
+ jlen--;
+ }
+ irep = xsup[i+1] - 1;
+ nnzL0 += xprune[irep] - xlsub[irep];
+ }
+
+ /* printf("\tNo of nonzeros in symm-reduced L = %d\n", nnzL0);*/
+}
+
+
+
+/*
+ * Fix up the data storage lsub for L-subscripts. It removes the subscript
+ * sets for structural pruning, and applies permuation to the remaining
+ * subscripts.
+ */
+void
+fixupL(const int n, const int *perm_r, GlobalLU_t *Glu)
+{
+ register int nsuper, fsupc, nextl, i, j, k, jstrt;
+ int *xsup, *lsub, *xlsub;
+
+ if ( n <= 1 ) return;
+
+ xsup = Glu->xsup;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ nextl = 0;
+ nsuper = (Glu->supno)[n];
+
+ /*
+ * For each supernode ...
+ */
+ for (i = 0; i <= nsuper; i++) {
+ fsupc = xsup[i];
+ jstrt = xlsub[fsupc];
+ xlsub[fsupc] = nextl;
+ for (j = jstrt; j < xlsub[fsupc+1]; j++) {
+ lsub[nextl] = perm_r[lsub[j]]; /* Now indexed into P*A */
+ nextl++;
+ }
+ for (k = fsupc+1; k < xsup[i+1]; k++)
+ xlsub[k] = nextl; /* Other columns in supernode i */
+
+ }
+
+ xlsub[n] = nextl;
+}
+
+
+/*
+ * Diagnostic print of segment info after panel_dfs().
+ */
+void print_panel_seg(int n, int w, int jcol, int nseg,
+ int *segrep, int *repfnz)
+{
+ int j, k;
+
+ for (j = jcol; j < jcol+w; j++) {
+ printf("\tcol %d:\n", j);
+ for (k = 0; k < nseg; k++)
+ printf("\t\tseg %d, segrep %d, repfnz %d\n", k,
+ segrep[k], repfnz[(j-jcol)*n + segrep[k]]);
+ }
+
+}
+
+
+void
+StatInit(SuperLUStat_t *stat)
+{
+ register int i, w, panel_size, relax;
+
+ panel_size = sp_ienv(1);
+ relax = sp_ienv(2);
+ w = SUPERLU_MAX(panel_size, relax);
+ stat->panel_histo = intCalloc(w+1);
+ stat->utime = (double *) SUPERLU_MALLOC(NPHASES * sizeof(double));
+ if (!stat->utime) ABORT("SUPERLU_MALLOC fails for stat->utime");
+ stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t));
+ if (!stat->ops) ABORT("SUPERLU_MALLOC fails for stat->ops");
+ for (i = 0; i < NPHASES; ++i) {
+ stat->utime[i] = 0.;
+ stat->ops[i] = 0.;
+ }
+}
+
+
+void
+StatPrint(SuperLUStat_t *stat)
+{
+ double *utime;
+ flops_t *ops;
+
+ utime = stat->utime;
+ ops = stat->ops;
+ printf("Factor time = %8.2f\n", utime[FACT]);
+ if ( utime[FACT] != 0.0 )
+ printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
+ ops[FACT]*1e-6/utime[FACT]);
+
+ printf("Solve time = %8.2f\n", utime[SOLVE]);
+ if ( utime[SOLVE] != 0.0 )
+ printf("Solve flops = %e\tMflops = %8.2f\n", ops[SOLVE],
+ ops[SOLVE]*1e-6/utime[SOLVE]);
+
+}
+
+
+void
+StatFree(SuperLUStat_t *stat)
+{
+ SUPERLU_FREE(stat->panel_histo);
+ SUPERLU_FREE(stat->utime);
+ SUPERLU_FREE(stat->ops);
+}
+
+
+flops_t
+LUFactFlops(SuperLUStat_t *stat)
+{
+ return (stat->ops[FACT]);
+}
+
+flops_t
+LUSolveFlops(SuperLUStat_t *stat)
+{
+ return (stat->ops[SOLVE]);
+}
+
+
+
+
+
+/*
+ * Fills an integer array with a given value.
+ */
+void ifill(int *a, int alen, int ival)
+{
+ register int i;
+ for (i = 0; i < alen; i++) a[i] = ival;
+}
+
+
+
+/*
+ * Get the statistics of the supernodes
+ */
+#define NBUCKS 10
+static int max_sup_size;
+
+void super_stats(int nsuper, int *xsup)
+{
+ register int nsup1 = 0;
+ int i, isize, whichb, bl, bh;
+ int bucket[NBUCKS];
+
+ max_sup_size = 0;
+
+ for (i = 0; i <= nsuper; i++) {
+ isize = xsup[i+1] - xsup[i];
+ if ( isize == 1 ) nsup1++;
+ if ( max_sup_size < isize ) max_sup_size = isize;
+ }
+
+ printf(" Supernode statistics:\n\tno of super = %d\n", nsuper+1);
+ printf("\tmax supernode size = %d\n", max_sup_size);
+ printf("\tno of size 1 supernodes = %d\n", nsup1);
+
+ /* Histogram of the supernode sizes */
+ ifill (bucket, NBUCKS, 0);
+
+ for (i = 0; i <= nsuper; i++) {
+ isize = xsup[i+1] - xsup[i];
+ whichb = (float) isize / max_sup_size * NBUCKS;
+ if (whichb >= NBUCKS) whichb = NBUCKS - 1;
+ bucket[whichb]++;
+ }
+
+ printf("\tHistogram of supernode sizes:\n");
+ for (i = 0; i < NBUCKS; i++) {
+ bl = (float) i * max_sup_size / NBUCKS;
+ bh = (float) (i+1) * max_sup_size / NBUCKS;
+ printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);
+ }
+
+}
+
+
+float SpaSize(int n, int np, float sum_npw)
+{
+ return (sum_npw*8 + np*8 + n*4)/1024.;
+}
+
+float DenseSize(int n, float sum_nw)
+{
+ return (sum_nw*8 + n*8)/1024.;;
+}
+
+
+
+/*
+ * Check whether repfnz[] == EMPTY after reset.
+ */
+void check_repfnz(int n, int w, int jcol, int *repfnz)
+{
+ int jj, k;
+
+ for (jj = jcol; jj < jcol+w; jj++)
+ for (k = 0; k < n; k++)
+ if ( repfnz[(jj-jcol)*n + k] != EMPTY ) {
+ fprintf(stderr, "col %d, repfnz_col[%d] = %d\n", jj,
+ k, repfnz[(jj-jcol)*n + k]);
+ ABORT("check_repfnz");
+ }
+}
+
+
+/* Print a summary of the testing results. */
+void
+PrintSumm(char *type, int nfail, int nrun, int nerrs)
+{
+ if ( nfail > 0 )
+ printf("%3s driver: %d out of %d tests failed to pass the threshold\n",
+ type, nfail, nrun);
+ else
+ printf("All tests for %3s driver passed the threshold (%6d tests run)\n", type, nrun);
+
+ if ( nerrs > 0 )
+ printf("%6d error messages recorded\n", nerrs);
+}
+
+
+int print_int_vec(char *what, int n, int *vec)
+{
+ int i;
+ printf("%s\n", what);
+ for (i = 0; i < n; ++i) printf("%d\t%d\n", i, vec[i]);
+ return 0;
+}
diff --git a/intern/opennl/superlu/util.h b/intern/opennl/superlu/util.h
new file mode 100644
index 00000000000..1a3526d4e7e
--- /dev/null
+++ b/intern/opennl/superlu/util.h
@@ -0,0 +1,267 @@
+#ifndef __SUPERLU_UTIL /* allow multiple inclusions */
+#define __SUPERLU_UTIL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/*
+#ifndef __STDC__
+#include <malloc.h>
+#endif
+*/
+#include <assert.h>
+
+/***********************************************************************
+ * Macros
+ ***********************************************************************/
+#define FIRSTCOL_OF_SNODE(i) (xsup[i])
+/* No of marker arrays used in the symbolic factorization,
+ each of size n */
+#define NO_MARKER 3
+#define NUM_TEMPV(m,w,t,b) ( SUPERLU_MAX(m, (t + b)*w) )
+
+#ifndef USER_ABORT
+#define USER_ABORT(msg) superlu_abort_and_exit(msg)
+#endif
+
+#define ABORT(err_msg) \
+ { char msg[256];\
+ sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\
+ USER_ABORT(msg); }
+
+
+#ifndef USER_MALLOC
+#if 1
+#define USER_MALLOC(size) superlu_malloc(size)
+#else
+/* The following may check out some uninitialized data */
+#define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size)
+#endif
+#endif
+
+#define SUPERLU_MALLOC(size) USER_MALLOC(size)
+
+#ifndef USER_FREE
+#define USER_FREE(addr) superlu_free(addr)
+#endif
+
+#define SUPERLU_FREE(addr) USER_FREE(addr)
+
+#define CHECK_MALLOC(where) { \
+ extern int superlu_malloc_total; \
+ printf("%s: malloc_total %d Bytes\n", \
+ where, superlu_malloc_total); \
+}
+
+#define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) )
+#define SUPERLU_MIN(x, y) ( (x) < (y) ? (x) : (y) )
+
+/***********************************************************************
+ * Constants
+ ***********************************************************************/
+#define EMPTY (-1)
+/*#define NO (-1)*/
+#define FALSE 0
+#define TRUE 1
+
+/***********************************************************************
+ * Enumerate types
+ ***********************************************************************/
+typedef enum {NO, YES} yes_no_t;
+typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t;
+typedef enum {NOROWPERM, LargeDiag, MY_PERMR} rowperm_t;
+typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC}colperm_t;
+typedef enum {NOTRANS, TRANS, CONJ} trans_t;
+typedef enum {NOEQUIL, ROW, COL, BOTH} DiagScale_t;
+typedef enum {NOREFINE, SINGLE=1, SLU_DOUBLE, EXTRA} IterRefine_t;
+typedef enum {LUSUP, UCOL, LSUB, USUB} MemType;
+typedef enum {HEAD, TAIL} stack_end_t;
+typedef enum {SYSTEM, USER} LU_space_t;
+
+/*
+ * The following enumerate type is used by the statistics variable
+ * to keep track of flop count and time spent at various stages.
+ *
+ * Note that not all of the fields are disjoint.
+ */
+typedef enum {
+ COLPERM, /* find a column ordering that minimizes fills */
+ RELAX, /* find artificial supernodes */
+ ETREE, /* compute column etree */
+ EQUIL, /* equilibrate the original matrix */
+ FACT, /* perform LU factorization */
+ RCOND, /* estimate reciprocal condition number */
+ SOLVE, /* forward and back solves */
+ REFINE, /* perform iterative refinement */
+ SLU_FLOAT, /* time spent in floating-point operations */
+ TRSV, /* fraction of FACT spent in xTRSV */
+ GEMV, /* fraction of FACT spent in xGEMV */
+ FERR, /* estimate error bounds after iterative refinement */
+ NPHASES /* total number of phases */
+} PhaseType;
+
+
+/***********************************************************************
+ * Type definitions
+ ***********************************************************************/
+typedef float flops_t;
+typedef unsigned char Logical;
+
+/*
+ *-- This contains the options used to control the solve process.
+ *
+ * Fact (fact_t)
+ * Specifies whether or not the factored form of the matrix
+ * A is supplied on entry, and if not, how the matrix A should
+ * be factorizaed.
+ * = DOFACT: The matrix A will be factorized from scratch, and the
+ * factors will be stored in L and U.
+ * = SamePattern: The matrix A will be factorized assuming
+ * that a factorization of a matrix with the same sparsity
+ * pattern was performed prior to this one. Therefore, this
+ * factorization will reuse column permutation vector
+ * ScalePermstruct->perm_c and the column elimination tree
+ * LUstruct->etree.
+ * = SamePattern_SameRowPerm: The matrix A will be factorized
+ * assuming that a factorization of a matrix with the same
+ * sparsity pattern and similar numerical values was performed
+ * prior to this one. Therefore, this factorization will reuse
+ * both row and column scaling factors R and C, and the
+ * both row and column permutation vectors perm_r and perm_c,
+ * distributed data structure set up from the previous symbolic
+ * factorization.
+ * = FACTORED: On entry, L, U, perm_r and perm_c contain the
+ * factored form of A. If DiagScale is not NOEQUIL, the matrix
+ * A has been equilibrated with scaling factors R and C.
+ *
+ * Equil (yes_no_t)
+ * Specifies whether to equilibrate the system (scale A's row and
+ * columns to have unit norm).
+ *
+ * ColPerm (colperm_t)
+ * Specifies what type of column permutation to use to reduce fill.
+ * = NATURAL: use the natural ordering
+ * = MMD_ATA: use minimum degree ordering on structure of A'*A
+ * = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
+ * = COLAMD: use approximate minimum degree column ordering
+ * = MY_PERMC: use the ordering specified in ScalePermstruct->perm_c[]
+ *
+ * Trans (trans_t)
+ * Specifies the form of the system of equations:
+ * = NOTRANS: A * X = B (No transpose)
+ * = TRANS: A**T * X = B (Transpose)
+ * = CONJ: A**H * X = B (Transpose)
+ *
+ * IterRefine (IterRefine_t)
+ * Specifies whether to perform iterative refinement.
+ * = NO: no iterative refinement
+ * = WorkingPrec: perform iterative refinement in working precision
+ * = ExtraPrec: perform iterative refinement in extra precision
+ *
+ * PrintStat (yes_no_t)
+ * Specifies whether to print the solver's statistics.
+ *
+ * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU)
+ * Specifies the threshold used for a diagonal entry to be an
+ * acceptable pivot.
+ *
+ * PivotGrowth (yes_no_t)
+ * Specifies whether to compute the reciprocal pivot growth.
+ *
+ * ConditionNumber (ues_no_t)
+ * Specifies whether to compute the reciprocal condition number.
+ *
+ * RowPerm (rowperm_t) (only for SuperLU_DIST)
+ * Specifies whether to permute rows of the original matrix.
+ * = NO: not to permute the rows
+ * = LargeDiag: make the diagonal large relative to the off-diagonal
+ * = MY_PERMR: use the permutation given in ScalePermstruct->perm_r[]
+ *
+ * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
+ * Specifies whether to replace the tiny diagonals by
+ * sqrt(epsilon)*||A|| during LU factorization.
+ *
+ * SolveInitialized (yes_no_t) (only for SuperLU_DIST)
+ * Specifies whether the initialization has been performed to the
+ * triangular solve.
+ *
+ * RefineInitialized (yes_no_t) (only for SuperLU_DIST)
+ * Specifies whether the initialization has been performed to the
+ * sparse matrix-vector multiplication routine needed in iterative
+ * refinement.
+ */
+typedef struct {
+ fact_t Fact;
+ yes_no_t Equil;
+ colperm_t ColPerm;
+ trans_t Trans;
+ IterRefine_t IterRefine;
+ yes_no_t PrintStat;
+ yes_no_t SymmetricMode;
+ double DiagPivotThresh;
+ yes_no_t PivotGrowth;
+ yes_no_t ConditionNumber;
+ rowperm_t RowPerm;
+ yes_no_t ReplaceTinyPivot;
+ yes_no_t SolveInitialized;
+ yes_no_t RefineInitialized;
+} superlu_options_t;
+
+typedef struct {
+ int *panel_histo; /* histogram of panel size distribution */
+ double *utime; /* running time at various phases */
+ flops_t *ops; /* operation count at various phases */
+ int TinyPivots; /* number of tiny pivots */
+ int RefineSteps; /* number of iterative refinement steps */
+} SuperLUStat_t;
+
+
+/***********************************************************************
+ * Prototypes
+ ***********************************************************************/
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void Destroy_SuperMatrix_Store(SuperMatrix *);
+extern void Destroy_CompCol_Matrix(SuperMatrix *);
+extern void Destroy_CompRow_Matrix(SuperMatrix *);
+extern void Destroy_SuperNode_Matrix(SuperMatrix *);
+extern void Destroy_CompCol_Permuted(SuperMatrix *);
+extern void Destroy_Dense_Matrix(SuperMatrix *);
+extern void get_perm_c(int, SuperMatrix *, int *);
+extern void set_default_options(superlu_options_t *options);
+extern void sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*,
+ SuperMatrix*);
+extern void superlu_abort_and_exit(char*);
+extern void *superlu_malloc (size_t);
+extern int *intMalloc (int);
+extern int *intCalloc (int);
+extern void superlu_free (void*);
+extern void SetIWork (int, int, int, int *, int **, int **, int **,
+ int **, int **, int **, int **);
+extern int sp_coletree (int *, int *, int *, int, int, int *);
+extern void relax_snode (const int, int *, const int, int *, int *);
+extern void heap_relax_snode (const int, int *, const int, int *, int *);
+extern void resetrep_col (const int, const int *, int *);
+extern int spcoletree (int *, int *, int *, int, int, int *);
+extern int *TreePostorder (int, int *);
+extern double SuperLU_timer_ ();
+extern int sp_ienv (int);
+extern int lsame_ (char *, char *);
+extern int xerbla_ (char *, int *);
+extern void ifill (int *, int, int);
+extern void snode_profile (int, int *);
+extern void super_stats (int, int *);
+extern void PrintSumm (char *, int, int, int);
+extern void StatInit(SuperLUStat_t *);
+extern void StatPrint (SuperLUStat_t *);
+extern void StatFree(SuperLUStat_t *);
+extern void print_panel_seg(int, int, int, int, int *, int *);
+extern void check_repfnz(int, int, int, int *);
+
+#ifdef __cplusplus
+ }
+#endif
+
+#endif /* __SUPERLU_UTIL */
diff --git a/intern/opennl/superlu/xerbla.c b/intern/opennl/superlu/xerbla.c
new file mode 100644
index 00000000000..cb94fa71d95
--- /dev/null
+++ b/intern/opennl/superlu/xerbla.c
@@ -0,0 +1,43 @@
+
+#include <stdio.h>
+
+/* Subroutine */ int xerbla_(char *srname, int *info)
+{
+/* -- LAPACK auxiliary routine (version 2.0) --
+ Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+ Courant Institute, Argonne National Lab, and Rice University
+ September 30, 1994
+
+
+ Purpose
+ =======
+
+ XERBLA is an error handler for the LAPACK routines.
+ It is called by an LAPACK routine if an input parameter has an
+ invalid value. A message is printed and execution stops.
+
+ Installers may consider modifying the STOP statement in order to
+ call system-specific exception-handling facilities.
+
+ Arguments
+ =========
+
+ SRNAME (input) CHARACTER*6
+ The name of the routine which called XERBLA.
+
+ INFO (input) INT
+ The position of the invalid parameter in the parameter list
+
+ of the calling routine.
+
+ =====================================================================
+*/
+
+ printf("** On entry to %6s, parameter number %2d had an illegal value\n",
+ srname, *info);
+
+/* End of XERBLA */
+
+ return 0;
+} /* xerbla_ */
+