Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--SConstruct3
-rw-r--r--intern/Makefile2
-rw-r--r--intern/SConscript3
-rw-r--r--intern/opennl/Makefile67
-rw-r--r--intern/opennl/SConscript43
-rw-r--r--intern/opennl/doc/OpenNL_License.txt341
-rw-r--r--intern/opennl/doc/OpenNL_Readme.txt13
-rw-r--r--intern/opennl/doc/SuperLU_License.txt31
-rw-r--r--intern/opennl/doc/SuperLU_Readme.txt52
-rw-r--r--intern/opennl/extern/ONL_opennl.h163
-rw-r--r--intern/opennl/intern/Makefile43
-rw-r--r--intern/opennl/intern/opennl.c1151
-rw-r--r--intern/opennl/superlu/Cnames.h281
-rw-r--r--intern/opennl/superlu/Makefile40
-rw-r--r--intern/opennl/superlu/colamd.c2583
-rw-r--r--intern/opennl/superlu/colamd.h67
-rw-r--r--intern/opennl/superlu/get_perm_c.c453
-rw-r--r--intern/opennl/superlu/heap_relax_snode.c116
-rw-r--r--intern/opennl/superlu/lsame.c70
-rw-r--r--intern/opennl/superlu/memory.c207
-rw-r--r--intern/opennl/superlu/mmd.c1012
-rw-r--r--intern/opennl/superlu/relax_snode.c71
-rw-r--r--intern/opennl/superlu/scolumn_bmod.c353
-rw-r--r--intern/opennl/superlu/scolumn_dfs.c270
-rw-r--r--intern/opennl/superlu/scopy_to_ucol.c105
-rw-r--r--intern/opennl/superlu/sgssv.c221
-rw-r--r--intern/opennl/superlu/sgstrf.c433
-rw-r--r--intern/opennl/superlu/sgstrs.c331
-rw-r--r--intern/opennl/superlu/smemory.c676
-rw-r--r--intern/opennl/superlu/smyblas2.c225
-rw-r--r--intern/opennl/superlu/sp_coletree.c332
-rw-r--r--intern/opennl/superlu/sp_ienv.c65
-rw-r--r--intern/opennl/superlu/sp_preorder.c203
-rw-r--r--intern/opennl/superlu/spanel_bmod.c449
-rw-r--r--intern/opennl/superlu/spanel_dfs.c249
-rw-r--r--intern/opennl/superlu/spivotL.c173
-rw-r--r--intern/opennl/superlu/spruneL.c149
-rw-r--r--intern/opennl/superlu/ssnode_bmod.c117
-rw-r--r--intern/opennl/superlu/ssnode_dfs.c106
-rw-r--r--intern/opennl/superlu/ssp_blas2.c469
-rw-r--r--intern/opennl/superlu/ssp_blas3.c121
-rw-r--r--intern/opennl/superlu/ssp_defs.h234
-rw-r--r--intern/opennl/superlu/strsv.c331
-rw-r--r--intern/opennl/superlu/superlu_timer.c55
-rw-r--r--intern/opennl/superlu/supermatrix.h140
-rw-r--r--intern/opennl/superlu/sutil.c478
-rw-r--r--intern/opennl/superlu/util.c391
-rw-r--r--intern/opennl/superlu/util.h267
-rw-r--r--intern/opennl/superlu/xerbla.c43
-rw-r--r--source/Makefile2
-rw-r--r--source/nan_definitions.mk2
51 files changed, 13799 insertions, 3 deletions
diff --git a/SConstruct b/SConstruct
index 8ff9d4f41eb..b5a6ce231bf 100644
--- a/SConstruct
+++ b/SConstruct
@@ -962,7 +962,8 @@ def blender_libs(env):
'blender_LOD',
'blender_BSP',
'blender_blenkernel',
- 'blender_IK'])
+ 'blender_IK',
+ 'blender_ONL'])
def ketsji_libs(env):
"""
diff --git a/intern/Makefile b/intern/Makefile
index 08ab03150fe..af64e44cdf4 100644
--- a/intern/Makefile
+++ b/intern/Makefile
@@ -35,7 +35,7 @@ SOURCEDIR = intern
# include nan_subdirs.mk
ALLDIRS = string ghost guardedalloc bmfont moto container memutil
-ALLDIRS += decimation iksolver bsp SoundSystem
+ALLDIRS += decimation iksolver bsp SoundSystem opennl
all::
@for i in $(ALLDIRS); do \
diff --git a/intern/SConscript b/intern/SConscript
index 833a0316634..afbcd24b8be 100644
--- a/intern/SConscript
+++ b/intern/SConscript
@@ -8,7 +8,8 @@ SConscript(['SoundSystem/SConscript',
'container/SConscript',
'memutil/SConscript/',
'decimation/SConscript',
- 'iksolver/SConscript'])
+ 'iksolver/SConscript',
+ 'opennl/SConscript'])
NEW_CSG='false'
diff --git a/intern/opennl/Makefile b/intern/opennl/Makefile
new file mode 100644
index 00000000000..8aa0d4f590b
--- /dev/null
+++ b/intern/opennl/Makefile
@@ -0,0 +1,67 @@
+#
+# $Id$
+#
+# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version. The Blender
+# Foundation also sells licenses for use in proprietary software under
+# the Blender License. See http://www.blender.org/BL/ for information
+# about this.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): Hans Lambermont
+#
+# ***** END GPL/BL DUAL LICENSE BLOCK *****
+# opennl main makefile.
+#
+
+include nan_definitions.mk
+
+LIBNAME = opennl
+LIBNAME_SLU = superlu
+SOURCEDIR = intern/$(LIBNAME)
+SOURCEDIR_SLU = intern/$(LIBNAME_SLU)
+DIR = $(OCGDIR)/$(SOURCEDIR)
+DIR_SLU = $(OCGDIR)/$(SOURCEDIR_SLU)
+DIRS = intern superlu
+
+include nan_subdirs.mk
+
+install: all debug
+ @[ -d $(NAN_OPENNL) ] || mkdir $(NAN_OPENNL)
+ @[ -d $(NAN_OPENNL)/include ] || mkdir $(NAN_OPENNL)/include
+ @[ -d $(NAN_OPENNL)/lib ] || mkdir $(NAN_OPENNL)/lib
+ @[ -d $(NAN_OPENNL)/lib/debug ] || mkdir $(NAN_OPENNL)/lib/debug
+ @../tools/cpifdiff.sh $(DIR)/libopennl.a $(NAN_OPENNL)/lib/
+ @../tools/cpifdiff.sh $(DIR)/debug/libopennl.a $(NAN_OPENNL)/lib/debug/
+ifeq ($(OS),darwin)
+ ranlib $(NAN_OPENNL)/lib/libopennl.a
+ ranlib $(NAN_OPENNL)/lib/debug/libopennl.a
+endif
+ @../tools/cpifdiff.sh extern/*.h $(NAN_OPENNL)/include/
+ @[ -d $(NAN_SUPERLU) ] || mkdir $(NAN_SUPERLU)
+ @[ -d $(NAN_SUPERLU)/lib ] || mkdir $(NAN_SUPERLU)/lib
+ @[ -d $(NAN_SUPERLU)/lib/debug ] || mkdir $(NAN_SUPERLU)/lib/debug
+ @../tools/cpifdiff.sh $(DIR_SLU)/libsuperlu.a $(NAN_SUPERLU)/lib/
+ @../tools/cpifdiff.sh $(DIR_SLU)/debug/libsuperlu.a $(NAN_SUPERLU)/lib/debug/
+ifeq ($(OS),darwin)
+ ranlib $(NAN_SUPERLU)/lib/libsuperlu.a
+ ranlib $(NAN_SUPERLU)/lib/debug/libsuperlu.a
+endif
+
diff --git a/intern/opennl/SConscript b/intern/opennl/SConscript
new file mode 100644
index 00000000000..4e0260c7f33
--- /dev/null
+++ b/intern/opennl/SConscript
@@ -0,0 +1,43 @@
+Import ('user_options_dict')
+Import ('library_env')
+
+opennl_env = library_env.Copy ()
+
+source_files = ['intern/opennl.c',
+ 'superlu/colamd.c',
+ 'superlu/get_perm_c.c',
+ 'superlu/heap_relax_snode.c',
+ 'superlu/lsame.c',
+ 'superlu/memory.c',
+ 'superlu/mmd.c',
+ 'superlu/relax_snode.c',
+ 'superlu/scolumn_bmod.c',
+ 'superlu/scolumn_dfs.c',
+ 'superlu/scopy_to_ucol.c',
+ 'superlu/sgssv.c',
+ 'superlu/sgstrf.c',
+ 'superlu/sgstrs.c',
+ 'superlu/smemory.c',
+ 'superlu/smyblas2.c',
+ 'superlu/sp_coletree.c',
+ 'superlu/sp_ienv.c',
+ 'superlu/sp_preorder.c',
+ 'superlu/spanel_bmod.c',
+ 'superlu/spanel_dfs.c',
+ 'superlu/spivotL.c',
+ 'superlu/spruneL.c',
+ 'superlu/ssnode_bmod.c',
+ 'superlu/ssnode_dfs.c',
+ 'superlu/ssp_blas2.c',
+ 'superlu/ssp_blas3.c',
+ 'superlu/strsv.c',
+ 'superlu/superlu_timer.c',
+ 'superlu/sutil.c',
+ 'superlu/util.c',
+ 'superlu/xerbla.c']
+
+opennl_env.Append (CPPPATH = ['extern',
+ 'superlu'])
+
+opennl_env.Library (target='#'+user_options_dict['BUILD_DIR']+'/lib/blender_ONL', source=source_files)
+
diff --git a/intern/opennl/doc/OpenNL_License.txt b/intern/opennl/doc/OpenNL_License.txt
new file mode 100644
index 00000000000..4e8d97fd526
--- /dev/null
+++ b/intern/opennl/doc/OpenNL_License.txt
@@ -0,0 +1,341 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
diff --git a/intern/opennl/doc/OpenNL_Readme.txt b/intern/opennl/doc/OpenNL_Readme.txt
new file mode 100644
index 00000000000..e6aea3c0286
--- /dev/null
+++ b/intern/opennl/doc/OpenNL_Readme.txt
@@ -0,0 +1,13 @@
+
+This is OpenNL, a library to easily construct and solve sparse linear systems.
+* OpenNL is supplied with a set of iterative solvers (Conjugate gradient,
+ BICGSTAB, GMRes) and preconditioners (Jacobi, SSOR).
+* OpenNL can also use other solvers (SuperLU 3.0 supported as an OpenNL
+ extension)
+
+Note that to be compatible with OpenNL, SuperLU 3.0 needs to be compiled with
+the following flag (see make.inc in SuperLU3.0):
+CDEFS = -DAdd_ (the default is -DAdd__, just remove the second underscore)
+
+OpenNL was modified for Blender to be used only as a wrapper for SuperLU.
+
diff --git a/intern/opennl/doc/SuperLU_License.txt b/intern/opennl/doc/SuperLU_License.txt
new file mode 100644
index 00000000000..f31a01782e2
--- /dev/null
+++ b/intern/opennl/doc/SuperLU_License.txt
@@ -0,0 +1,31 @@
+Copyright (c) 2003, The Regents of the University of California, through
+Lawrence Berkeley National Laboratory (subject to receipt of any required
+approvals from U.S. Dept. of Energy)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification,
+are permitted provided that the following conditions are met:
+
+(1) Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+(2) Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of
+Energy nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/intern/opennl/doc/SuperLU_Readme.txt b/intern/opennl/doc/SuperLU_Readme.txt
new file mode 100644
index 00000000000..c1cedd09893
--- /dev/null
+++ b/intern/opennl/doc/SuperLU_Readme.txt
@@ -0,0 +1,52 @@
+ SuperLU (Version 3.0)
+ =====================
+
+Copyright (c) 2003, The Regents of the University of California, through
+Lawrence Berkeley National Laboratory (subject to receipt of any required
+approvals from U.S. Dept. of Energy)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+(1) Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+(2) Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of
+Energy nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+SuperLU contains a set of subroutines to solve a sparse linear system
+A*X=B. It uses Gaussian elimination with partial pivoting (GEPP).
+The columns of A may be preordered before factorization; the
+preordering for sparsity is completely separate from the factorization.
+
+SuperLU is implemented in ANSI C, and must be compiled with standard
+ANSI C compilers. It provides functionality for both real and complex
+matrices, in both single and double precision. The file names for the
+single-precision real version start with letter "s" (such as sgstrf.c);
+the file names for the double-precision real version start with letter "d"
+(such as dgstrf.c); the file names for the single-precision complex
+version start with letter "c" (such as cgstrf.c); the file names
+for the double-precision complex version start with letter "z"
+(such as zgstrf.c).
+
+SuperLU was modified for Blender to only include single-precision
+functionality.
+
diff --git a/intern/opennl/extern/ONL_opennl.h b/intern/opennl/extern/ONL_opennl.h
new file mode 100644
index 00000000000..5e4bd24313c
--- /dev/null
+++ b/intern/opennl/extern/ONL_opennl.h
@@ -0,0 +1,163 @@
+/*
+ * $Id$
+ *
+ * OpenNL: Numerical Library
+ * Copyright (C) 2004 Bruno Levy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * If you modify this software, you should include a notice giving the
+ * name of the person performing the modification, the date of modification,
+ * and the reason for such modification.
+ *
+ * Contact: Bruno Levy
+ *
+ * levy@loria.fr
+ *
+ * ISA Project
+ * LORIA, INRIA Lorraine,
+ * Campus Scientifique, BP 239
+ * 54506 VANDOEUVRE LES NANCY CEDEX
+ * FRANCE
+ *
+ * Note that the GNU General Public License does not permit incorporating
+ * the Software into proprietary programs.
+ */
+
+/*
+#define NL_DEBUG
+#define NL_PARANOID
+*/
+
+#define NL_USE_SUPERLU
+
+#ifndef nlOPENNL_H
+#define nlOPENNL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NL_VERSION_0_0 1
+
+/*
+ *
+ * Datatypes
+ *
+ */
+
+typedef unsigned int NLenum;
+typedef unsigned char NLboolean;
+typedef unsigned int NLbitfield;
+typedef void NLvoid;
+typedef signed char NLbyte; /* 1-byte signed */
+typedef short NLshort; /* 2-byte signed */
+typedef int NLint; /* 4-byte signed */
+typedef unsigned char NLubyte; /* 1-byte unsigned */
+typedef unsigned short NLushort; /* 2-byte unsigned */
+typedef unsigned int NLuint; /* 4-byte unsigned */
+typedef int NLsizei; /* 4-byte signed */
+typedef float NLfloat; /* single precision float */
+typedef double NLdouble; /* double precision float */
+
+typedef void* NLContext ;
+
+/*
+ *
+ * Constants
+ *
+ */
+
+#define NL_FALSE 0x0
+#define NL_TRUE 0x1
+
+/* Primitives */
+
+#define NL_SYSTEM 0x0
+#define NL_MATRIX 0x1
+#define NL_ROW 0x2
+
+/* Solver Parameters */
+
+#define NL_SOLVER 0x100
+#define NL_NB_VARIABLES 0x101
+#define NL_LEAST_SQUARES 0x102
+#define NL_SYMMETRIC 0x106
+#define NL_ERROR 0x108
+
+/* Enable / Disable */
+
+#define NL_NORMALIZE_ROWS 0x400
+
+/* Row parameters */
+
+#define NL_RIGHT_HAND_SIDE 0x500
+#define NL_ROW_SCALING 0x501
+
+/*
+ * Contexts
+ */
+ NLContext nlNewContext() ;
+ void nlDeleteContext(NLContext context) ;
+ void nlMakeCurrent(NLContext context) ;
+ NLContext nlGetCurrent() ;
+
+/*
+ * State set/get
+ */
+
+ void nlSolverParameterf(NLenum pname, NLfloat param) ;
+ void nlSolverParameteri(NLenum pname, NLint param) ;
+
+ void nlRowParameterf(NLenum pname, NLfloat param) ;
+ void nlRowParameteri(NLenum pname, NLint param) ;
+
+ void nlGetBooleanv(NLenum pname, NLboolean* params) ;
+ void nlGetFloatv(NLenum pname, NLfloat* params) ;
+ void nlGetIntergerv(NLenum pname, NLint* params) ;
+
+ void nlEnable(NLenum pname) ;
+ void nlDisable(NLenum pname) ;
+ NLboolean nlIsEnabled(NLenum pname) ;
+
+/*
+ * Variables
+ */
+ void nlSetVariable(NLuint index, NLfloat value) ;
+ NLfloat nlGetVariable(NLuint index) ;
+ void nlLockVariable(NLuint index) ;
+ void nlUnlockVariable(NLuint index) ;
+ NLboolean nlVariableIsLocked(NLuint index) ;
+
+/*
+ * Begin/End
+ */
+
+ void nlBegin(NLenum primitive) ;
+ void nlEnd(NLenum primitive) ;
+ void nlCoefficient(NLuint index, NLfloat value) ;
+
+/*
+ * Solve
+ */
+
+ NLboolean nlSolve() ;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/intern/opennl/intern/Makefile b/intern/opennl/intern/Makefile
new file mode 100644
index 00000000000..2e57905d931
--- /dev/null
+++ b/intern/opennl/intern/Makefile
@@ -0,0 +1,43 @@
+#
+# $Id$
+#
+# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version. The Blender
+# Foundation also sells licenses for use in proprietary software under
+# the Blender License. See http://www.blender.org/BL/ for information
+# about this.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): none yet.
+#
+# ***** END GPL/BL DUAL LICENSE BLOCK *****
+# opennl intern Makefile
+#
+
+LIBNAME = opennl
+DIR = $(OCGDIR)/intern/$(LIBNAME)
+
+include nan_compile.mk
+
+CCFLAGS += $(NAN_LEVEL_2_CPP_WARNINGS)
+
+CPPFLAGS += -I../superlu -I../extern
+
+
diff --git a/intern/opennl/intern/opennl.c b/intern/opennl/intern/opennl.c
new file mode 100644
index 00000000000..be797223f51
--- /dev/null
+++ b/intern/opennl/intern/opennl.c
@@ -0,0 +1,1151 @@
+/*
+ * $Id$
+ *
+ * OpenNL: Numerical Library
+ * Copyright (C) 2004 Bruno Levy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * If you modify this software, you should include a notice giving the
+ * name of the person performing the modification, the date of modification,
+ * and the reason for such modification.
+ *
+ * Contact: Bruno Levy
+ *
+ * levy@loria.fr
+ *
+ * ISA Project
+ * LORIA, INRIA Lorraine,
+ * Campus Scientifique, BP 239
+ * 54506 VANDOEUVRE LES NANCY CEDEX
+ * FRANCE
+ *
+ * Note that the GNU General Public License does not permit incorporating
+ * the Software into proprietary programs.
+ */
+
+#include "ONL_opennl.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#ifdef NL_PARANOID
+#ifndef NL_DEBUG
+#define NL_DEBUG
+#endif
+#endif
+
+/* SuperLU includes */
+#include <ssp_defs.h>
+#include <util.h>
+
+/************************************************************************************/
+/* Assertions */
+
+
+static void __nl_assertion_failed(char* cond, char* file, int line) {
+ fprintf(
+ stderr,
+ "OpenNL assertion failed: %s, file:%s, line:%d\n",
+ cond,file,line
+ ) ;
+ abort() ;
+}
+
+static void __nl_range_assertion_failed(
+ float x, float min_val, float max_val, char* file, int line
+) {
+ fprintf(
+ stderr,
+ "OpenNL range assertion failed: %f in [ %f ... %f ], file:%s, line:%d\n",
+ x, min_val, max_val, file,line
+ ) ;
+ abort() ;
+}
+
+static void __nl_should_not_have_reached(char* file, int line) {
+ fprintf(
+ stderr,
+ "OpenNL should not have reached this point: file:%s, line:%d\n",
+ file,line
+ ) ;
+ abort() ;
+}
+
+
+#define __nl_assert(x) { \
+ if(!(x)) { \
+ __nl_assertion_failed(#x,__FILE__, __LINE__) ; \
+ } \
+}
+
+#define __nl_range_assert(x,min_val,max_val) { \
+ if(((x) < (min_val)) || ((x) > (max_val))) { \
+ __nl_range_assertion_failed(x, min_val, max_val, \
+ __FILE__, __LINE__ \
+ ) ; \
+ } \
+}
+
+#define __nl_assert_not_reached { \
+ __nl_should_not_have_reached(__FILE__, __LINE__) ; \
+}
+
+#ifdef NL_DEBUG
+#define __nl_debug_assert(x) __nl_assert(x)
+#define __nl_debug_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
+#else
+#define __nl_debug_assert(x)
+#define __nl_debug_range_assert(x,min_val,max_val)
+#endif
+
+#ifdef NL_PARANOID
+#define __nl_parano_assert(x) __nl_assert(x)
+#define __nl_parano_range_assert(x,min_val,max_val) __nl_range_assert(x,min_val,max_val)
+#else
+#define __nl_parano_assert(x)
+#define __nl_parano_range_assert(x,min_val,max_val)
+#endif
+
+/************************************************************************************/
+/* classic macros */
+
+#ifndef MIN
+#define MIN(x,y) (((x) < (y)) ? (x) : (y))
+#endif
+
+#ifndef MAX
+#define MAX(x,y) (((x) > (y)) ? (x) : (y))
+#endif
+
+/************************************************************************************/
+/* memory management */
+
+#define __NL_NEW(T) (T*)(calloc(1, sizeof(T)))
+#define __NL_NEW_ARRAY(T,NB) (T*)(calloc((NB),sizeof(T)))
+#define __NL_RENEW_ARRAY(T,x,NB) (T*)(realloc(x,(NB)*sizeof(T)))
+#define __NL_DELETE(x) free(x); x = NULL
+#define __NL_DELETE_ARRAY(x) free(x); x = NULL
+
+#define __NL_CLEAR(T, x) memset(x, 0, sizeof(T))
+#define __NL_CLEAR_ARRAY(T,x,NB) memset(x, 0, (NB)*sizeof(T))
+
+/************************************************************************************/
+/* Dynamic arrays for sparse row/columns */
+
+typedef struct {
+ NLuint index ;
+ NLfloat value ;
+} __NLCoeff ;
+
+typedef struct {
+ NLuint size ;
+ NLuint capacity ;
+ __NLCoeff* coeff ;
+} __NLRowColumn ;
+
+static void __nlRowColumnConstruct(__NLRowColumn* c) {
+ c->size = 0 ;
+ c->capacity = 0 ;
+ c->coeff = NULL ;
+}
+
+static void __nlRowColumnDestroy(__NLRowColumn* c) {
+ __NL_DELETE_ARRAY(c->coeff) ;
+#ifdef NL_PARANOID
+ __NL_CLEAR(__NLRowColumn, c) ;
+#endif
+}
+
+static void __nlRowColumnGrow(__NLRowColumn* c) {
+ if(c->capacity != 0) {
+ c->capacity = 2 * c->capacity ;
+ c->coeff = __NL_RENEW_ARRAY(__NLCoeff, c->coeff, c->capacity) ;
+ } else {
+ c->capacity = 4 ;
+ c->coeff = __NL_NEW_ARRAY(__NLCoeff, c->capacity) ;
+ }
+}
+
+static void __nlRowColumnAdd(__NLRowColumn* c, NLint index, NLfloat value) {
+ NLuint i ;
+ for(i=0; i<c->size; i++) {
+ if(c->coeff[i].index == (NLuint)index) {
+ c->coeff[i].value += value ;
+ return ;
+ }
+ }
+ if(c->size == c->capacity) {
+ __nlRowColumnGrow(c) ;
+ }
+ c->coeff[c->size].index = index ;
+ c->coeff[c->size].value = value ;
+ c->size++ ;
+}
+
+/* Does not check whether the index already exists */
+static void __nlRowColumnAppend(__NLRowColumn* c, NLint index, NLfloat value) {
+ if(c->size == c->capacity) {
+ __nlRowColumnGrow(c) ;
+ }
+ c->coeff[c->size].index = index ;
+ c->coeff[c->size].value = value ;
+ c->size++ ;
+}
+
+static void __nlRowColumnZero(__NLRowColumn* c) {
+ c->size = 0 ;
+}
+
+static void __nlRowColumnClear(__NLRowColumn* c) {
+ c->size = 0 ;
+ c->capacity = 0 ;
+ __NL_DELETE_ARRAY(c->coeff) ;
+}
+
+/************************************************************************************/
+/* SparseMatrix data structure */
+
+#define __NL_ROWS 1
+#define __NL_COLUMNS 2
+#define __NL_SYMMETRIC 4
+
+typedef struct {
+ NLuint m ;
+ NLuint n ;
+ NLuint diag_size ;
+ NLenum storage ;
+ __NLRowColumn* row ;
+ __NLRowColumn* column ;
+ NLfloat* diag ;
+} __NLSparseMatrix ;
+
+
+static void __nlSparseMatrixConstruct(
+ __NLSparseMatrix* M, NLuint m, NLuint n, NLenum storage
+) {
+ NLuint i ;
+ M->m = m ;
+ M->n = n ;
+ M->storage = storage ;
+ if(storage & __NL_ROWS) {
+ M->row = __NL_NEW_ARRAY(__NLRowColumn, m) ;
+ for(i=0; i<n; i++) {
+ __nlRowColumnConstruct(&(M->row[i])) ;
+ }
+ } else {
+ M->row = NULL ;
+ }
+
+ if(storage & __NL_COLUMNS) {
+ M->column = __NL_NEW_ARRAY(__NLRowColumn, n) ;
+ for(i=0; i<n; i++) {
+ __nlRowColumnConstruct(&(M->column[i])) ;
+ }
+ } else {
+ M->column = NULL ;
+ }
+
+ M->diag_size = MIN(m,n) ;
+ M->diag = __NL_NEW_ARRAY(NLfloat, M->diag_size) ;
+}
+
+static void __nlSparseMatrixDestroy(__NLSparseMatrix* M) {
+ NLuint i ;
+ __NL_DELETE_ARRAY(M->diag) ;
+ if(M->storage & __NL_ROWS) {
+ for(i=0; i<M->m; i++) {
+ __nlRowColumnDestroy(&(M->row[i])) ;
+ }
+ __NL_DELETE_ARRAY(M->row) ;
+ }
+ if(M->storage & __NL_COLUMNS) {
+ for(i=0; i<M->n; i++) {
+ __nlRowColumnDestroy(&(M->column[i])) ;
+ }
+ __NL_DELETE_ARRAY(M->column) ;
+ }
+#ifdef NL_PARANOID
+ __NL_CLEAR(__NLSparseMatrix,M) ;
+#endif
+}
+
+static void __nlSparseMatrixAdd(
+ __NLSparseMatrix* M, NLuint i, NLuint j, NLfloat value
+) {
+ __nl_parano_range_assert(i, 0, M->m - 1) ;
+ __nl_parano_range_assert(j, 0, M->n - 1) ;
+ if((M->storage & __NL_SYMMETRIC) && (j > i)) {
+ return ;
+ }
+ if(i == j) {
+ M->diag[i] += value ;
+ }
+ if(M->storage & __NL_ROWS) {
+ __nlRowColumnAdd(&(M->row[i]), j, value) ;
+ }
+ if(M->storage & __NL_COLUMNS) {
+ __nlRowColumnAdd(&(M->column[j]), i, value) ;
+ }
+}
+
+static void __nlSparseMatrixClear( __NLSparseMatrix* M) {
+ NLuint i ;
+ if(M->storage & __NL_ROWS) {
+ for(i=0; i<M->m; i++) {
+ __nlRowColumnClear(&(M->row[i])) ;
+ }
+ }
+ if(M->storage & __NL_COLUMNS) {
+ for(i=0; i<M->n; i++) {
+ __nlRowColumnClear(&(M->column[i])) ;
+ }
+ }
+ __NL_CLEAR_ARRAY(NLfloat, M->diag, M->diag_size) ;
+}
+
+/* Returns the number of non-zero coefficients */
+static NLuint __nlSparseMatrixNNZ( __NLSparseMatrix* M) {
+ NLuint nnz = 0 ;
+ NLuint i ;
+ if(M->storage & __NL_ROWS) {
+ for(i = 0; i<M->m; i++) {
+ nnz += M->row[i].size ;
+ }
+ } else if (M->storage & __NL_COLUMNS) {
+ for(i = 0; i<M->n; i++) {
+ nnz += M->column[i].size ;
+ }
+ } else {
+ __nl_assert_not_reached ;
+ }
+ return nnz ;
+}
+
+/************************************************************************************/
+/* SparseMatrix x Vector routines, internal helper routines */
+
+static void __nlSparseMatrix_mult_rows_symmetric(
+ __NLSparseMatrix* A, NLfloat* x, NLfloat* y
+) {
+ NLuint m = A->m ;
+ NLuint i,ij ;
+ __NLRowColumn* Ri = NULL ;
+ __NLCoeff* c = NULL ;
+ for(i=0; i<m; i++) {
+ y[i] = 0 ;
+ Ri = &(A->row[i]) ;
+ for(ij=0; ij<Ri->size; ij++) {
+ c = &(Ri->coeff[ij]) ;
+ y[i] += c->value * x[c->index] ;
+ if(i != c->index) {
+ y[c->index] += c->value * x[i] ;
+ }
+ }
+ }
+}
+
+static void __nlSparseMatrix_mult_rows(
+ __NLSparseMatrix* A, NLfloat* x, NLfloat* y
+) {
+ NLuint m = A->m ;
+ NLuint i,ij ;
+ __NLRowColumn* Ri = NULL ;
+ __NLCoeff* c = NULL ;
+ for(i=0; i<m; i++) {
+ y[i] = 0 ;
+ Ri = &(A->row[i]) ;
+ for(ij=0; ij<Ri->size; ij++) {
+ c = &(Ri->coeff[ij]) ;
+ y[i] += c->value * x[c->index] ;
+ }
+ }
+}
+
+static void __nlSparseMatrix_mult_cols_symmetric(
+ __NLSparseMatrix* A, NLfloat* x, NLfloat* y
+) {
+ NLuint n = A->n ;
+ NLuint j,ii ;
+ __NLRowColumn* Cj = NULL ;
+ __NLCoeff* c = NULL ;
+ for(j=0; j<n; j++) {
+ y[j] = 0 ;
+ Cj = &(A->column[j]) ;
+ for(ii=0; ii<Cj->size; ii++) {
+ c = &(Cj->coeff[ii]) ;
+ y[c->index] += c->value * x[j] ;
+ if(j != c->index) {
+ y[j] += c->value * x[c->index] ;
+ }
+ }
+ }
+}
+
+static void __nlSparseMatrix_mult_cols(
+ __NLSparseMatrix* A, NLfloat* x, NLfloat* y
+) {
+ NLuint n = A->n ;
+ NLuint j,ii ;
+ __NLRowColumn* Cj = NULL ;
+ __NLCoeff* c = NULL ;
+ __NL_CLEAR_ARRAY(NLfloat, y, A->m) ;
+ for(j=0; j<n; j++) {
+ Cj = &(A->column[j]) ;
+ for(ii=0; ii<Cj->size; ii++) {
+ c = &(Cj->coeff[ii]) ;
+ y[c->index] += c->value * x[j] ;
+ }
+ }
+}
+
+/************************************************************************************/
+/* SparseMatrix x Vector routines, main driver routine */
+
+void __nlSparseMatrixMult(__NLSparseMatrix* A, NLfloat* x, NLfloat* y) {
+ if(A->storage & __NL_ROWS) {
+ if(A->storage & __NL_SYMMETRIC) {
+ __nlSparseMatrix_mult_rows_symmetric(A, x, y) ;
+ } else {
+ __nlSparseMatrix_mult_rows(A, x, y) ;
+ }
+ } else {
+ if(A->storage & __NL_SYMMETRIC) {
+ __nlSparseMatrix_mult_cols_symmetric(A, x, y) ;
+ } else {
+ __nlSparseMatrix_mult_cols(A, x, y) ;
+ }
+ }
+}
+
+/************************************************************************************/
+/* NLContext data structure */
+
+typedef void(*__NLMatrixFunc)(float* x, float* y) ;
+
+typedef struct {
+ NLfloat value ;
+ NLboolean locked ;
+ NLuint index ;
+} __NLVariable ;
+
+#define __NL_STATE_INITIAL 0
+#define __NL_STATE_SYSTEM 1
+#define __NL_STATE_MATRIX 2
+#define __NL_STATE_ROW 3
+#define __NL_STATE_MATRIX_CONSTRUCTED 4
+#define __NL_STATE_SYSTEM_CONSTRUCTED 5
+#define __NL_STATE_SOLVED 6
+
+typedef struct {
+ NLenum state ;
+ __NLVariable* variable ;
+ NLuint n ;
+ __NLSparseMatrix M ;
+ __NLRowColumn af ;
+ __NLRowColumn al ;
+ __NLRowColumn xl ;
+ NLfloat* x ;
+ NLfloat* b ;
+ NLfloat right_hand_side ;
+ NLfloat row_scaling ;
+ NLuint nb_variables ;
+ NLuint current_row ;
+ NLboolean least_squares ;
+ NLboolean symmetric ;
+ NLboolean normalize_rows ;
+ NLboolean alloc_M ;
+ NLboolean alloc_af ;
+ NLboolean alloc_al ;
+ NLboolean alloc_xl ;
+ NLboolean alloc_variable ;
+ NLboolean alloc_x ;
+ NLboolean alloc_b ;
+ NLfloat error ;
+ __NLMatrixFunc matrix_vector_prod ;
+} __NLContext ;
+
+static __NLContext* __nlCurrentContext = NULL ;
+
+void __nlMatrixVectorProd_default(NLfloat* x, NLfloat* y) {
+ __nlSparseMatrixMult(&(__nlCurrentContext->M), x, y) ;
+}
+
+
+NLContext nlNewContext() {
+ __NLContext* result = __NL_NEW(__NLContext) ;
+ result->state = __NL_STATE_INITIAL ;
+ result->row_scaling = 1.0 ;
+ result->right_hand_side = 0.0 ;
+ result->matrix_vector_prod = __nlMatrixVectorProd_default ;
+ nlMakeCurrent(result) ;
+ return result ;
+}
+
+void nlDeleteContext(NLContext context_in) {
+ __NLContext* context = (__NLContext*)(context_in) ;
+ if(__nlCurrentContext == context) {
+ __nlCurrentContext = NULL ;
+ }
+ if(context->alloc_M) {
+ __nlSparseMatrixDestroy(&context->M) ;
+ }
+ if(context->alloc_af) {
+ __nlRowColumnDestroy(&context->af) ;
+ }
+ if(context->alloc_al) {
+ __nlRowColumnDestroy(&context->al) ;
+ }
+ if(context->alloc_xl) {
+ __nlRowColumnDestroy(&context->xl) ;
+ }
+ if(context->alloc_variable) {
+ __NL_DELETE_ARRAY(context->variable) ;
+ }
+ if(context->alloc_x) {
+ __NL_DELETE_ARRAY(context->x) ;
+ }
+ if(context->alloc_b) {
+ __NL_DELETE_ARRAY(context->b) ;
+ }
+
+#ifdef NL_PARANOID
+ __NL_CLEAR(__NLContext, context) ;
+#endif
+ __NL_DELETE(context) ;
+}
+
+void nlMakeCurrent(NLContext context) {
+ __nlCurrentContext = (__NLContext*)(context) ;
+}
+
+NLContext nlGetCurrent() {
+ return __nlCurrentContext ;
+}
+
+void __nlCheckState(NLenum state) {
+ __nl_assert(__nlCurrentContext->state == state) ;
+}
+
+void __nlTransition(NLenum from_state, NLenum to_state) {
+ __nlCheckState(from_state) ;
+ __nlCurrentContext->state = to_state ;
+}
+
+/************************************************************************************/
+/* Get/Set parameters */
+
+void nlSolverParameterf(NLenum pname, NLfloat param) {
+ __nlCheckState(__NL_STATE_INITIAL) ;
+ switch(pname) {
+ case NL_NB_VARIABLES: {
+ __nl_assert(param > 0) ;
+ __nlCurrentContext->nb_variables = (NLuint)param ;
+ } break ;
+ case NL_LEAST_SQUARES: {
+ __nlCurrentContext->least_squares = (NLboolean)param ;
+ } break ;
+ case NL_SYMMETRIC: {
+ __nlCurrentContext->symmetric = (NLboolean)param ;
+ }
+ default: {
+ __nl_assert_not_reached ;
+ } break ;
+ }
+}
+
+void nlSolverParameteri(NLenum pname, NLint param) {
+ __nlCheckState(__NL_STATE_INITIAL) ;
+ switch(pname) {
+ case NL_NB_VARIABLES: {
+ __nl_assert(param > 0) ;
+ __nlCurrentContext->nb_variables = (NLuint)param ;
+ } break ;
+ case NL_LEAST_SQUARES: {
+ __nlCurrentContext->least_squares = (NLboolean)param ;
+ } break ;
+ case NL_SYMMETRIC: {
+ __nlCurrentContext->symmetric = (NLboolean)param ;
+ }
+ default: {
+ __nl_assert_not_reached ;
+ } break ;
+ }
+}
+
+void nlRowParameterf(NLenum pname, NLfloat param) {
+ __nlCheckState(__NL_STATE_MATRIX) ;
+ switch(pname) {
+ case NL_RIGHT_HAND_SIDE: {
+ __nlCurrentContext->right_hand_side = param ;
+ } break ;
+ case NL_ROW_SCALING: {
+ __nlCurrentContext->row_scaling = param ;
+ } break ;
+ }
+}
+
+void nlRowParameteri(NLenum pname, NLint param) {
+ __nlCheckState(__NL_STATE_MATRIX) ;
+ switch(pname) {
+ case NL_RIGHT_HAND_SIDE: {
+ __nlCurrentContext->right_hand_side = (NLfloat)param ;
+ } break ;
+ case NL_ROW_SCALING: {
+ __nlCurrentContext->row_scaling = (NLfloat)param ;
+ } break ;
+ }
+}
+
+void nlGetBooleanv(NLenum pname, NLboolean* params) {
+ switch(pname) {
+ case NL_LEAST_SQUARES: {
+ *params = __nlCurrentContext->least_squares ;
+ } break ;
+ case NL_SYMMETRIC: {
+ *params = __nlCurrentContext->symmetric ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ } break ;
+ }
+}
+
+void nlGetFloatv(NLenum pname, NLfloat* params) {
+ switch(pname) {
+ case NL_NB_VARIABLES: {
+ *params = (NLfloat)(__nlCurrentContext->nb_variables) ;
+ } break ;
+ case NL_LEAST_SQUARES: {
+ *params = (NLfloat)(__nlCurrentContext->least_squares) ;
+ } break ;
+ case NL_SYMMETRIC: {
+ *params = (NLfloat)(__nlCurrentContext->symmetric) ;
+ } break ;
+ case NL_ERROR: {
+ *params = (NLfloat)(__nlCurrentContext->error) ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ } break ;
+ }
+}
+
+void nlGetIntergerv(NLenum pname, NLint* params) {
+ switch(pname) {
+ case NL_NB_VARIABLES: {
+ *params = (NLint)(__nlCurrentContext->nb_variables) ;
+ } break ;
+ case NL_LEAST_SQUARES: {
+ *params = (NLint)(__nlCurrentContext->least_squares) ;
+ } break ;
+ case NL_SYMMETRIC: {
+ *params = (NLint)(__nlCurrentContext->symmetric) ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ } break ;
+ }
+}
+
+/************************************************************************************/
+/* Enable / Disable */
+
+void nlEnable(NLenum pname) {
+ switch(pname) {
+ case NL_NORMALIZE_ROWS: {
+ __nl_assert(__nlCurrentContext->state != __NL_STATE_ROW) ;
+ __nlCurrentContext->normalize_rows = NL_TRUE ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ }
+ }
+}
+
+void nlDisable(NLenum pname) {
+ switch(pname) {
+ case NL_NORMALIZE_ROWS: {
+ __nl_assert(__nlCurrentContext->state != __NL_STATE_ROW) ;
+ __nlCurrentContext->normalize_rows = NL_FALSE ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ }
+ }
+}
+
+NLboolean nlIsEnabled(NLenum pname) {
+ switch(pname) {
+ case NL_NORMALIZE_ROWS: {
+ return __nlCurrentContext->normalize_rows ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ }
+ }
+ return NL_FALSE ;
+}
+
+/************************************************************************************/
+/* Get/Set Lock/Unlock variables */
+
+void nlSetVariable(NLuint index, NLfloat value) {
+ __nlCheckState(__NL_STATE_SYSTEM) ;
+ __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1) ;
+ __nlCurrentContext->variable[index].value = value ;
+}
+
+NLfloat nlGetVariable(NLuint index) {
+ __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL) ;
+ __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1) ;
+ return __nlCurrentContext->variable[index].value ;
+}
+
+void nlLockVariable(NLuint index) {
+ __nlCheckState(__NL_STATE_SYSTEM) ;
+ __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1) ;
+ __nlCurrentContext->variable[index].locked = NL_TRUE ;
+}
+
+void nlUnlockVariable(NLuint index) {
+ __nlCheckState(__NL_STATE_SYSTEM) ;
+ __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1) ;
+ __nlCurrentContext->variable[index].locked = NL_FALSE ;
+}
+
+NLboolean nlVariableIsLocked(NLuint index) {
+ __nl_assert(__nlCurrentContext->state != __NL_STATE_INITIAL) ;
+ __nl_parano_range_assert(index, 0, __nlCurrentContext->nb_variables - 1) ;
+ return __nlCurrentContext->variable[index].locked ;
+}
+
+/************************************************************************************/
+/* System construction */
+
+void __nlVariablesToVector() {
+ NLuint i ;
+ __nl_assert(__nlCurrentContext->alloc_x) ;
+ __nl_assert(__nlCurrentContext->alloc_variable) ;
+ for(i=0; i<__nlCurrentContext->nb_variables; i++) {
+ __NLVariable* v = &(__nlCurrentContext->variable[i]) ;
+ if(!v->locked) {
+ __nl_assert(v->index < __nlCurrentContext->n) ;
+ __nlCurrentContext->x[v->index] = v->value ;
+ }
+ }
+}
+
+void __nlVectorToVariables() {
+ NLuint i ;
+ __nl_assert(__nlCurrentContext->alloc_x) ;
+ __nl_assert(__nlCurrentContext->alloc_variable) ;
+ for(i=0; i<__nlCurrentContext->nb_variables; i++) {
+ __NLVariable* v = &(__nlCurrentContext->variable[i]) ;
+ if(!v->locked) {
+ __nl_assert(v->index < __nlCurrentContext->n) ;
+ v->value = __nlCurrentContext->x[v->index] ;
+ }
+ }
+}
+
+
+void __nlBeginSystem() {
+ __nlTransition(__NL_STATE_INITIAL, __NL_STATE_SYSTEM) ;
+ __nl_assert(__nlCurrentContext->nb_variables > 0) ;
+ __nlCurrentContext->variable = __NL_NEW_ARRAY(
+ __NLVariable, __nlCurrentContext->nb_variables
+ ) ;
+ __nlCurrentContext->alloc_variable = NL_TRUE ;
+}
+
+void __nlEndSystem() {
+ __nlTransition(__NL_STATE_MATRIX_CONSTRUCTED, __NL_STATE_SYSTEM_CONSTRUCTED) ;
+}
+
+void __nlBeginMatrix() {
+ NLuint i ;
+ NLuint n = 0 ;
+ NLenum storage = __NL_ROWS ;
+
+ __nlTransition(__NL_STATE_SYSTEM, __NL_STATE_MATRIX) ;
+
+ for(i=0; i<__nlCurrentContext->nb_variables; i++) {
+ if(!__nlCurrentContext->variable[i].locked) {
+ __nlCurrentContext->variable[i].index = n ;
+ n++ ;
+ } else {
+ __nlCurrentContext->variable[i].index = ~0 ;
+ }
+ }
+
+ __nlCurrentContext->n = n ;
+
+ /* a least squares problem results in a symmetric matrix */
+ if(__nlCurrentContext->least_squares) {
+ __nlCurrentContext->symmetric = NL_TRUE ;
+ }
+
+ if(__nlCurrentContext->symmetric) {
+ storage = (storage | __NL_SYMMETRIC) ;
+ }
+
+ /* SuperLU storage does not support symmetric storage */
+ storage = (storage & ~__NL_SYMMETRIC) ;
+
+ __nlSparseMatrixConstruct(&__nlCurrentContext->M, n, n, storage) ;
+ __nlCurrentContext->alloc_M = NL_TRUE ;
+
+ __nlCurrentContext->x = __NL_NEW_ARRAY(NLfloat, n) ;
+ __nlCurrentContext->alloc_x = NL_TRUE ;
+
+ __nlCurrentContext->b = __NL_NEW_ARRAY(NLfloat, n) ;
+ __nlCurrentContext->alloc_b = NL_TRUE ;
+
+ __nlVariablesToVector() ;
+
+ __nlRowColumnConstruct(&__nlCurrentContext->af) ;
+ __nlCurrentContext->alloc_af = NL_TRUE ;
+ __nlRowColumnConstruct(&__nlCurrentContext->al) ;
+ __nlCurrentContext->alloc_al = NL_TRUE ;
+ __nlRowColumnConstruct(&__nlCurrentContext->xl) ;
+ __nlCurrentContext->alloc_xl = NL_TRUE ;
+
+ __nlCurrentContext->current_row = 0 ;
+}
+
+void __nlEndMatrix() {
+ __nlTransition(__NL_STATE_MATRIX, __NL_STATE_MATRIX_CONSTRUCTED) ;
+
+ __nlRowColumnDestroy(&__nlCurrentContext->af) ;
+ __nlCurrentContext->alloc_af = NL_FALSE ;
+ __nlRowColumnDestroy(&__nlCurrentContext->al) ;
+ __nlCurrentContext->alloc_al = NL_FALSE ;
+ __nlRowColumnDestroy(&__nlCurrentContext->xl) ;
+ __nlCurrentContext->alloc_al = NL_FALSE ;
+
+ if(!__nlCurrentContext->least_squares) {
+ __nl_assert(
+ __nlCurrentContext->current_row ==
+ __nlCurrentContext->n
+ ) ;
+ }
+}
+
+void __nlBeginRow() {
+ __nlTransition(__NL_STATE_MATRIX, __NL_STATE_ROW) ;
+ __nlRowColumnZero(&__nlCurrentContext->af) ;
+ __nlRowColumnZero(&__nlCurrentContext->al) ;
+ __nlRowColumnZero(&__nlCurrentContext->xl) ;
+}
+
+void __nlScaleRow(NLfloat s) {
+ __NLRowColumn* af = &__nlCurrentContext->af ;
+ __NLRowColumn* al = &__nlCurrentContext->al ;
+ NLuint nf = af->size ;
+ NLuint nl = al->size ;
+ NLuint i ;
+ for(i=0; i<nf; i++) {
+ af->coeff[i].value *= s ;
+ }
+ for(i=0; i<nl; i++) {
+ al->coeff[i].value *= s ;
+ }
+ __nlCurrentContext->right_hand_side *= s ;
+}
+
+void __nlNormalizeRow(NLfloat weight) {
+ __NLRowColumn* af = &__nlCurrentContext->af ;
+ __NLRowColumn* al = &__nlCurrentContext->al ;
+ NLuint nf = af->size ;
+ NLuint nl = al->size ;
+ NLuint i ;
+ NLfloat norm = 0.0 ;
+ for(i=0; i<nf; i++) {
+ norm += af->coeff[i].value * af->coeff[i].value ;
+ }
+ for(i=0; i<nl; i++) {
+ norm += al->coeff[i].value * al->coeff[i].value ;
+ }
+ norm = sqrt(norm) ;
+ __nlScaleRow(weight / norm) ;
+}
+
+void __nlEndRow() {
+ __NLRowColumn* af = &__nlCurrentContext->af ;
+ __NLRowColumn* al = &__nlCurrentContext->al ;
+ __NLRowColumn* xl = &__nlCurrentContext->xl ;
+ __NLSparseMatrix* M = &__nlCurrentContext->M ;
+ NLfloat* b = __nlCurrentContext->b ;
+ NLuint nf = af->size ;
+ NLuint nl = al->size ;
+ NLuint current_row = __nlCurrentContext->current_row ;
+ NLuint i ;
+ NLuint j ;
+ NLfloat S ;
+ __nlTransition(__NL_STATE_ROW, __NL_STATE_MATRIX) ;
+
+ if(__nlCurrentContext->normalize_rows) {
+ __nlNormalizeRow(__nlCurrentContext->row_scaling) ;
+ } else {
+ __nlScaleRow(__nlCurrentContext->row_scaling) ;
+ }
+
+ if(__nlCurrentContext->least_squares) {
+ for(i=0; i<nf; i++) {
+ for(j=0; j<nf; j++) {
+ __nlSparseMatrixAdd(
+ M, af->coeff[i].index, af->coeff[j].index,
+ af->coeff[i].value * af->coeff[j].value
+ ) ;
+ }
+ }
+ S = -__nlCurrentContext->right_hand_side ;
+ for(j=0; j<nl; j++) {
+ S += al->coeff[j].value * xl->coeff[j].value ;
+ }
+ for(i=0; i<nf; i++) {
+ b[ af->coeff[i].index ] -= af->coeff[i].value * S ;
+ }
+ } else {
+ for(i=0; i<nf; i++) {
+ __nlSparseMatrixAdd(
+ M, current_row, af->coeff[i].index, af->coeff[i].value
+ ) ;
+ }
+ b[current_row] = -__nlCurrentContext->right_hand_side ;
+ for(i=0; i<nl; i++) {
+ b[current_row] -= al->coeff[i].value * xl->coeff[i].value ;
+ }
+ }
+ __nlCurrentContext->current_row++ ;
+ __nlCurrentContext->right_hand_side = 0.0 ;
+ __nlCurrentContext->row_scaling = 1.0 ;
+}
+
+void nlCoefficient(NLuint index, NLfloat value) {
+ __NLVariable* v;
+ unsigned int zero= 0;
+ __nlCheckState(__NL_STATE_ROW) ;
+ __nl_range_assert(index, zero, __nlCurrentContext->nb_variables - 1) ;
+ v = &(__nlCurrentContext->variable[index]) ;
+ if(v->locked) {
+ __nlRowColumnAppend(&(__nlCurrentContext->al), 0, value) ;
+ __nlRowColumnAppend(&(__nlCurrentContext->xl), 0, v->value) ;
+ } else {
+ __nlRowColumnAppend(&(__nlCurrentContext->af), v->index, value) ;
+ }
+}
+
+void nlBegin(NLenum prim) {
+ switch(prim) {
+ case NL_SYSTEM: {
+ __nlBeginSystem() ;
+ } break ;
+ case NL_MATRIX: {
+ __nlBeginMatrix() ;
+ } break ;
+ case NL_ROW: {
+ __nlBeginRow() ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ }
+ }
+}
+
+void nlEnd(NLenum prim) {
+ switch(prim) {
+ case NL_SYSTEM: {
+ __nlEndSystem() ;
+ } break ;
+ case NL_MATRIX: {
+ __nlEndMatrix() ;
+ } break ;
+ case NL_ROW: {
+ __nlEndRow() ;
+ } break ;
+ default: {
+ __nl_assert_not_reached ;
+ }
+ }
+}
+
+/************************************************************************/
+/* SuperLU wrapper */
+
+/* Note: SuperLU is difficult to call, but it is worth it. */
+/* Here is a driver inspired by A. Sheffer's "cow flattener". */
+static NLboolean __nlSolve_SUPERLU( NLboolean do_perm) {
+
+ /* OpenNL Context */
+ __NLSparseMatrix* M = &(__nlCurrentContext->M) ;
+ NLfloat* b = __nlCurrentContext->b ;
+ NLfloat* x = __nlCurrentContext->x ;
+
+ /* Compressed Row Storage matrix representation */
+ NLuint n = __nlCurrentContext->n ;
+ NLuint nnz = __nlSparseMatrixNNZ(M) ; /* Number of Non-Zero coeffs */
+ NLint* xa = __NL_NEW_ARRAY(NLint, n+1) ;
+ NLfloat* rhs = __NL_NEW_ARRAY(NLfloat, n) ;
+ NLfloat* a = __NL_NEW_ARRAY(NLfloat, nnz) ;
+ NLint* asub = __NL_NEW_ARRAY(NLint, nnz) ;
+
+ /* Permutation vector */
+ NLint* perm_r = __NL_NEW_ARRAY(NLint, n) ;
+ NLint* perm = __NL_NEW_ARRAY(NLint, n) ;
+
+ /* SuperLU variables */
+ SuperMatrix A, B ; /* System */
+ SuperMatrix L, U ; /* Inverse of A */
+ NLint info ; /* status code */
+ DNformat *vals = NULL ; /* access to result */
+ float *rvals = NULL ; /* access to result */
+
+ /* SuperLU options and stats */
+ superlu_options_t options ;
+ SuperLUStat_t stat ;
+
+
+ /* Temporary variables */
+ __NLRowColumn* Ri = NULL ;
+ NLuint i,jj,count ;
+
+ __nl_assert(!(M->storage & __NL_SYMMETRIC)) ;
+ __nl_assert(M->storage & __NL_ROWS) ;
+ __nl_assert(M->m == M->n) ;
+
+
+ /*
+ * Step 1: convert matrix M into SuperLU compressed column
+ * representation.
+ * -------------------------------------------------------
+ */
+
+ count = 0 ;
+ for(i=0; i<n; i++) {
+ Ri = &(M->row[i]) ;
+ xa[i] = count ;
+ for(jj=0; jj<Ri->size; jj++) {
+ a[count] = Ri->coeff[jj].value ;
+ asub[count] = Ri->coeff[jj].index ;
+ count++ ;
+ }
+ }
+ xa[n] = nnz ;
+
+ /* Save memory for SuperLU */
+ __nlSparseMatrixClear(M) ;
+
+
+ /*
+ * Rem: symmetric storage does not seem to work with
+ * SuperLU ... (->deactivated in main SLS::Solver driver)
+ */
+ sCreate_CompCol_Matrix(
+ &A, n, n, nnz, a, asub, xa,
+ SLU_NR, /* Row_wise, no supernode */
+ SLU_S, /* floats */
+ SLU_GE /* general storage */
+ );
+
+ /* Step 2: create vector */
+ sCreate_Dense_Matrix(
+ &B, n, 1, b, n,
+ SLU_DN, /* Fortran-type column-wise storage */
+ SLU_S, /* floats */
+ SLU_GE /* general */
+ );
+
+
+ /* Step 3: get permutation matrix
+ * ------------------------------
+ * com_perm: 0 -> no re-ordering
+ * 1 -> re-ordering for A^t.A
+ * 2 -> re-ordering for A^t+A
+ * 3 -> approximate minimum degree ordering
+ */
+ get_perm_c(do_perm ? 3 : 0, &A, perm) ;
+
+ /* Step 4: call SuperLU main routine
+ * ---------------------------------
+ */
+
+ set_default_options(&options) ;
+ options.ColPerm = MY_PERMC ;
+ StatInit(&stat) ;
+
+ sgssv(&options, &A, perm, perm_r, &L, &U, &B, &stat, &info);
+
+ /* Step 5: get the solution
+ * ------------------------
+ * Fortran-type column-wise storage
+ */
+ vals = (DNformat*)B.Store;
+ rvals = (float*)(vals->nzval);
+ if(info == 0) {
+ for(i = 0; i < n; i++){
+ x[i] = rvals[i];
+ }
+ }
+
+ /* Step 6: cleanup
+ * ---------------
+ */
+
+ /*
+ * For these two ones, only the "store" structure
+ * needs to be deallocated (the arrays have been allocated
+ * by us).
+ */
+ Destroy_SuperMatrix_Store(&A) ;
+ Destroy_SuperMatrix_Store(&B) ;
+
+
+ /*
+ * These ones need to be fully deallocated (they have been
+ * allocated by SuperLU).
+ */
+ Destroy_SuperNode_Matrix(&L);
+ Destroy_CompCol_Matrix(&U);
+
+ __NL_DELETE_ARRAY(xa) ;
+ __NL_DELETE_ARRAY(rhs) ;
+ __NL_DELETE_ARRAY(a) ;
+ __NL_DELETE_ARRAY(asub) ;
+ __NL_DELETE_ARRAY(perm_r) ;
+ __NL_DELETE_ARRAY(perm) ;
+
+ return (info == 0) ;
+}
+
+
+/************************************************************************/
+/* nlSolve() driver routine */
+
+NLboolean nlSolve() {
+ NLboolean result = NL_TRUE ;
+
+ __nlCheckState(__NL_STATE_SYSTEM_CONSTRUCTED) ;
+ result = __nlSolve_SUPERLU(NL_TRUE) ;
+
+ __nlVectorToVariables() ;
+ __nlTransition(__NL_STATE_SYSTEM_CONSTRUCTED, __NL_STATE_SOLVED) ;
+
+ return result ;
+}
+
diff --git a/intern/opennl/superlu/Cnames.h b/intern/opennl/superlu/Cnames.h
new file mode 100644
index 00000000000..35ff7b0b665
--- /dev/null
+++ b/intern/opennl/superlu/Cnames.h
@@ -0,0 +1,281 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 1, 1997
+ *
+ */
+#ifndef __SUPERLU_CNAMES /* allow multiple inclusions */
+#define __SUPERLU_CNAMES
+
+/* We want this flag, safer than putting in build system */
+#define Add_
+
+/*
+ * These macros define how C routines will be called. ADD_ assumes that
+ * they will be called by fortran, which expects C routines to have an
+ * underscore postfixed to the name (Suns, and the Intel expect this).
+ * NOCHANGE indicates that fortran will be calling, and that it expects
+ * the name called by fortran to be identical to that compiled by the C
+ * (RS6K's do this). UPCASE says it expects C routines called by fortran
+ * to be in all upcase (CRAY wants this).
+ */
+
+#define ADD_ 0
+#define ADD__ 1
+#define NOCHANGE 2
+#define UPCASE 3
+#define C_CALL 4
+
+#ifdef UpCase
+#define F77_CALL_C UPCASE
+#endif
+
+#ifdef NoChange
+#define F77_CALL_C NOCHANGE
+#endif
+
+#ifdef Add_
+#define F77_CALL_C ADD_
+#endif
+
+#ifdef Add__
+#define F77_CALL_C ADD__
+#endif
+
+/* Default */
+#ifndef F77_CALL_C
+#define F77_CALL_C ADD_
+#endif
+
+
+#if (F77_CALL_C == ADD_)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine
+ * No redefinition necessary to have following Fortran to C interface:
+ * FORTRAN CALL C DECLARATION
+ * call dgemm(...) void dgemm_(...)
+ *
+ * This is the default.
+ */
+
+#endif
+
+#if (F77_CALL_C == ADD__)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine
+ * for following Fortran to C interface:
+ * FORTRAN CALL C DECLARATION
+ * call dgemm(...) void dgemm__(...)
+ */
+#define sasum_ sasum__
+#define isamax_ isamax__
+#define scopy_ scopy__
+#define sscal_ sscal__
+#define sger_ sger__
+#define snrm2_ snrm2__
+#define ssymv_ ssymv__
+#define sdot_ sdot__
+#define saxpy_ saxpy__
+#define ssyr2_ ssyr2__
+#define srot_ srot__
+#define sgemv_ sgemv__
+#define strsv_ strsv__
+#define sgemm_ sgemm__
+#define strsm_ strsm__
+
+#define dasum_ dasum__
+#define idamax_ idamax__
+#define dcopy_ dcopy__
+#define dscal_ dscal__
+#define dger_ dger__
+#define dnrm2_ dnrm2__
+#define dsymv_ dsymv__
+#define ddot_ ddot__
+#define daxpy_ daxpy__
+#define dsyr2_ dsyr2__
+#define drot_ drot__
+#define dgemv_ dgemv__
+#define dtrsv_ dtrsv__
+#define dgemm_ dgemm__
+#define dtrsm_ dtrsm__
+
+#define scasum_ scasum__
+#define icamax_ icamax__
+#define ccopy_ ccopy__
+#define cscal_ cscal__
+#define scnrm2_ scnrm2__
+#define caxpy_ caxpy__
+#define cgemv_ cgemv__
+#define ctrsv_ ctrsv__
+#define cgemm_ cgemm__
+#define ctrsm_ ctrsm__
+#define cgerc_ cgerc__
+#define chemv_ chemv__
+#define cher2_ cher2__
+
+#define dzasum_ dzasum__
+#define izamax_ izamax__
+#define zcopy_ zcopy__
+#define zscal_ zscal__
+#define dznrm2_ dznrm2__
+#define zaxpy_ zaxpy__
+#define zgemv_ zgemv__
+#define ztrsv_ ztrsv__
+#define zgemm_ zgemm__
+#define ztrsm_ ztrsm__
+#define zgerc_ zgerc__
+#define zhemv_ zhemv__
+#define zher2_ zher2__
+
+#define c_bridge_dgssv_ c_bridge_dgssv__
+#define c_fortran_dgssv_ c_fortran_dgssv__
+#endif
+
+#if (F77_CALL_C == UPCASE)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine
+ * following Fortran to C interface:
+ * FORTRAN CALL C DECLARATION
+ * call dgemm(...) void DGEMM(...)
+ */
+#define sasum_ SASUM
+#define isamax_ ISAMAX
+#define scopy_ SCOPY
+#define sscal_ SSCAL
+#define sger_ SGER
+#define snrm2_ SNRM2
+#define ssymv_ SSYMV
+#define sdot_ SDOT
+#define saxpy_ SAXPY
+#define ssyr2_ SSYR2
+#define srot_ SROT
+#define sgemv_ SGEMV
+#define strsv_ STRSV
+#define sgemm_ SGEMM
+#define strsm_ STRSM
+
+#define dasum_ SASUM
+#define idamax_ ISAMAX
+#define dcopy_ SCOPY
+#define dscal_ SSCAL
+#define dger_ SGER
+#define dnrm2_ SNRM2
+#define dsymv_ SSYMV
+#define ddot_ SDOT
+#define daxpy_ SAXPY
+#define dsyr2_ SSYR2
+#define drot_ SROT
+#define dgemv_ SGEMV
+#define dtrsv_ STRSV
+#define dgemm_ SGEMM
+#define dtrsm_ STRSM
+
+#define scasum_ SCASUM
+#define icamax_ ICAMAX
+#define ccopy_ CCOPY
+#define cscal_ CSCAL
+#define scnrm2_ SCNRM2
+#define caxpy_ CAXPY
+#define cgemv_ CGEMV
+#define ctrsv_ CTRSV
+#define cgemm_ CGEMM
+#define ctrsm_ CTRSM
+#define cgerc_ CGERC
+#define chemv_ CHEMV
+#define cher2_ CHER2
+
+#define dzasum_ SCASUM
+#define izamax_ ICAMAX
+#define zcopy_ CCOPY
+#define zscal_ CSCAL
+#define dznrm2_ SCNRM2
+#define zaxpy_ CAXPY
+#define zgemv_ CGEMV
+#define ztrsv_ CTRSV
+#define zgemm_ CGEMM
+#define ztrsm_ CTRSM
+#define zgerc_ CGERC
+#define zhemv_ CHEMV
+#define zher2_ CHER2
+
+#define c_bridge_dgssv_ C_BRIDGE_DGSSV
+#define c_fortran_dgssv_ C_FORTRAN_DGSSV
+#endif
+
+#if (F77_CALL_C == NOCHANGE)
+/*
+ * These defines set up the naming scheme required to have a fortran 77
+ * routine call a C routine
+ * for following Fortran to C interface:
+ * FORTRAN CALL C DECLARATION
+ * call dgemm(...) void dgemm(...)
+ */
+#define sasum_ sasum
+#define isamax_ isamax
+#define scopy_ scopy
+#define sscal_ sscal
+#define sger_ sger
+#define snrm2_ snrm2
+#define ssymv_ ssymv
+#define sdot_ sdot
+#define saxpy_ saxpy
+#define ssyr2_ ssyr2
+#define srot_ srot
+#define sgemv_ sgemv
+#define strsv_ strsv
+#define sgemm_ sgemm
+#define strsm_ strsm
+
+#define dasum_ dasum
+#define idamax_ idamax
+#define dcopy_ dcopy
+#define dscal_ dscal
+#define dger_ dger
+#define dnrm2_ dnrm2
+#define dsymv_ dsymv
+#define ddot_ ddot
+#define daxpy_ daxpy
+#define dsyr2_ dsyr2
+#define drot_ drot
+#define dgemv_ dgemv
+#define dtrsv_ dtrsv
+#define dgemm_ dgemm
+#define dtrsm_ dtrsm
+
+#define scasum_ scasum
+#define icamax_ icamax
+#define ccopy_ ccopy
+#define cscal_ cscal
+#define scnrm2_ scnrm2
+#define caxpy_ caxpy
+#define cgemv_ cgemv
+#define ctrsv_ ctrsv
+#define cgemm_ cgemm
+#define ctrsm_ ctrsm
+#define cgerc_ cgerc
+#define chemv_ chemv
+#define cher2_ cher2
+
+#define dzasum_ dzasum
+#define izamax_ izamax
+#define zcopy_ zcopy
+#define zscal_ zscal
+#define dznrm2_ dznrm2
+#define zaxpy_ zaxpy
+#define zgemv_ zgemv
+#define ztrsv_ ztrsv
+#define zgemm_ zgemm
+#define ztrsm_ ztrsm
+#define zgerc_ zgerc
+#define zhemv_ zhemv
+#define zher2_ zher2
+
+#define c_bridge_dgssv_ c_bridge_dgssv
+#define c_fortran_dgssv_ c_fortran_dgssv
+#endif
+
+#endif /* __SUPERLU_CNAMES */
diff --git a/intern/opennl/superlu/Makefile b/intern/opennl/superlu/Makefile
new file mode 100644
index 00000000000..942ceebc79c
--- /dev/null
+++ b/intern/opennl/superlu/Makefile
@@ -0,0 +1,40 @@
+#
+# $Id$
+#
+# ***** BEGIN GPL/BL DUAL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version. The Blender
+# Foundation also sells licenses for use in proprietary software under
+# the Blender License. See http://www.blender.org/BL/ for information
+# about this.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): none yet.
+#
+# ***** END GPL/BL DUAL LICENSE BLOCK *****
+# opennl intern Makefile
+#
+
+LIBNAME = superlu
+DIR = $(OCGDIR)/intern/$(LIBNAME)
+
+include nan_compile.mk
+
+CCFLAGS += $(NAN_LEVEL_2_CPP_WARNINGS)
+
diff --git a/intern/opennl/superlu/colamd.c b/intern/opennl/superlu/colamd.c
new file mode 100644
index 00000000000..b60718f9938
--- /dev/null
+++ b/intern/opennl/superlu/colamd.c
@@ -0,0 +1,2583 @@
+/* ========================================================================== */
+/* === colamd - a sparse matrix column ordering algorithm =================== */
+/* ========================================================================== */
+
+/*
+ colamd: An approximate minimum degree column ordering algorithm.
+
+ Purpose:
+
+ Colamd computes a permutation Q such that the Cholesky factorization of
+ (AQ)'(AQ) has less fill-in and requires fewer floating point operations
+ than A'A. This also provides a good ordering for sparse partial
+ pivoting methods, P(AQ) = LU, where Q is computed prior to numerical
+ factorization, and P is computed during numerical factorization via
+ conventional partial pivoting with row interchanges. Colamd is the
+ column ordering method used in SuperLU, part of the ScaLAPACK library.
+ It is also available as user-contributed software for Matlab 5.2,
+ available from MathWorks, Inc. (http://www.mathworks.com). This
+ routine can be used in place of COLMMD in Matlab. By default, the \
+ and / operators in Matlab perform a column ordering (using COLMMD)
+ prior to LU factorization using sparse partial pivoting, in the
+ built-in Matlab LU(A) routine.
+
+ Authors:
+
+ The authors of the code itself are Stefan I. Larimore and Timothy A.
+ Davis (davis@cise.ufl.edu), University of Florida. The algorithm was
+ developed in collaboration with John Gilbert, Xerox PARC, and Esmond
+ Ng, Oak Ridge National Laboratory.
+
+ Date:
+
+ August 3, 1998. Version 1.0.
+
+ Acknowledgements:
+
+ This work was supported by the National Science Foundation, under
+ grants DMS-9504974 and DMS-9803599.
+
+ Notice:
+
+ Copyright (c) 1998 by the University of Florida. All Rights Reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ User documentation of any code that uses this code must cite the
+ Authors, the Copyright, and "Used by permission." If this code is
+ accessible from within Matlab, then typing "help colamd" or "colamd"
+ (with no arguments) must cite the Authors. Permission to modify the
+ code and to distribute modified code is granted, provided the above
+ notices are retained, and a notice that the code was modified is
+ included with the above copyright notice. You must also retain the
+ Availability information below, of the original version.
+
+ This software is provided free of charge.
+
+ Availability:
+
+ This file is located at
+
+ http://www.cise.ufl.edu/~davis/colamd/colamd.c
+
+ The colamd.h file is required, located in the same directory.
+ The colamdmex.c file provides a Matlab interface for colamd.
+ The symamdmex.c file provides a Matlab interface for symamd, which is
+ a symmetric ordering based on this code, colamd.c. All codes are
+ purely ANSI C compliant (they use no Unix-specific routines, include
+ files, etc.).
+*/
+
+/* ========================================================================== */
+/* === Description of user-callable routines ================================ */
+/* ========================================================================== */
+
+/*
+ Each user-callable routine (declared as PUBLIC) is briefly described below.
+ Refer to the comments preceding each routine for more details.
+
+ ----------------------------------------------------------------------------
+ colamd_recommended:
+ ----------------------------------------------------------------------------
+
+ Usage:
+
+ Alen = colamd_recommended (nnz, n_row, n_col) ;
+
+ Purpose:
+
+ Returns recommended value of Alen for use by colamd. Returns -1
+ if any input argument is negative.
+
+ Arguments:
+
+ int nnz ; Number of nonzeros in the matrix A. This must
+ be the same value as p [n_col] in the call to
+ colamd - otherwise you will get a wrong value
+ of the recommended memory to use.
+ int n_row ; Number of rows in the matrix A.
+ int n_col ; Number of columns in the matrix A.
+
+ ----------------------------------------------------------------------------
+ colamd_set_defaults:
+ ----------------------------------------------------------------------------
+
+ Usage:
+
+ colamd_set_defaults (knobs) ;
+
+ Purpose:
+
+ Sets the default parameters.
+
+ Arguments:
+
+ double knobs [COLAMD_KNOBS] ; Output only.
+
+ Rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) entries
+ are removed prior to ordering. Columns with more than
+ (knobs [COLAMD_DENSE_COL] * n_row) entries are removed
+ prior to ordering, and placed last in the output column
+ ordering. Default values of these two knobs are both 0.5.
+ Currently, only knobs [0] and knobs [1] are used, but future
+ versions may use more knobs. If so, they will be properly set
+ to their defaults by the future version of colamd_set_defaults,
+ so that the code that calls colamd will not need to change,
+ assuming that you either use colamd_set_defaults, or pass a
+ (double *) NULL pointer as the knobs array to colamd.
+
+ ----------------------------------------------------------------------------
+ colamd:
+ ----------------------------------------------------------------------------
+
+ Usage:
+
+ colamd (n_row, n_col, Alen, A, p, knobs) ;
+
+ Purpose:
+
+ Computes a column ordering (Q) of A such that P(AQ)=LU or
+ (AQ)'AQ=LL' have less fill-in and require fewer floating point
+ operations than factorizing the unpermuted matrix A or A'A,
+ respectively.
+
+ Arguments:
+
+ int n_row ;
+
+ Number of rows in the matrix A.
+ Restriction: n_row >= 0.
+ Colamd returns FALSE if n_row is negative.
+
+ int n_col ;
+
+ Number of columns in the matrix A.
+ Restriction: n_col >= 0.
+ Colamd returns FALSE if n_col is negative.
+
+ int Alen ;
+
+ Restriction (see note):
+ Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col + COLAMD_STATS
+ Colamd returns FALSE if these conditions are not met.
+
+ Note: this restriction makes an modest assumption regarding
+ the size of the two typedef'd structures, below. We do,
+ however, guarantee that
+ Alen >= colamd_recommended (nnz, n_row, n_col)
+ will be sufficient.
+
+ int A [Alen] ; Input argument, stats on output.
+
+ A is an integer array of size Alen. Alen must be at least as
+ large as the bare minimum value given above, but this is very
+ low, and can result in excessive run time. For best
+ performance, we recommend that Alen be greater than or equal to
+ colamd_recommended (nnz, n_row, n_col), which adds
+ nnz/5 to the bare minimum value given above.
+
+ On input, the row indices of the entries in column c of the
+ matrix are held in A [(p [c]) ... (p [c+1]-1)]. The row indices
+ in a given column c need not be in ascending order, and
+ duplicate row indices may be be present. However, colamd will
+ work a little faster if both of these conditions are met
+ (Colamd puts the matrix into this format, if it finds that the
+ the conditions are not met).
+
+ The matrix is 0-based. That is, rows are in the range 0 to
+ n_row-1, and columns are in the range 0 to n_col-1. Colamd
+ returns FALSE if any row index is out of range.
+
+ The contents of A are modified during ordering, and are thus
+ undefined on output with the exception of a few statistics
+ about the ordering (A [0..COLAMD_STATS-1]):
+ A [0]: number of dense or empty rows ignored.
+ A [1]: number of dense or empty columns ignored (and ordered
+ last in the output permutation p)
+ A [2]: number of garbage collections performed.
+ A [3]: 0, if all row indices in each column were in sorted
+ order, and no duplicates were present.
+ 1, otherwise (in which case colamd had to do more work)
+ Note that a row can become "empty" if it contains only
+ "dense" and/or "empty" columns, and similarly a column can
+ become "empty" if it only contains "dense" and/or "empty" rows.
+ Future versions may return more statistics in A, but the usage
+ of these 4 entries in A will remain unchanged.
+
+ int p [n_col+1] ; Both input and output argument.
+
+ p is an integer array of size n_col+1. On input, it holds the
+ "pointers" for the column form of the matrix A. Column c of
+ the matrix A is held in A [(p [c]) ... (p [c+1]-1)]. The first
+ entry, p [0], must be zero, and p [c] <= p [c+1] must hold
+ for all c in the range 0 to n_col-1. The value p [n_col] is
+ thus the total number of entries in the pattern of the matrix A.
+ Colamd returns FALSE if these conditions are not met.
+
+ On output, if colamd returns TRUE, the array p holds the column
+ permutation (Q, for P(AQ)=LU or (AQ)'(AQ)=LL'), where p [0] is
+ the first column index in the new ordering, and p [n_col-1] is
+ the last. That is, p [k] = j means that column j of A is the
+ kth pivot column, in AQ, where k is in the range 0 to n_col-1
+ (p [0] = j means that column j of A is the first column in AQ).
+
+ If colamd returns FALSE, then no permutation is returned, and
+ p is undefined on output.
+
+ double knobs [COLAMD_KNOBS] ; Input only.
+
+ See colamd_set_defaults for a description. If the knobs array
+ is not present (that is, if a (double *) NULL pointer is passed
+ in its place), then the default values of the parameters are
+ used instead.
+
+*/
+
+
+/* ========================================================================== */
+/* === Include files ======================================================== */
+/* ========================================================================== */
+
+/* limits.h: the largest positive integer (INT_MAX) */
+#include <limits.h>
+
+/* colamd.h: knob array size, stats output size, and global prototypes */
+#include "colamd.h"
+
+/* ========================================================================== */
+/* === Scaffolding code definitions ======================================== */
+/* ========================================================================== */
+
+/* Ensure that debugging is turned off: */
+#ifndef NDEBUG
+#define NDEBUG
+#endif
+
+/* assert.h: the assert macro (no debugging if NDEBUG is defined) */
+#include <assert.h>
+
+/*
+ Our "scaffolding code" philosophy: In our opinion, well-written library
+ code should keep its "debugging" code, and just normally have it turned off
+ by the compiler so as not to interfere with performance. This serves
+ several purposes:
+
+ (1) assertions act as comments to the reader, telling you what the code
+ expects at that point. All assertions will always be true (unless
+ there really is a bug, of course).
+
+ (2) leaving in the scaffolding code assists anyone who would like to modify
+ the code, or understand the algorithm (by reading the debugging output,
+ one can get a glimpse into what the code is doing).
+
+ (3) (gasp!) for actually finding bugs. This code has been heavily tested
+ and "should" be fully functional and bug-free ... but you never know...
+
+ To enable debugging, comment out the "#define NDEBUG" above. The code will
+ become outrageously slow when debugging is enabled. To control the level of
+ debugging output, set an environment variable D to 0 (little), 1 (some),
+ 2, 3, or 4 (lots).
+*/
+
+/* ========================================================================== */
+/* === Row and Column structures ============================================ */
+/* ========================================================================== */
+
+typedef struct ColInfo_struct
+{
+ int start ; /* index for A of first row in this column, or DEAD */
+ /* if column is dead */
+ int length ; /* number of rows in this column */
+ union
+ {
+ int thickness ; /* number of original columns represented by this */
+ /* col, if the column is alive */
+ int parent ; /* parent in parent tree super-column structure, if */
+ /* the column is dead */
+ } shared1 ;
+ union
+ {
+ int score ; /* the score used to maintain heap, if col is alive */
+ int order ; /* pivot ordering of this column, if col is dead */
+ } shared2 ;
+ union
+ {
+ int headhash ; /* head of a hash bucket, if col is at the head of */
+ /* a degree list */
+ int hash ; /* hash value, if col is not in a degree list */
+ int prev ; /* previous column in degree list, if col is in a */
+ /* degree list (but not at the head of a degree list) */
+ } shared3 ;
+ union
+ {
+ int degree_next ; /* next column, if col is in a degree list */
+ int hash_next ; /* next column, if col is in a hash list */
+ } shared4 ;
+
+} ColInfo ;
+
+typedef struct RowInfo_struct
+{
+ int start ; /* index for A of first col in this row */
+ int length ; /* number of principal columns in this row */
+ union
+ {
+ int degree ; /* number of principal & non-principal columns in row */
+ int p ; /* used as a row pointer in init_rows_cols () */
+ } shared1 ;
+ union
+ {
+ int mark ; /* for computing set differences and marking dead rows*/
+ int first_column ;/* first column in row (used in garbage collection) */
+ } shared2 ;
+
+} RowInfo ;
+
+/* ========================================================================== */
+/* === Definitions ========================================================== */
+/* ========================================================================== */
+
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+
+#define ONES_COMPLEMENT(r) (-(r)-1)
+
+#define TRUE (1)
+#define FALSE (0)
+#define EMPTY (-1)
+
+/* Row and column status */
+#define ALIVE (0)
+#define DEAD (-1)
+
+/* Column status */
+#define DEAD_PRINCIPAL (-1)
+#define DEAD_NON_PRINCIPAL (-2)
+
+/* Macros for row and column status update and checking. */
+#define ROW_IS_DEAD(r) ROW_IS_MARKED_DEAD (Row[r].shared2.mark)
+#define ROW_IS_MARKED_DEAD(row_mark) (row_mark < ALIVE)
+#define ROW_IS_ALIVE(r) (Row [r].shared2.mark >= ALIVE)
+#define COL_IS_DEAD(c) (Col [c].start < ALIVE)
+#define COL_IS_ALIVE(c) (Col [c].start >= ALIVE)
+#define COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == DEAD_PRINCIPAL)
+#define KILL_ROW(r) { Row [r].shared2.mark = DEAD ; }
+#define KILL_PRINCIPAL_COL(c) { Col [c].start = DEAD_PRINCIPAL ; }
+#define KILL_NON_PRINCIPAL_COL(c) { Col [c].start = DEAD_NON_PRINCIPAL ; }
+
+/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */
+#define PUBLIC
+#define PRIVATE static
+
+/* ========================================================================== */
+/* === Prototypes of PRIVATE routines ======================================= */
+/* ========================================================================== */
+
+PRIVATE int init_rows_cols
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int p []
+) ;
+
+PRIVATE void init_scoring
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int head [],
+ double knobs [COLAMD_KNOBS],
+ int *p_n_row2,
+ int *p_n_col2,
+ int *p_max_deg
+) ;
+
+PRIVATE int find_ordering
+(
+ int n_row,
+ int n_col,
+ int Alen,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int head [],
+ int n_col2,
+ int max_deg,
+ int pfree
+) ;
+
+PRIVATE void order_children
+(
+ int n_col,
+ ColInfo Col [],
+ int p []
+) ;
+
+PRIVATE void detect_super_cols
+(
+#ifndef NDEBUG
+ int n_col,
+ RowInfo Row [],
+#endif
+ ColInfo Col [],
+ int A [],
+ int head [],
+ int row_start,
+ int row_length
+) ;
+
+PRIVATE int garbage_collection
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int *pfree
+) ;
+
+PRIVATE int clear_mark
+(
+ int n_row,
+ RowInfo Row []
+) ;
+
+/* ========================================================================== */
+/* === Debugging definitions ================================================ */
+/* ========================================================================== */
+
+#ifndef NDEBUG
+
+/* === With debugging ======================================================= */
+
+/* stdlib.h: for getenv and atoi, to get debugging level from environment */
+#include <stdlib.h>
+
+/* stdio.h: for printf (no printing if debugging is turned off) */
+#include <stdio.h>
+
+PRIVATE void debug_deg_lists
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int head [],
+ int min_score,
+ int should,
+ int max_deg
+) ;
+
+PRIVATE void debug_mark
+(
+ int n_row,
+ RowInfo Row [],
+ int tag_mark,
+ int max_mark
+) ;
+
+PRIVATE void debug_matrix
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A []
+) ;
+
+PRIVATE void debug_structures
+(
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int n_col2
+) ;
+
+/* the following is the *ONLY* global variable in this file, and is only */
+/* present when debugging */
+
+PRIVATE int debug_colamd ; /* debug print level */
+
+#define DEBUG0(params) { (void) printf params ; }
+#define DEBUG1(params) { if (debug_colamd >= 1) (void) printf params ; }
+#define DEBUG2(params) { if (debug_colamd >= 2) (void) printf params ; }
+#define DEBUG3(params) { if (debug_colamd >= 3) (void) printf params ; }
+#define DEBUG4(params) { if (debug_colamd >= 4) (void) printf params ; }
+
+#else
+
+/* === No debugging ========================================================= */
+
+#define DEBUG0(params) ;
+#define DEBUG1(params) ;
+#define DEBUG2(params) ;
+#define DEBUG3(params) ;
+#define DEBUG4(params) ;
+
+#endif
+
+/* ========================================================================== */
+
+
+/* ========================================================================== */
+/* === USER-CALLABLE ROUTINES: ============================================== */
+/* ========================================================================== */
+
+
+/* ========================================================================== */
+/* === colamd_recommended =================================================== */
+/* ========================================================================== */
+
+/*
+ The colamd_recommended routine returns the suggested size for Alen. This
+ value has been determined to provide good balance between the number of
+ garbage collections and the memory requirements for colamd.
+*/
+
+PUBLIC int colamd_recommended /* returns recommended value of Alen. */
+(
+ /* === Parameters ======================================================= */
+
+ int nnz, /* number of nonzeros in A */
+ int n_row, /* number of rows in A */
+ int n_col /* number of columns in A */
+)
+{
+ /* === Local variables ================================================== */
+
+ int minimum ; /* bare minimum requirements */
+ int recommended ; /* recommended value of Alen */
+
+ if (nnz < 0 || n_row < 0 || n_col < 0)
+ {
+ /* return -1 if any input argument is corrupted */
+ DEBUG0 (("colamd_recommended error!")) ;
+ DEBUG0 ((" nnz: %d, n_row: %d, n_col: %d\n", nnz, n_row, n_col)) ;
+ return (-1) ;
+ }
+
+ minimum =
+ 2 * (nnz) /* for A */
+ + (((n_col) + 1) * sizeof (ColInfo) / sizeof (int)) /* for Col */
+ + (((n_row) + 1) * sizeof (RowInfo) / sizeof (int)) /* for Row */
+ + n_col /* minimum elbow room to guarrantee success */
+ + COLAMD_STATS ; /* for output statistics */
+
+ /* recommended is equal to the minumum plus enough memory to keep the */
+ /* number garbage collections low */
+ recommended = minimum + nnz/5 ;
+
+ return (recommended) ;
+}
+
+
+/* ========================================================================== */
+/* === colamd_set_defaults ================================================== */
+/* ========================================================================== */
+
+/*
+ The colamd_set_defaults routine sets the default values of the user-
+ controllable parameters for colamd:
+
+ knobs [0] rows with knobs[0]*n_col entries or more are removed
+ prior to ordering.
+
+ knobs [1] columns with knobs[1]*n_row entries or more are removed
+ prior to ordering, and placed last in the column
+ permutation.
+
+ knobs [2..19] unused, but future versions might use this
+*/
+
+PUBLIC void colamd_set_defaults
+(
+ /* === Parameters ======================================================= */
+
+ double knobs [COLAMD_KNOBS] /* knob array */
+)
+{
+ /* === Local variables ================================================== */
+
+ int i ;
+
+ if (!knobs)
+ {
+ return ; /* no knobs to initialize */
+ }
+ for (i = 0 ; i < COLAMD_KNOBS ; i++)
+ {
+ knobs [i] = 0 ;
+ }
+ knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */
+ knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */
+}
+
+
+/* ========================================================================== */
+/* === colamd =============================================================== */
+/* ========================================================================== */
+
+/*
+ The colamd routine computes a column ordering Q of a sparse matrix
+ A such that the LU factorization P(AQ) = LU remains sparse, where P is
+ selected via partial pivoting. The routine can also be viewed as
+ providing a permutation Q such that the Cholesky factorization
+ (AQ)'(AQ) = LL' remains sparse.
+
+ On input, the nonzero patterns of the columns of A are stored in the
+ array A, in order 0 to n_col-1. A is held in 0-based form (rows in the
+ range 0 to n_row-1 and columns in the range 0 to n_col-1). Row indices
+ for column c are located in A [(p [c]) ... (p [c+1]-1)], where p [0] = 0,
+ and thus p [n_col] is the number of entries in A. The matrix is
+ destroyed on output. The row indices within each column do not have to
+ be sorted (from small to large row indices), and duplicate row indices
+ may be present. However, colamd will work a little faster if columns are
+ sorted and no duplicates are present. Matlab 5.2 always passes the matrix
+ with sorted columns, and no duplicates.
+
+ The integer array A is of size Alen. Alen must be at least of size
+ (where nnz is the number of entries in A):
+
+ nnz for the input column form of A
+ + nnz for a row form of A that colamd generates
+ + 6*(n_col+1) for a ColInfo Col [0..n_col] array
+ (this assumes sizeof (ColInfo) is 6 int's).
+ + 4*(n_row+1) for a RowInfo Row [0..n_row] array
+ (this assumes sizeof (RowInfo) is 4 int's).
+ + elbow_room must be at least n_col. We recommend at least
+ nnz/5 in addition to that. If sufficient,
+ changes in the elbow room affect the ordering
+ time only, not the ordering itself.
+ + COLAMD_STATS for the output statistics
+
+ Colamd returns FALSE is memory is insufficient, or TRUE otherwise.
+
+ On input, the caller must specify:
+
+ n_row the number of rows of A
+ n_col the number of columns of A
+ Alen the size of the array A
+ A [0 ... nnz-1] the row indices, where nnz = p [n_col]
+ A [nnz ... Alen-1] (need not be initialized by the user)
+ p [0 ... n_col] the column pointers, p [0] = 0, and p [n_col]
+ is the number of entries in A. Column c of A
+ is stored in A [p [c] ... p [c+1]-1].
+ knobs [0 ... 19] a set of parameters that control the behavior
+ of colamd. If knobs is a NULL pointer the
+ defaults are used. The user-callable
+ colamd_set_defaults routine sets the default
+ parameters. See that routine for a description
+ of the user-controllable parameters.
+
+ If the return value of Colamd is TRUE, then on output:
+
+ p [0 ... n_col-1] the column permutation. p [0] is the first
+ column index, and p [n_col-1] is the last.
+ That is, p [k] = j means that column j of A
+ is the kth column of AQ.
+
+ A is undefined on output (the matrix pattern is
+ destroyed), except for the following statistics:
+
+ A [0] the number of dense (or empty) rows ignored
+ A [1] the number of dense (or empty) columms. These
+ are ordered last, in their natural order.
+ A [2] the number of garbage collections performed.
+ If this is excessive, then you would have
+ gotten your results faster if Alen was larger.
+ A [3] 0, if all row indices in each column were in
+ sorted order and no duplicates were present.
+ 1, if there were unsorted or duplicate row
+ indices in the input. You would have gotten
+ your results faster if A [3] was returned as 0.
+
+ If the return value of Colamd is FALSE, then A and p are undefined on
+ output.
+*/
+
+PUBLIC int colamd /* returns TRUE if successful */
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows in A */
+ int n_col, /* number of columns in A */
+ int Alen, /* length of A */
+ int A [], /* row indices of A */
+ int p [], /* pointers to columns in A */
+ double knobs [COLAMD_KNOBS] /* parameters (uses defaults if NULL) */
+)
+{
+ /* === Local variables ================================================== */
+
+ int i ; /* loop index */
+ int nnz ; /* nonzeros in A */
+ int Row_size ; /* size of Row [], in integers */
+ int Col_size ; /* size of Col [], in integers */
+ int elbow_room ; /* remaining free space */
+ RowInfo *Row ; /* pointer into A of Row [0..n_row] array */
+ ColInfo *Col ; /* pointer into A of Col [0..n_col] array */
+ int n_col2 ; /* number of non-dense, non-empty columns */
+ int n_row2 ; /* number of non-dense, non-empty rows */
+ int ngarbage ; /* number of garbage collections performed */
+ int max_deg ; /* maximum row degree */
+ double default_knobs [COLAMD_KNOBS] ; /* default knobs knobs array */
+ int init_result ; /* return code from initialization */
+
+#ifndef NDEBUG
+ debug_colamd = 0 ; /* no debug printing */
+ /* get "D" environment variable, which gives the debug printing level */
+ if (getenv ("D")) debug_colamd = atoi (getenv ("D")) ;
+ DEBUG0 (("debug version, D = %d (THIS WILL BE SLOOOOW!)\n", debug_colamd)) ;
+#endif
+
+ /* === Check the input arguments ======================================== */
+
+ if (n_row < 0 || n_col < 0 || !A || !p)
+ {
+ /* n_row and n_col must be non-negative, A and p must be present */
+ DEBUG0 (("colamd error! %d %d %d\n", n_row, n_col, Alen)) ;
+ return (FALSE) ;
+ }
+ nnz = p [n_col] ;
+ if (nnz < 0 || p [0] != 0)
+ {
+ /* nnz must be non-negative, and p [0] must be zero */
+ DEBUG0 (("colamd error! %d %d\n", nnz, p [0])) ;
+ return (FALSE) ;
+ }
+
+ /* === If no knobs, set default parameters ============================== */
+
+ if (!knobs)
+ {
+ knobs = default_knobs ;
+ colamd_set_defaults (knobs) ;
+ }
+
+ /* === Allocate the Row and Col arrays from array A ===================== */
+
+ Col_size = (n_col + 1) * sizeof (ColInfo) / sizeof (int) ;
+ Row_size = (n_row + 1) * sizeof (RowInfo) / sizeof (int) ;
+ elbow_room = Alen - (2*nnz + Col_size + Row_size) ;
+ if (elbow_room < n_col + COLAMD_STATS)
+ {
+ /* not enough space in array A to perform the ordering */
+ DEBUG0 (("colamd error! elbow_room %d, %d\n", elbow_room,n_col)) ;
+ return (FALSE) ;
+ }
+ Alen = 2*nnz + elbow_room ;
+ Col = (ColInfo *) &A [Alen] ;
+ Row = (RowInfo *) &A [Alen + Col_size] ;
+
+ /* === Construct the row and column data structures ===================== */
+
+ init_result = init_rows_cols (n_row, n_col, Row, Col, A, p) ;
+ if (init_result == -1)
+ {
+ /* input matrix is invalid */
+ DEBUG0 (("colamd error! matrix invalid\n")) ;
+ return (FALSE) ;
+ }
+
+ /* === Initialize scores, kill dense rows/columns ======================= */
+
+ init_scoring (n_row, n_col, Row, Col, A, p, knobs,
+ &n_row2, &n_col2, &max_deg) ;
+
+ /* === Order the supercolumns =========================================== */
+
+ ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p,
+ n_col2, max_deg, 2*nnz) ;
+
+ /* === Order the non-principal columns ================================== */
+
+ order_children (n_col, Col, p) ;
+
+ /* === Return statistics in A =========================================== */
+
+ for (i = 0 ; i < COLAMD_STATS ; i++)
+ {
+ A [i] = 0 ;
+ }
+ A [COLAMD_DENSE_ROW] = n_row - n_row2 ;
+ A [COLAMD_DENSE_COL] = n_col - n_col2 ;
+ A [COLAMD_DEFRAG_COUNT] = ngarbage ;
+ A [COLAMD_JUMBLED_COLS] = init_result ;
+
+ return (TRUE) ;
+}
+
+
+/* ========================================================================== */
+/* === NON-USER-CALLABLE ROUTINES: ========================================== */
+/* ========================================================================== */
+
+/* There are no user-callable routines beyond this point in the file */
+
+
+/* ========================================================================== */
+/* === init_rows_cols ======================================================= */
+/* ========================================================================== */
+
+/*
+ Takes the column form of the matrix in A and creates the row form of the
+ matrix. Also, row and column attributes are stored in the Col and Row
+ structs. If the columns are un-sorted or contain duplicate row indices,
+ this routine will also sort and remove duplicate row indices from the
+ column form of the matrix. Returns -1 on error, 1 if columns jumbled,
+ or 0 if columns not jumbled. Not user-callable.
+*/
+
+PRIVATE int init_rows_cols /* returns status code */
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows of A */
+ int n_col, /* number of columns of A */
+ RowInfo Row [], /* of size n_row+1 */
+ ColInfo Col [], /* of size n_col+1 */
+ int A [], /* row indices of A, of size Alen */
+ int p [] /* pointers to columns in A, of size n_col+1 */
+)
+{
+ /* === Local variables ================================================== */
+
+ int col ; /* a column index */
+ int row ; /* a row index */
+ int *cp ; /* a column pointer */
+ int *cp_end ; /* a pointer to the end of a column */
+ int *rp ; /* a row pointer */
+ int *rp_end ; /* a pointer to the end of a row */
+ int last_start ; /* start index of previous column in A */
+ int start ; /* start index of column in A */
+ int last_row ; /* previous row */
+ int jumbled_columns ; /* indicates if columns are jumbled */
+
+ /* === Initialize columns, and check column pointers ==================== */
+
+ last_start = 0 ;
+ for (col = 0 ; col < n_col ; col++)
+ {
+ start = p [col] ;
+ if (start < last_start)
+ {
+ /* column pointers must be non-decreasing */
+ DEBUG0 (("colamd error! last p %d p [col] %d\n",last_start,start));
+ return (-1) ;
+ }
+ Col [col].start = start ;
+ Col [col].length = p [col+1] - start ;
+ Col [col].shared1.thickness = 1 ;
+ Col [col].shared2.score = 0 ;
+ Col [col].shared3.prev = EMPTY ;
+ Col [col].shared4.degree_next = EMPTY ;
+ last_start = start ;
+ }
+ /* must check the end pointer for last column */
+ if (p [n_col] < last_start)
+ {
+ /* column pointers must be non-decreasing */
+ DEBUG0 (("colamd error! last p %d p [n_col] %d\n",p[col],last_start)) ;
+ return (-1) ;
+ }
+
+ /* p [0..n_col] no longer needed, used as "head" in subsequent routines */
+
+ /* === Scan columns, compute row degrees, and check row indices ========= */
+
+ jumbled_columns = FALSE ;
+
+ for (row = 0 ; row < n_row ; row++)
+ {
+ Row [row].length = 0 ;
+ Row [row].shared2.mark = -1 ;
+ }
+
+ for (col = 0 ; col < n_col ; col++)
+ {
+ last_row = -1 ;
+
+ cp = &A [p [col]] ;
+ cp_end = &A [p [col+1]] ;
+
+ while (cp < cp_end)
+ {
+ row = *cp++ ;
+
+ /* make sure row indices within range */
+ if (row < 0 || row >= n_row)
+ {
+ DEBUG0 (("colamd error! col %d row %d last_row %d\n",
+ col, row, last_row)) ;
+ return (-1) ;
+ }
+ else if (row <= last_row)
+ {
+ /* row indices are not sorted or repeated, thus cols */
+ /* are jumbled */
+ jumbled_columns = TRUE ;
+ }
+ /* prevent repeated row from being counted */
+ if (Row [row].shared2.mark != col)
+ {
+ Row [row].length++ ;
+ Row [row].shared2.mark = col ;
+ last_row = row ;
+ }
+ else
+ {
+ /* this is a repeated entry in the column, */
+ /* it will be removed */
+ Col [col].length-- ;
+ }
+ }
+ }
+
+ /* === Compute row pointers ============================================= */
+
+ /* row form of the matrix starts directly after the column */
+ /* form of matrix in A */
+ Row [0].start = p [n_col] ;
+ Row [0].shared1.p = Row [0].start ;
+ Row [0].shared2.mark = -1 ;
+ for (row = 1 ; row < n_row ; row++)
+ {
+ Row [row].start = Row [row-1].start + Row [row-1].length ;
+ Row [row].shared1.p = Row [row].start ;
+ Row [row].shared2.mark = -1 ;
+ }
+
+ /* === Create row form ================================================== */
+
+ if (jumbled_columns)
+ {
+ /* if cols jumbled, watch for repeated row indices */
+ for (col = 0 ; col < n_col ; col++)
+ {
+ cp = &A [p [col]] ;
+ cp_end = &A [p [col+1]] ;
+ while (cp < cp_end)
+ {
+ row = *cp++ ;
+ if (Row [row].shared2.mark != col)
+ {
+ A [(Row [row].shared1.p)++] = col ;
+ Row [row].shared2.mark = col ;
+ }
+ }
+ }
+ }
+ else
+ {
+ /* if cols not jumbled, we don't need the mark (this is faster) */
+ for (col = 0 ; col < n_col ; col++)
+ {
+ cp = &A [p [col]] ;
+ cp_end = &A [p [col+1]] ;
+ while (cp < cp_end)
+ {
+ A [(Row [*cp++].shared1.p)++] = col ;
+ }
+ }
+ }
+
+ /* === Clear the row marks and set row degrees ========================== */
+
+ for (row = 0 ; row < n_row ; row++)
+ {
+ Row [row].shared2.mark = 0 ;
+ Row [row].shared1.degree = Row [row].length ;
+ }
+
+ /* === See if we need to re-create columns ============================== */
+
+ if (jumbled_columns)
+ {
+
+#ifndef NDEBUG
+ /* make sure column lengths are correct */
+ for (col = 0 ; col < n_col ; col++)
+ {
+ p [col] = Col [col].length ;
+ }
+ for (row = 0 ; row < n_row ; row++)
+ {
+ rp = &A [Row [row].start] ;
+ rp_end = rp + Row [row].length ;
+ while (rp < rp_end)
+ {
+ p [*rp++]-- ;
+ }
+ }
+ for (col = 0 ; col < n_col ; col++)
+ {
+ assert (p [col] == 0) ;
+ }
+ /* now p is all zero (different than when debugging is turned off) */
+#endif
+
+ /* === Compute col pointers ========================================= */
+
+ /* col form of the matrix starts at A [0]. */
+ /* Note, we may have a gap between the col form and the row */
+ /* form if there were duplicate entries, if so, it will be */
+ /* removed upon the first garbage collection */
+ Col [0].start = 0 ;
+ p [0] = Col [0].start ;
+ for (col = 1 ; col < n_col ; col++)
+ {
+ /* note that the lengths here are for pruned columns, i.e. */
+ /* no duplicate row indices will exist for these columns */
+ Col [col].start = Col [col-1].start + Col [col-1].length ;
+ p [col] = Col [col].start ;
+ }
+
+ /* === Re-create col form =========================================== */
+
+ for (row = 0 ; row < n_row ; row++)
+ {
+ rp = &A [Row [row].start] ;
+ rp_end = rp + Row [row].length ;
+ while (rp < rp_end)
+ {
+ A [(p [*rp++])++] = row ;
+ }
+ }
+ return (1) ;
+ }
+ else
+ {
+ /* no columns jumbled (this is faster) */
+ return (0) ;
+ }
+}
+
+
+/* ========================================================================== */
+/* === init_scoring ========================================================= */
+/* ========================================================================== */
+
+/*
+ Kills dense or empty columns and rows, calculates an initial score for
+ each column, and places all columns in the degree lists. Not user-callable.
+*/
+
+PRIVATE void init_scoring
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows of A */
+ int n_col, /* number of columns of A */
+ RowInfo Row [], /* of size n_row+1 */
+ ColInfo Col [], /* of size n_col+1 */
+ int A [], /* column form and row form of A */
+ int head [], /* of size n_col+1 */
+ double knobs [COLAMD_KNOBS],/* parameters */
+ int *p_n_row2, /* number of non-dense, non-empty rows */
+ int *p_n_col2, /* number of non-dense, non-empty columns */
+ int *p_max_deg /* maximum row degree */
+)
+{
+ /* === Local variables ================================================== */
+
+ int c ; /* a column index */
+ int r, row ; /* a row index */
+ int *cp ; /* a column pointer */
+ int deg ; /* degree (# entries) of a row or column */
+ int *cp_end ; /* a pointer to the end of a column */
+ int *new_cp ; /* new column pointer */
+ int col_length ; /* length of pruned column */
+ int score ; /* current column score */
+ int n_col2 ; /* number of non-dense, non-empty columns */
+ int n_row2 ; /* number of non-dense, non-empty rows */
+ int dense_row_count ; /* remove rows with more entries than this */
+ int dense_col_count ; /* remove cols with more entries than this */
+ int min_score ; /* smallest column score */
+ int max_deg ; /* maximum row degree */
+ int next_col ; /* Used to add to degree list.*/
+#ifndef NDEBUG
+ int debug_count ; /* debug only. */
+#endif
+
+ /* === Extract knobs ==================================================== */
+
+ dense_row_count = MAX (0, MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
+ dense_col_count = MAX (0, MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
+ DEBUG0 (("densecount: %d %d\n", dense_row_count, dense_col_count)) ;
+ max_deg = 0 ;
+ n_col2 = n_col ;
+ n_row2 = n_row ;
+
+ /* === Kill empty columns =============================================== */
+
+ /* Put the empty columns at the end in their natural, so that LU */
+ /* factorization can proceed as far as possible. */
+ for (c = n_col-1 ; c >= 0 ; c--)
+ {
+ deg = Col [c].length ;
+ if (deg == 0)
+ {
+ /* this is a empty column, kill and order it last */
+ Col [c].shared2.order = --n_col2 ;
+ KILL_PRINCIPAL_COL (c) ;
+ }
+ }
+ DEBUG0 (("null columns killed: %d\n", n_col - n_col2)) ;
+
+ /* === Kill dense columns =============================================== */
+
+ /* Put the dense columns at the end, in their natural order */
+ for (c = n_col-1 ; c >= 0 ; c--)
+ {
+ /* skip any dead columns */
+ if (COL_IS_DEAD (c))
+ {
+ continue ;
+ }
+ deg = Col [c].length ;
+ if (deg > dense_col_count)
+ {
+ /* this is a dense column, kill and order it last */
+ Col [c].shared2.order = --n_col2 ;
+ /* decrement the row degrees */
+ cp = &A [Col [c].start] ;
+ cp_end = cp + Col [c].length ;
+ while (cp < cp_end)
+ {
+ Row [*cp++].shared1.degree-- ;
+ }
+ KILL_PRINCIPAL_COL (c) ;
+ }
+ }
+ DEBUG0 (("Dense and null columns killed: %d\n", n_col - n_col2)) ;
+
+ /* === Kill dense and empty rows ======================================== */
+
+ for (r = 0 ; r < n_row ; r++)
+ {
+ deg = Row [r].shared1.degree ;
+ assert (deg >= 0 && deg <= n_col) ;
+ if (deg > dense_row_count || deg == 0)
+ {
+ /* kill a dense or empty row */
+ KILL_ROW (r) ;
+ --n_row2 ;
+ }
+ else
+ {
+ /* keep track of max degree of remaining rows */
+ max_deg = MAX (max_deg, deg) ;
+ }
+ }
+ DEBUG0 (("Dense and null rows killed: %d\n", n_row - n_row2)) ;
+
+ /* === Compute initial column scores ==================================== */
+
+ /* At this point the row degrees are accurate. They reflect the number */
+ /* of "live" (non-dense) columns in each row. No empty rows exist. */
+ /* Some "live" columns may contain only dead rows, however. These are */
+ /* pruned in the code below. */
+
+ /* now find the initial matlab score for each column */
+ for (c = n_col-1 ; c >= 0 ; c--)
+ {
+ /* skip dead column */
+ if (COL_IS_DEAD (c))
+ {
+ continue ;
+ }
+ score = 0 ;
+ cp = &A [Col [c].start] ;
+ new_cp = cp ;
+ cp_end = cp + Col [c].length ;
+ while (cp < cp_end)
+ {
+ /* get a row */
+ row = *cp++ ;
+ /* skip if dead */
+ if (ROW_IS_DEAD (row))
+ {
+ continue ;
+ }
+ /* compact the column */
+ *new_cp++ = row ;
+ /* add row's external degree */
+ score += Row [row].shared1.degree - 1 ;
+ /* guard against integer overflow */
+ score = MIN (score, n_col) ;
+ }
+ /* determine pruned column length */
+ col_length = (int) (new_cp - &A [Col [c].start]) ;
+ if (col_length == 0)
+ {
+ /* a newly-made null column (all rows in this col are "dense" */
+ /* and have already been killed) */
+ DEBUG0 (("Newly null killed: %d\n", c)) ;
+ Col [c].shared2.order = --n_col2 ;
+ KILL_PRINCIPAL_COL (c) ;
+ }
+ else
+ {
+ /* set column length and set score */
+ assert (score >= 0) ;
+ assert (score <= n_col) ;
+ Col [c].length = col_length ;
+ Col [c].shared2.score = score ;
+ }
+ }
+ DEBUG0 (("Dense, null, and newly-null columns killed: %d\n",n_col-n_col2)) ;
+
+ /* At this point, all empty rows and columns are dead. All live columns */
+ /* are "clean" (containing no dead rows) and simplicial (no supercolumns */
+ /* yet). Rows may contain dead columns, but all live rows contain at */
+ /* least one live column. */
+
+#ifndef NDEBUG
+ debug_structures (n_row, n_col, Row, Col, A, n_col2) ;
+#endif
+
+ /* === Initialize degree lists ========================================== */
+
+#ifndef NDEBUG
+ debug_count = 0 ;
+#endif
+
+ /* clear the hash buckets */
+ for (c = 0 ; c <= n_col ; c++)
+ {
+ head [c] = EMPTY ;
+ }
+ min_score = n_col ;
+ /* place in reverse order, so low column indices are at the front */
+ /* of the lists. This is to encourage natural tie-breaking */
+ for (c = n_col-1 ; c >= 0 ; c--)
+ {
+ /* only add principal columns to degree lists */
+ if (COL_IS_ALIVE (c))
+ {
+ DEBUG4 (("place %d score %d minscore %d ncol %d\n",
+ c, Col [c].shared2.score, min_score, n_col)) ;
+
+ /* === Add columns score to DList =============================== */
+
+ score = Col [c].shared2.score ;
+
+ assert (min_score >= 0) ;
+ assert (min_score <= n_col) ;
+ assert (score >= 0) ;
+ assert (score <= n_col) ;
+ assert (head [score] >= EMPTY) ;
+
+ /* now add this column to dList at proper score location */
+ next_col = head [score] ;
+ Col [c].shared3.prev = EMPTY ;
+ Col [c].shared4.degree_next = next_col ;
+
+ /* if there already was a column with the same score, set its */
+ /* previous pointer to this new column */
+ if (next_col != EMPTY)
+ {
+ Col [next_col].shared3.prev = c ;
+ }
+ head [score] = c ;
+
+ /* see if this score is less than current min */
+ min_score = MIN (min_score, score) ;
+
+#ifndef NDEBUG
+ debug_count++ ;
+#endif
+ }
+ }
+
+#ifndef NDEBUG
+ DEBUG0 (("Live cols %d out of %d, non-princ: %d\n",
+ debug_count, n_col, n_col-debug_count)) ;
+ assert (debug_count == n_col2) ;
+ debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ;
+#endif
+
+ /* === Return number of remaining columns, and max row degree =========== */
+
+ *p_n_col2 = n_col2 ;
+ *p_n_row2 = n_row2 ;
+ *p_max_deg = max_deg ;
+}
+
+
+/* ========================================================================== */
+/* === find_ordering ======================================================== */
+/* ========================================================================== */
+
+/*
+ Order the principal columns of the supercolumn form of the matrix
+ (no supercolumns on input). Uses a minimum approximate column minimum
+ degree ordering method. Not user-callable.
+*/
+
+PRIVATE int find_ordering /* return the number of garbage collections */
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows of A */
+ int n_col, /* number of columns of A */
+ int Alen, /* size of A, 2*nnz + elbow_room or larger */
+ RowInfo Row [], /* of size n_row+1 */
+ ColInfo Col [], /* of size n_col+1 */
+ int A [], /* column form and row form of A */
+ int head [], /* of size n_col+1 */
+ int n_col2, /* Remaining columns to order */
+ int max_deg, /* Maximum row degree */
+ int pfree /* index of first free slot (2*nnz on entry) */
+)
+{
+ /* === Local variables ================================================== */
+
+ int k ; /* current pivot ordering step */
+ int pivot_col ; /* current pivot column */
+ int *cp ; /* a column pointer */
+ int *rp ; /* a row pointer */
+ int pivot_row ; /* current pivot row */
+ int *new_cp ; /* modified column pointer */
+ int *new_rp ; /* modified row pointer */
+ int pivot_row_start ; /* pointer to start of pivot row */
+ int pivot_row_degree ; /* # of columns in pivot row */
+ int pivot_row_length ; /* # of supercolumns in pivot row */
+ int pivot_col_score ; /* score of pivot column */
+ int needed_memory ; /* free space needed for pivot row */
+ int *cp_end ; /* pointer to the end of a column */
+ int *rp_end ; /* pointer to the end of a row */
+ int row ; /* a row index */
+ int col ; /* a column index */
+ int max_score ; /* maximum possible score */
+ int cur_score ; /* score of current column */
+ unsigned int hash ; /* hash value for supernode detection */
+ int head_column ; /* head of hash bucket */
+ int first_col ; /* first column in hash bucket */
+ int tag_mark ; /* marker value for mark array */
+ int row_mark ; /* Row [row].shared2.mark */
+ int set_difference ; /* set difference size of row with pivot row */
+ int min_score ; /* smallest column score */
+ int col_thickness ; /* "thickness" (# of columns in a supercol) */
+ int max_mark ; /* maximum value of tag_mark */
+ int pivot_col_thickness ; /* number of columns represented by pivot col */
+ int prev_col ; /* Used by Dlist operations. */
+ int next_col ; /* Used by Dlist operations. */
+ int ngarbage ; /* number of garbage collections performed */
+#ifndef NDEBUG
+ int debug_d ; /* debug loop counter */
+ int debug_step = 0 ; /* debug loop counter */
+#endif
+
+ /* === Initialization and clear mark ==================================== */
+
+ max_mark = INT_MAX - n_col ; /* INT_MAX defined in <limits.h> */
+ tag_mark = clear_mark (n_row, Row) ;
+ min_score = 0 ;
+ ngarbage = 0 ;
+ DEBUG0 (("Ordering.. n_col2=%d\n", n_col2)) ;
+
+ /* === Order the columns ================================================ */
+
+ for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */)
+ {
+
+#ifndef NDEBUG
+ if (debug_step % 100 == 0)
+ {
+ DEBUG0 (("\n... Step k: %d out of n_col2: %d\n", k, n_col2)) ;
+ }
+ else
+ {
+ DEBUG1 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ;
+ }
+ debug_step++ ;
+ debug_deg_lists (n_row, n_col, Row, Col, head,
+ min_score, n_col2-k, max_deg) ;
+ debug_matrix (n_row, n_col, Row, Col, A) ;
+#endif
+
+ /* === Select pivot column, and order it ============================ */
+
+ /* make sure degree list isn't empty */
+ assert (min_score >= 0) ;
+ assert (min_score <= n_col) ;
+ assert (head [min_score] >= EMPTY) ;
+
+#ifndef NDEBUG
+ for (debug_d = 0 ; debug_d < min_score ; debug_d++)
+ {
+ assert (head [debug_d] == EMPTY) ;
+ }
+#endif
+
+ /* get pivot column from head of minimum degree list */
+ while (head [min_score] == EMPTY && min_score < n_col)
+ {
+ min_score++ ;
+ }
+ pivot_col = head [min_score] ;
+ assert (pivot_col >= 0 && pivot_col <= n_col) ;
+ next_col = Col [pivot_col].shared4.degree_next ;
+ head [min_score] = next_col ;
+ if (next_col != EMPTY)
+ {
+ Col [next_col].shared3.prev = EMPTY ;
+ }
+
+ assert (COL_IS_ALIVE (pivot_col)) ;
+ DEBUG3 (("Pivot col: %d\n", pivot_col)) ;
+
+ /* remember score for defrag check */
+ pivot_col_score = Col [pivot_col].shared2.score ;
+
+ /* the pivot column is the kth column in the pivot order */
+ Col [pivot_col].shared2.order = k ;
+
+ /* increment order count by column thickness */
+ pivot_col_thickness = Col [pivot_col].shared1.thickness ;
+ k += pivot_col_thickness ;
+ assert (pivot_col_thickness > 0) ;
+
+ /* === Garbage_collection, if necessary ============================= */
+
+ needed_memory = MIN (pivot_col_score, n_col - k) ;
+ if (pfree + needed_memory >= Alen)
+ {
+ pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
+ ngarbage++ ;
+ /* after garbage collection we will have enough */
+ assert (pfree + needed_memory < Alen) ;
+ /* garbage collection has wiped out the Row[].shared2.mark array */
+ tag_mark = clear_mark (n_row, Row) ;
+#ifndef NDEBUG
+ debug_matrix (n_row, n_col, Row, Col, A) ;
+#endif
+ }
+
+ /* === Compute pivot row pattern ==================================== */
+
+ /* get starting location for this new merged row */
+ pivot_row_start = pfree ;
+
+ /* initialize new row counts to zero */
+ pivot_row_degree = 0 ;
+
+ /* tag pivot column as having been visited so it isn't included */
+ /* in merged pivot row */
+ Col [pivot_col].shared1.thickness = -pivot_col_thickness ;
+
+ /* pivot row is the union of all rows in the pivot column pattern */
+ cp = &A [Col [pivot_col].start] ;
+ cp_end = cp + Col [pivot_col].length ;
+ while (cp < cp_end)
+ {
+ /* get a row */
+ row = *cp++ ;
+ DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ;
+ /* skip if row is dead */
+ if (ROW_IS_DEAD (row))
+ {
+ continue ;
+ }
+ rp = &A [Row [row].start] ;
+ rp_end = rp + Row [row].length ;
+ while (rp < rp_end)
+ {
+ /* get a column */
+ col = *rp++ ;
+ /* add the column, if alive and untagged */
+ col_thickness = Col [col].shared1.thickness ;
+ if (col_thickness > 0 && COL_IS_ALIVE (col))
+ {
+ /* tag column in pivot row */
+ Col [col].shared1.thickness = -col_thickness ;
+ assert (pfree < Alen) ;
+ /* place column in pivot row */
+ A [pfree++] = col ;
+ pivot_row_degree += col_thickness ;
+ }
+ }
+ }
+
+ /* clear tag on pivot column */
+ Col [pivot_col].shared1.thickness = pivot_col_thickness ;
+ max_deg = MAX (max_deg, pivot_row_degree) ;
+
+#ifndef NDEBUG
+ DEBUG3 (("check2\n")) ;
+ debug_mark (n_row, Row, tag_mark, max_mark) ;
+#endif
+
+ /* === Kill all rows used to construct pivot row ==================== */
+
+ /* also kill pivot row, temporarily */
+ cp = &A [Col [pivot_col].start] ;
+ cp_end = cp + Col [pivot_col].length ;
+ while (cp < cp_end)
+ {
+ /* may be killing an already dead row */
+ row = *cp++ ;
+ DEBUG2 (("Kill row in pivot col: %d\n", row)) ;
+ KILL_ROW (row) ;
+ }
+
+ /* === Select a row index to use as the new pivot row =============== */
+
+ pivot_row_length = pfree - pivot_row_start ;
+ if (pivot_row_length > 0)
+ {
+ /* pick the "pivot" row arbitrarily (first row in col) */
+ pivot_row = A [Col [pivot_col].start] ;
+ DEBUG2 (("Pivotal row is %d\n", pivot_row)) ;
+ }
+ else
+ {
+ /* there is no pivot row, since it is of zero length */
+ pivot_row = EMPTY ;
+ assert (pivot_row_length == 0) ;
+ }
+ assert (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
+
+ /* === Approximate degree computation =============================== */
+
+ /* Here begins the computation of the approximate degree. The column */
+ /* score is the sum of the pivot row "length", plus the size of the */
+ /* set differences of each row in the column minus the pattern of the */
+ /* pivot row itself. The column ("thickness") itself is also */
+ /* excluded from the column score (we thus use an approximate */
+ /* external degree). */
+
+ /* The time taken by the following code (compute set differences, and */
+ /* add them up) is proportional to the size of the data structure */
+ /* being scanned - that is, the sum of the sizes of each column in */
+ /* the pivot row. Thus, the amortized time to compute a column score */
+ /* is proportional to the size of that column (where size, in this */
+ /* context, is the column "length", or the number of row indices */
+ /* in that column). The number of row indices in a column is */
+ /* monotonically non-decreasing, from the length of the original */
+ /* column on input to colamd. */
+
+ /* === Compute set differences ====================================== */
+
+ DEBUG1 (("** Computing set differences phase. **\n")) ;
+
+ /* pivot row is currently dead - it will be revived later. */
+
+ DEBUG2 (("Pivot row: ")) ;
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
+ col = *rp++ ;
+ assert (COL_IS_ALIVE (col) && col != pivot_col) ;
+ DEBUG2 (("Col: %d\n", col)) ;
+
+ /* clear tags used to construct pivot row pattern */
+ col_thickness = -Col [col].shared1.thickness ;
+ assert (col_thickness > 0) ;
+ Col [col].shared1.thickness = col_thickness ;
+
+ /* === Remove column from degree list =========================== */
+
+ cur_score = Col [col].shared2.score ;
+ prev_col = Col [col].shared3.prev ;
+ next_col = Col [col].shared4.degree_next ;
+ assert (cur_score >= 0) ;
+ assert (cur_score <= n_col) ;
+ assert (cur_score >= EMPTY) ;
+ if (prev_col == EMPTY)
+ {
+ head [cur_score] = next_col ;
+ }
+ else
+ {
+ Col [prev_col].shared4.degree_next = next_col ;
+ }
+ if (next_col != EMPTY)
+ {
+ Col [next_col].shared3.prev = prev_col ;
+ }
+
+ /* === Scan the column ========================================== */
+
+ cp = &A [Col [col].start] ;
+ cp_end = cp + Col [col].length ;
+ while (cp < cp_end)
+ {
+ /* get a row */
+ row = *cp++ ;
+ row_mark = Row [row].shared2.mark ;
+ /* skip if dead */
+ if (ROW_IS_MARKED_DEAD (row_mark))
+ {
+ continue ;
+ }
+ assert (row != pivot_row) ;
+ set_difference = row_mark - tag_mark ;
+ /* check if the row has been seen yet */
+ if (set_difference < 0)
+ {
+ assert (Row [row].shared1.degree <= max_deg) ;
+ set_difference = Row [row].shared1.degree ;
+ }
+ /* subtract column thickness from this row's set difference */
+ set_difference -= col_thickness ;
+ assert (set_difference >= 0) ;
+ /* absorb this row if the set difference becomes zero */
+ if (set_difference == 0)
+ {
+ DEBUG1 (("aggressive absorption. Row: %d\n", row)) ;
+ KILL_ROW (row) ;
+ }
+ else
+ {
+ /* save the new mark */
+ Row [row].shared2.mark = set_difference + tag_mark ;
+ }
+ }
+ }
+
+#ifndef NDEBUG
+ debug_deg_lists (n_row, n_col, Row, Col, head,
+ min_score, n_col2-k-pivot_row_degree, max_deg) ;
+#endif
+
+ /* === Add up set differences for each column ======================= */
+
+ DEBUG1 (("** Adding set differences phase. **\n")) ;
+
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
+ /* get a column */
+ col = *rp++ ;
+ assert (COL_IS_ALIVE (col) && col != pivot_col) ;
+ hash = 0 ;
+ cur_score = 0 ;
+ cp = &A [Col [col].start] ;
+ /* compact the column */
+ new_cp = cp ;
+ cp_end = cp + Col [col].length ;
+
+ DEBUG2 (("Adding set diffs for Col: %d.\n", col)) ;
+
+ while (cp < cp_end)
+ {
+ /* get a row */
+ row = *cp++ ;
+ assert(row >= 0 && row < n_row) ;
+ row_mark = Row [row].shared2.mark ;
+ /* skip if dead */
+ if (ROW_IS_MARKED_DEAD (row_mark))
+ {
+ continue ;
+ }
+ assert (row_mark > tag_mark) ;
+ /* compact the column */
+ *new_cp++ = row ;
+ /* compute hash function */
+ hash += row ;
+ /* add set difference */
+ cur_score += row_mark - tag_mark ;
+ /* integer overflow... */
+ cur_score = MIN (cur_score, n_col) ;
+ }
+
+ /* recompute the column's length */
+ Col [col].length = (int) (new_cp - &A [Col [col].start]) ;
+
+ /* === Further mass elimination ================================= */
+
+ if (Col [col].length == 0)
+ {
+ DEBUG1 (("further mass elimination. Col: %d\n", col)) ;
+ /* nothing left but the pivot row in this column */
+ KILL_PRINCIPAL_COL (col) ;
+ pivot_row_degree -= Col [col].shared1.thickness ;
+ assert (pivot_row_degree >= 0) ;
+ /* order it */
+ Col [col].shared2.order = k ;
+ /* increment order count by column thickness */
+ k += Col [col].shared1.thickness ;
+ }
+ else
+ {
+ /* === Prepare for supercolumn detection ==================== */
+
+ DEBUG2 (("Preparing supercol detection for Col: %d.\n", col)) ;
+
+ /* save score so far */
+ Col [col].shared2.score = cur_score ;
+
+ /* add column to hash table, for supercolumn detection */
+ hash %= n_col + 1 ;
+
+ DEBUG2 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
+ assert (hash <= n_col) ;
+
+ head_column = head [hash] ;
+ if (head_column > EMPTY)
+ {
+ /* degree list "hash" is non-empty, use prev (shared3) of */
+ /* first column in degree list as head of hash bucket */
+ first_col = Col [head_column].shared3.headhash ;
+ Col [head_column].shared3.headhash = col ;
+ }
+ else
+ {
+ /* degree list "hash" is empty, use head as hash bucket */
+ first_col = - (head_column + 2) ;
+ head [hash] = - (col + 2) ;
+ }
+ Col [col].shared4.hash_next = first_col ;
+
+ /* save hash function in Col [col].shared3.hash */
+ Col [col].shared3.hash = (int) hash ;
+ assert (COL_IS_ALIVE (col)) ;
+ }
+ }
+
+ /* The approximate external column degree is now computed. */
+
+ /* === Supercolumn detection ======================================== */
+
+ DEBUG1 (("** Supercolumn detection phase. **\n")) ;
+
+ detect_super_cols (
+#ifndef NDEBUG
+ n_col, Row,
+#endif
+ Col, A, head, pivot_row_start, pivot_row_length) ;
+
+ /* === Kill the pivotal column ====================================== */
+
+ KILL_PRINCIPAL_COL (pivot_col) ;
+
+ /* === Clear mark =================================================== */
+
+ tag_mark += (max_deg + 1) ;
+ if (tag_mark >= max_mark)
+ {
+ DEBUG1 (("clearing tag_mark\n")) ;
+ tag_mark = clear_mark (n_row, Row) ;
+ }
+#ifndef NDEBUG
+ DEBUG3 (("check3\n")) ;
+ debug_mark (n_row, Row, tag_mark, max_mark) ;
+#endif
+
+ /* === Finalize the new pivot row, and column scores ================ */
+
+ DEBUG1 (("** Finalize scores phase. **\n")) ;
+
+ /* for each column in pivot row */
+ rp = &A [pivot_row_start] ;
+ /* compact the pivot row */
+ new_rp = rp ;
+ rp_end = rp + pivot_row_length ;
+ while (rp < rp_end)
+ {
+ col = *rp++ ;
+ /* skip dead columns */
+ if (COL_IS_DEAD (col))
+ {
+ continue ;
+ }
+ *new_rp++ = col ;
+ /* add new pivot row to column */
+ A [Col [col].start + (Col [col].length++)] = pivot_row ;
+
+ /* retrieve score so far and add on pivot row's degree. */
+ /* (we wait until here for this in case the pivot */
+ /* row's degree was reduced due to mass elimination). */
+ cur_score = Col [col].shared2.score + pivot_row_degree ;
+
+ /* calculate the max possible score as the number of */
+ /* external columns minus the 'k' value minus the */
+ /* columns thickness */
+ max_score = n_col - k - Col [col].shared1.thickness ;
+
+ /* make the score the external degree of the union-of-rows */
+ cur_score -= Col [col].shared1.thickness ;
+
+ /* make sure score is less or equal than the max score */
+ cur_score = MIN (cur_score, max_score) ;
+ assert (cur_score >= 0) ;
+
+ /* store updated score */
+ Col [col].shared2.score = cur_score ;
+
+ /* === Place column back in degree list ========================= */
+
+ assert (min_score >= 0) ;
+ assert (min_score <= n_col) ;
+ assert (cur_score >= 0) ;
+ assert (cur_score <= n_col) ;
+ assert (head [cur_score] >= EMPTY) ;
+ next_col = head [cur_score] ;
+ Col [col].shared4.degree_next = next_col ;
+ Col [col].shared3.prev = EMPTY ;
+ if (next_col != EMPTY)
+ {
+ Col [next_col].shared3.prev = col ;
+ }
+ head [cur_score] = col ;
+
+ /* see if this score is less than current min */
+ min_score = MIN (min_score, cur_score) ;
+
+ }
+
+#ifndef NDEBUG
+ debug_deg_lists (n_row, n_col, Row, Col, head,
+ min_score, n_col2-k, max_deg) ;
+#endif
+
+ /* === Resurrect the new pivot row ================================== */
+
+ if (pivot_row_degree > 0)
+ {
+ /* update pivot row length to reflect any cols that were killed */
+ /* during super-col detection and mass elimination */
+ Row [pivot_row].start = pivot_row_start ;
+ Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ;
+ Row [pivot_row].shared1.degree = pivot_row_degree ;
+ Row [pivot_row].shared2.mark = 0 ;
+ /* pivot row is no longer dead */
+ }
+ }
+
+ /* === All principal columns have now been ordered ====================== */
+
+ return (ngarbage) ;
+}
+
+
+/* ========================================================================== */
+/* === order_children ======================================================= */
+/* ========================================================================== */
+
+/*
+ The find_ordering routine has ordered all of the principal columns (the
+ representatives of the supercolumns). The non-principal columns have not
+ yet been ordered. This routine orders those columns by walking up the
+ parent tree (a column is a child of the column which absorbed it). The
+ final permutation vector is then placed in p [0 ... n_col-1], with p [0]
+ being the first column, and p [n_col-1] being the last. It doesn't look
+ like it at first glance, but be assured that this routine takes time linear
+ in the number of columns. Although not immediately obvious, the time
+ taken by this routine is O (n_col), that is, linear in the number of
+ columns. Not user-callable.
+*/
+
+PRIVATE void order_children
+(
+ /* === Parameters ======================================================= */
+
+ int n_col, /* number of columns of A */
+ ColInfo Col [], /* of size n_col+1 */
+ int p [] /* p [0 ... n_col-1] is the column permutation*/
+)
+{
+ /* === Local variables ================================================== */
+
+ int i ; /* loop counter for all columns */
+ int c ; /* column index */
+ int parent ; /* index of column's parent */
+ int order ; /* column's order */
+
+ /* === Order each non-principal column ================================== */
+
+ for (i = 0 ; i < n_col ; i++)
+ {
+ /* find an un-ordered non-principal column */
+ assert (COL_IS_DEAD (i)) ;
+ if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EMPTY)
+ {
+ parent = i ;
+ /* once found, find its principal parent */
+ do
+ {
+ parent = Col [parent].shared1.parent ;
+ } while (!COL_IS_DEAD_PRINCIPAL (parent)) ;
+
+ /* now, order all un-ordered non-principal columns along path */
+ /* to this parent. collapse tree at the same time */
+ c = i ;
+ /* get order of parent */
+ order = Col [parent].shared2.order ;
+
+ do
+ {
+ assert (Col [c].shared2.order == EMPTY) ;
+
+ /* order this column */
+ Col [c].shared2.order = order++ ;
+ /* collaps tree */
+ Col [c].shared1.parent = parent ;
+
+ /* get immediate parent of this column */
+ c = Col [c].shared1.parent ;
+
+ /* continue until we hit an ordered column. There are */
+ /* guarranteed not to be anymore unordered columns */
+ /* above an ordered column */
+ } while (Col [c].shared2.order == EMPTY) ;
+
+ /* re-order the super_col parent to largest order for this group */
+ Col [parent].shared2.order = order ;
+ }
+ }
+
+ /* === Generate the permutation ========================================= */
+
+ for (c = 0 ; c < n_col ; c++)
+ {
+ p [Col [c].shared2.order] = c ;
+ }
+}
+
+
+/* ========================================================================== */
+/* === detect_super_cols ==================================================== */
+/* ========================================================================== */
+
+/*
+ Detects supercolumns by finding matches between columns in the hash buckets.
+ Check amongst columns in the set A [row_start ... row_start + row_length-1].
+ The columns under consideration are currently *not* in the degree lists,
+ and have already been placed in the hash buckets.
+
+ The hash bucket for columns whose hash function is equal to h is stored
+ as follows:
+
+ if head [h] is >= 0, then head [h] contains a degree list, so:
+
+ head [h] is the first column in degree bucket h.
+ Col [head [h]].headhash gives the first column in hash bucket h.
+
+ otherwise, the degree list is empty, and:
+
+ -(head [h] + 2) is the first column in hash bucket h.
+
+ For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous
+ column" pointer. Col [c].shared3.hash is used instead as the hash number
+ for that column. The value of Col [c].shared4.hash_next is the next column
+ in the same hash bucket.
+
+ Assuming no, or "few" hash collisions, the time taken by this routine is
+ linear in the sum of the sizes (lengths) of each column whose score has
+ just been computed in the approximate degree computation.
+ Not user-callable.
+*/
+
+PRIVATE void detect_super_cols
+(
+ /* === Parameters ======================================================= */
+
+#ifndef NDEBUG
+ /* these two parameters are only needed when debugging is enabled: */
+ int n_col, /* number of columns of A */
+ RowInfo Row [], /* of size n_row+1 */
+#endif
+ ColInfo Col [], /* of size n_col+1 */
+ int A [], /* row indices of A */
+ int head [], /* head of degree lists and hash buckets */
+ int row_start, /* pointer to set of columns to check */
+ int row_length /* number of columns to check */
+)
+{
+ /* === Local variables ================================================== */
+
+ int hash ; /* hash # for a column */
+ int *rp ; /* pointer to a row */
+ int c ; /* a column index */
+ int super_c ; /* column index of the column to absorb into */
+ int *cp1 ; /* column pointer for column super_c */
+ int *cp2 ; /* column pointer for column c */
+ int length ; /* length of column super_c */
+ int prev_c ; /* column preceding c in hash bucket */
+ int i ; /* loop counter */
+ int *rp_end ; /* pointer to the end of the row */
+ int col ; /* a column index in the row to check */
+ int head_column ; /* first column in hash bucket or degree list */
+ int first_col ; /* first column in hash bucket */
+
+ /* === Consider each column in the row ================================== */
+
+ rp = &A [row_start] ;
+ rp_end = rp + row_length ;
+ while (rp < rp_end)
+ {
+ col = *rp++ ;
+ if (COL_IS_DEAD (col))
+ {
+ continue ;
+ }
+
+ /* get hash number for this column */
+ hash = Col [col].shared3.hash ;
+ assert (hash <= n_col) ;
+
+ /* === Get the first column in this hash bucket ===================== */
+
+ head_column = head [hash] ;
+ if (head_column > EMPTY)
+ {
+ first_col = Col [head_column].shared3.headhash ;
+ }
+ else
+ {
+ first_col = - (head_column + 2) ;
+ }
+
+ /* === Consider each column in the hash bucket ====================== */
+
+ for (super_c = first_col ; super_c != EMPTY ;
+ super_c = Col [super_c].shared4.hash_next)
+ {
+ assert (COL_IS_ALIVE (super_c)) ;
+ assert (Col [super_c].shared3.hash == hash) ;
+ length = Col [super_c].length ;
+
+ /* prev_c is the column preceding column c in the hash bucket */
+ prev_c = super_c ;
+
+ /* === Compare super_c with all columns after it ================ */
+
+ for (c = Col [super_c].shared4.hash_next ;
+ c != EMPTY ; c = Col [c].shared4.hash_next)
+ {
+ assert (c != super_c) ;
+ assert (COL_IS_ALIVE (c)) ;
+ assert (Col [c].shared3.hash == hash) ;
+
+ /* not identical if lengths or scores are different */
+ if (Col [c].length != length ||
+ Col [c].shared2.score != Col [super_c].shared2.score)
+ {
+ prev_c = c ;
+ continue ;
+ }
+
+ /* compare the two columns */
+ cp1 = &A [Col [super_c].start] ;
+ cp2 = &A [Col [c].start] ;
+
+ for (i = 0 ; i < length ; i++)
+ {
+ /* the columns are "clean" (no dead rows) */
+ assert (ROW_IS_ALIVE (*cp1)) ;
+ assert (ROW_IS_ALIVE (*cp2)) ;
+ /* row indices will same order for both supercols, */
+ /* no gather scatter nessasary */
+ if (*cp1++ != *cp2++)
+ {
+ break ;
+ }
+ }
+
+ /* the two columns are different if the for-loop "broke" */
+ if (i != length)
+ {
+ prev_c = c ;
+ continue ;
+ }
+
+ /* === Got it! two columns are identical =================== */
+
+ assert (Col [c].shared2.score == Col [super_c].shared2.score) ;
+
+ Col [super_c].shared1.thickness += Col [c].shared1.thickness ;
+ Col [c].shared1.parent = super_c ;
+ KILL_NON_PRINCIPAL_COL (c) ;
+ /* order c later, in order_children() */
+ Col [c].shared2.order = EMPTY ;
+ /* remove c from hash bucket */
+ Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ;
+ }
+ }
+
+ /* === Empty this hash bucket ======================================= */
+
+ if (head_column > EMPTY)
+ {
+ /* corresponding degree list "hash" is not empty */
+ Col [head_column].shared3.headhash = EMPTY ;
+ }
+ else
+ {
+ /* corresponding degree list "hash" is empty */
+ head [hash] = EMPTY ;
+ }
+ }
+}
+
+
+/* ========================================================================== */
+/* === garbage_collection =================================================== */
+/* ========================================================================== */
+
+/*
+ Defragments and compacts columns and rows in the workspace A. Used when
+ all avaliable memory has been used while performing row merging. Returns
+ the index of the first free position in A, after garbage collection. The
+ time taken by this routine is linear is the size of the array A, which is
+ itself linear in the number of nonzeros in the input matrix.
+ Not user-callable.
+*/
+
+PRIVATE int garbage_collection /* returns the new value of pfree */
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows */
+ int n_col, /* number of columns */
+ RowInfo Row [], /* row info */
+ ColInfo Col [], /* column info */
+ int A [], /* A [0 ... Alen-1] holds the matrix */
+ int *pfree /* &A [0] ... pfree is in use */
+)
+{
+ /* === Local variables ================================================== */
+
+ int *psrc ; /* source pointer */
+ int *pdest ; /* destination pointer */
+ int j ; /* counter */
+ int r ; /* a row index */
+ int c ; /* a column index */
+ int length ; /* length of a row or column */
+
+#ifndef NDEBUG
+ int debug_rows ;
+ DEBUG0 (("Defrag..\n")) ;
+ for (psrc = &A[0] ; psrc < pfree ; psrc++) assert (*psrc >= 0) ;
+ debug_rows = 0 ;
+#endif
+
+ /* === Defragment the columns =========================================== */
+
+ pdest = &A[0] ;
+ for (c = 0 ; c < n_col ; c++)
+ {
+ if (COL_IS_ALIVE (c))
+ {
+ psrc = &A [Col [c].start] ;
+
+ /* move and compact the column */
+ assert (pdest <= psrc) ;
+ Col [c].start = (int) (pdest - &A [0]) ;
+ length = Col [c].length ;
+ for (j = 0 ; j < length ; j++)
+ {
+ r = *psrc++ ;
+ if (ROW_IS_ALIVE (r))
+ {
+ *pdest++ = r ;
+ }
+ }
+ Col [c].length = (int) (pdest - &A [Col [c].start]) ;
+ }
+ }
+
+ /* === Prepare to defragment the rows =================================== */
+
+ for (r = 0 ; r < n_row ; r++)
+ {
+ if (ROW_IS_ALIVE (r))
+ {
+ if (Row [r].length == 0)
+ {
+ /* this row is of zero length. cannot compact it, so kill it */
+ DEBUG0 (("Defrag row kill\n")) ;
+ KILL_ROW (r) ;
+ }
+ else
+ {
+ /* save first column index in Row [r].shared2.first_column */
+ psrc = &A [Row [r].start] ;
+ Row [r].shared2.first_column = *psrc ;
+ assert (ROW_IS_ALIVE (r)) ;
+ /* flag the start of the row with the one's complement of row */
+ *psrc = ONES_COMPLEMENT (r) ;
+#ifndef NDEBUG
+ debug_rows++ ;
+#endif
+ }
+ }
+ }
+
+ /* === Defragment the rows ============================================== */
+
+ psrc = pdest ;
+ while (psrc < pfree)
+ {
+ /* find a negative number ... the start of a row */
+ if (*psrc++ < 0)
+ {
+ psrc-- ;
+ /* get the row index */
+ r = ONES_COMPLEMENT (*psrc) ;
+ assert (r >= 0 && r < n_row) ;
+ /* restore first column index */
+ *psrc = Row [r].shared2.first_column ;
+ assert (ROW_IS_ALIVE (r)) ;
+
+ /* move and compact the row */
+ assert (pdest <= psrc) ;
+ Row [r].start = (int) (pdest - &A [0]) ;
+ length = Row [r].length ;
+ for (j = 0 ; j < length ; j++)
+ {
+ c = *psrc++ ;
+ if (COL_IS_ALIVE (c))
+ {
+ *pdest++ = c ;
+ }
+ }
+ Row [r].length = (int) (pdest - &A [Row [r].start]) ;
+#ifndef NDEBUG
+ debug_rows-- ;
+#endif
+ }
+ }
+ /* ensure we found all the rows */
+ assert (debug_rows == 0) ;
+
+ /* === Return the new value of pfree ==================================== */
+
+ return ((int) (pdest - &A [0])) ;
+}
+
+
+/* ========================================================================== */
+/* === clear_mark =========================================================== */
+/* ========================================================================== */
+
+/*
+ Clears the Row [].shared2.mark array, and returns the new tag_mark.
+ Return value is the new tag_mark. Not user-callable.
+*/
+
+PRIVATE int clear_mark /* return the new value for tag_mark */
+(
+ /* === Parameters ======================================================= */
+
+ int n_row, /* number of rows in A */
+ RowInfo Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */
+)
+{
+ /* === Local variables ================================================== */
+
+ int r ;
+
+ DEBUG0 (("Clear mark\n")) ;
+ for (r = 0 ; r < n_row ; r++)
+ {
+ if (ROW_IS_ALIVE (r))
+ {
+ Row [r].shared2.mark = 0 ;
+ }
+ }
+ return (1) ;
+}
+
+
+/* ========================================================================== */
+/* === debugging routines =================================================== */
+/* ========================================================================== */
+
+/* When debugging is disabled, the remainder of this file is ignored. */
+
+#ifndef NDEBUG
+
+
+/* ========================================================================== */
+/* === debug_structures ===================================================== */
+/* ========================================================================== */
+
+/*
+ At this point, all empty rows and columns are dead. All live columns
+ are "clean" (containing no dead rows) and simplicial (no supercolumns
+ yet). Rows may contain dead columns, but all live rows contain at
+ least one live column.
+*/
+
+PRIVATE void debug_structures
+(
+ /* === Parameters ======================================================= */
+
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A [],
+ int n_col2
+)
+{
+ /* === Local variables ================================================== */
+
+ int i ;
+ int c ;
+ int *cp ;
+ int *cp_end ;
+ int len ;
+ int score ;
+ int r ;
+ int *rp ;
+ int *rp_end ;
+ int deg ;
+
+ /* === Check A, Row, and Col ============================================ */
+
+ for (c = 0 ; c < n_col ; c++)
+ {
+ if (COL_IS_ALIVE (c))
+ {
+ len = Col [c].length ;
+ score = Col [c].shared2.score ;
+ DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ;
+ assert (len > 0) ;
+ assert (score >= 0) ;
+ assert (Col [c].shared1.thickness == 1) ;
+ cp = &A [Col [c].start] ;
+ cp_end = cp + len ;
+ while (cp < cp_end)
+ {
+ r = *cp++ ;
+ assert (ROW_IS_ALIVE (r)) ;
+ }
+ }
+ else
+ {
+ i = Col [c].shared2.order ;
+ assert (i >= n_col2 && i < n_col) ;
+ }
+ }
+
+ for (r = 0 ; r < n_row ; r++)
+ {
+ if (ROW_IS_ALIVE (r))
+ {
+ i = 0 ;
+ len = Row [r].length ;
+ deg = Row [r].shared1.degree ;
+ assert (len > 0) ;
+ assert (deg > 0) ;
+ rp = &A [Row [r].start] ;
+ rp_end = rp + len ;
+ while (rp < rp_end)
+ {
+ c = *rp++ ;
+ if (COL_IS_ALIVE (c))
+ {
+ i++ ;
+ }
+ }
+ assert (i > 0) ;
+ }
+ }
+}
+
+
+/* ========================================================================== */
+/* === debug_deg_lists ====================================================== */
+/* ========================================================================== */
+
+/*
+ Prints the contents of the degree lists. Counts the number of columns
+ in the degree list and compares it to the total it should have. Also
+ checks the row degrees.
+*/
+
+PRIVATE void debug_deg_lists
+(
+ /* === Parameters ======================================================= */
+
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int head [],
+ int min_score,
+ int should,
+ int max_deg
+)
+{
+ /* === Local variables ================================================== */
+
+ int deg ;
+ int col ;
+ int have ;
+ int row ;
+
+ /* === Check the degree lists =========================================== */
+
+ if (n_col > 10000 && debug_colamd <= 0)
+ {
+ return ;
+ }
+ have = 0 ;
+ DEBUG4 (("Degree lists: %d\n", min_score)) ;
+ for (deg = 0 ; deg <= n_col ; deg++)
+ {
+ col = head [deg] ;
+ if (col == EMPTY)
+ {
+ continue ;
+ }
+ DEBUG4 (("%d:", deg)) ;
+ while (col != EMPTY)
+ {
+ DEBUG4 ((" %d", col)) ;
+ have += Col [col].shared1.thickness ;
+ assert (COL_IS_ALIVE (col)) ;
+ col = Col [col].shared4.degree_next ;
+ }
+ DEBUG4 (("\n")) ;
+ }
+ DEBUG4 (("should %d have %d\n", should, have)) ;
+ assert (should == have) ;
+
+ /* === Check the row degrees ============================================ */
+
+ if (n_row > 10000 && debug_colamd <= 0)
+ {
+ return ;
+ }
+ for (row = 0 ; row < n_row ; row++)
+ {
+ if (ROW_IS_ALIVE (row))
+ {
+ assert (Row [row].shared1.degree <= max_deg) ;
+ }
+ }
+}
+
+
+/* ========================================================================== */
+/* === debug_mark =========================================================== */
+/* ========================================================================== */
+
+/*
+ Ensures that the tag_mark is less that the maximum and also ensures that
+ each entry in the mark array is less than the tag mark.
+*/
+
+PRIVATE void debug_mark
+(
+ /* === Parameters ======================================================= */
+
+ int n_row,
+ RowInfo Row [],
+ int tag_mark,
+ int max_mark
+)
+{
+ /* === Local variables ================================================== */
+
+ int r ;
+
+ /* === Check the Row marks ============================================== */
+
+ assert (tag_mark > 0 && tag_mark <= max_mark) ;
+ if (n_row > 10000 && debug_colamd <= 0)
+ {
+ return ;
+ }
+ for (r = 0 ; r < n_row ; r++)
+ {
+ assert (Row [r].shared2.mark < tag_mark) ;
+ }
+}
+
+
+/* ========================================================================== */
+/* === debug_matrix ========================================================= */
+/* ========================================================================== */
+
+/*
+ Prints out the contents of the columns and the rows.
+*/
+
+PRIVATE void debug_matrix
+(
+ /* === Parameters ======================================================= */
+
+ int n_row,
+ int n_col,
+ RowInfo Row [],
+ ColInfo Col [],
+ int A []
+)
+{
+ /* === Local variables ================================================== */
+
+ int r ;
+ int c ;
+ int *rp ;
+ int *rp_end ;
+ int *cp ;
+ int *cp_end ;
+
+ /* === Dump the rows and columns of the matrix ========================== */
+
+ if (debug_colamd < 3)
+ {
+ return ;
+ }
+ DEBUG3 (("DUMP MATRIX:\n")) ;
+ for (r = 0 ; r < n_row ; r++)
+ {
+ DEBUG3 (("Row %d alive? %d\n", r, ROW_IS_ALIVE (r))) ;
+ if (ROW_IS_DEAD (r))
+ {
+ continue ;
+ }
+ DEBUG3 (("start %d length %d degree %d\n",
+ Row [r].start, Row [r].length, Row [r].shared1.degree)) ;
+ rp = &A [Row [r].start] ;
+ rp_end = rp + Row [r].length ;
+ while (rp < rp_end)
+ {
+ c = *rp++ ;
+ DEBUG3 ((" %d col %d\n", COL_IS_ALIVE (c), c)) ;
+ }
+ }
+
+ for (c = 0 ; c < n_col ; c++)
+ {
+ DEBUG3 (("Col %d alive? %d\n", c, COL_IS_ALIVE (c))) ;
+ if (COL_IS_DEAD (c))
+ {
+ continue ;
+ }
+ DEBUG3 (("start %d length %d shared1 %d shared2 %d\n",
+ Col [c].start, Col [c].length,
+ Col [c].shared1.thickness, Col [c].shared2.score)) ;
+ cp = &A [Col [c].start] ;
+ cp_end = cp + Col [c].length ;
+ while (cp < cp_end)
+ {
+ r = *cp++ ;
+ DEBUG3 ((" %d row %d\n", ROW_IS_ALIVE (r), r)) ;
+ }
+ }
+}
+
+#endif
+
diff --git a/intern/opennl/superlu/colamd.h b/intern/opennl/superlu/colamd.h
new file mode 100644
index 00000000000..00783983b27
--- /dev/null
+++ b/intern/opennl/superlu/colamd.h
@@ -0,0 +1,67 @@
+/* ========================================================================== */
+/* === colamd prototypes and definitions ==================================== */
+/* ========================================================================== */
+
+/*
+ This is the colamd include file,
+
+ http://www.cise.ufl.edu/~davis/colamd/colamd.h
+
+ for use in the colamd.c, colamdmex.c, and symamdmex.c files located at
+
+ http://www.cise.ufl.edu/~davis/colamd/
+
+ See those files for a description of colamd and symamd, and for the
+ copyright notice, which also applies to this file.
+
+ August 3, 1998. Version 1.0.
+*/
+
+/* ========================================================================== */
+/* === Definitions ========================================================== */
+/* ========================================================================== */
+
+/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */
+#define COLAMD_KNOBS 20
+
+/* number of output statistics. Only A [0..2] are currently used. */
+#define COLAMD_STATS 20
+
+/* knobs [0] and A [0]: dense row knob and output statistic. */
+#define COLAMD_DENSE_ROW 0
+
+/* knobs [1] and A [1]: dense column knob and output statistic. */
+#define COLAMD_DENSE_COL 1
+
+/* A [2]: memory defragmentation count output statistic */
+#define COLAMD_DEFRAG_COUNT 2
+
+/* A [3]: whether or not the input columns were jumbled or had duplicates */
+#define COLAMD_JUMBLED_COLS 3
+
+/* ========================================================================== */
+/* === Prototypes of user-callable routines ================================= */
+/* ========================================================================== */
+
+int colamd_recommended /* returns recommended value of Alen */
+(
+ int nnz, /* nonzeros in A */
+ int n_row, /* number of rows in A */
+ int n_col /* number of columns in A */
+) ;
+
+void colamd_set_defaults /* sets default parameters */
+( /* knobs argument is modified on output */
+ double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
+) ;
+
+int colamd /* returns TRUE if successful, FALSE otherwise*/
+( /* A and p arguments are modified on output */
+ int n_row, /* number of rows in A */
+ int n_col, /* number of columns in A */
+ int Alen, /* size of the array A */
+ int A [], /* row indices of A, of size Alen */
+ int p [], /* column pointers of A, of size n_col+1 */
+ double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
+) ;
+
diff --git a/intern/opennl/superlu/get_perm_c.c b/intern/opennl/superlu/get_perm_c.c
new file mode 100644
index 00000000000..9cdf5a876bf
--- /dev/null
+++ b/intern/opennl/superlu/get_perm_c.c
@@ -0,0 +1,453 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+
+#include "ssp_defs.h"
+#include "colamd.h"
+
+extern int genmmd_(int *, int *, int *, int *, int *, int *, int *,
+ int *, int *, int *, int *, int *);
+
+void
+get_colamd(
+ const int m, /* number of rows in matrix A. */
+ const int n, /* number of columns in matrix A. */
+ const int nnz,/* number of nonzeros in matrix A. */
+ int *colptr, /* column pointer of size n+1 for matrix A. */
+ int *rowind, /* row indices of size nz for matrix A. */
+ int *perm_c /* out - the column permutation vector. */
+ )
+{
+ int Alen, *A, i, info, *p;
+ double *knobs;
+
+ Alen = colamd_recommended(nnz, m, n);
+
+ if ( !(knobs = (double *) SUPERLU_MALLOC(COLAMD_KNOBS * sizeof(double))) )
+ ABORT("Malloc fails for knobs");
+ colamd_set_defaults(knobs);
+
+ if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) )
+ ABORT("Malloc fails for A[]");
+ if (!(p = (int *) SUPERLU_MALLOC((n+1) * sizeof(int))) )
+ ABORT("Malloc fails for p[]");
+ for (i = 0; i <= n; ++i) p[i] = colptr[i];
+ for (i = 0; i < nnz; ++i) A[i] = rowind[i];
+ info = colamd(m, n, Alen, A, p, knobs);
+ if ( info == FALSE ) ABORT("COLAMD failed");
+
+ for (i = 0; i < n; ++i) perm_c[p[i]] = i;
+
+ SUPERLU_FREE(knobs);
+ SUPERLU_FREE(A);
+ SUPERLU_FREE(p);
+}
+
+void
+getata(
+ const int m, /* number of rows in matrix A. */
+ const int n, /* number of columns in matrix A. */
+ const int nz, /* number of nonzeros in matrix A */
+ int *colptr, /* column pointer of size n+1 for matrix A. */
+ int *rowind, /* row indices of size nz for matrix A. */
+ int *atanz, /* out - on exit, returns the actual number of
+ nonzeros in matrix A'*A. */
+ int **ata_colptr, /* out - size n+1 */
+ int **ata_rowind /* out - size *atanz */
+ )
+/*
+ * Purpose
+ * =======
+ *
+ * Form the structure of A'*A. A is an m-by-n matrix in column oriented
+ * format represented by (colptr, rowind). The output A'*A is in column
+ * oriented format (symmetrically, also row oriented), represented by
+ * (ata_colptr, ata_rowind).
+ *
+ * This routine is modified from GETATA routine by Tim Davis.
+ * The complexity of this algorithm is: SUM_{i=1,m} r(i)^2,
+ * i.e., the sum of the square of the row counts.
+ *
+ * Questions
+ * =========
+ * o Do I need to withhold the *dense* rows?
+ * o How do I know the number of nonzeros in A'*A?
+ *
+ */
+{
+ register int i, j, k, col, num_nz, ti, trow;
+ int *marker, *b_colptr, *b_rowind;
+ int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
+
+ if ( !(marker = (int*) SUPERLU_MALLOC((SUPERLU_MAX(m,n)+1)*sizeof(int))) )
+ ABORT("SUPERLU_MALLOC fails for marker[]");
+ if ( !(t_colptr = (int*) SUPERLU_MALLOC((m+1) * sizeof(int))) )
+ ABORT("SUPERLU_MALLOC t_colptr[]");
+ if ( !(t_rowind = (int*) SUPERLU_MALLOC(nz * sizeof(int))) )
+ ABORT("SUPERLU_MALLOC fails for t_rowind[]");
+
+
+ /* Get counts of each column of T, and set up column pointers */
+ for (i = 0; i < m; ++i) marker[i] = 0;
+ for (j = 0; j < n; ++j) {
+ for (i = colptr[j]; i < colptr[j+1]; ++i)
+ ++marker[rowind[i]];
+ }
+ t_colptr[0] = 0;
+ for (i = 0; i < m; ++i) {
+ t_colptr[i+1] = t_colptr[i] + marker[i];
+ marker[i] = t_colptr[i];
+ }
+
+ /* Transpose the matrix from A to T */
+ for (j = 0; j < n; ++j)
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ col = rowind[i];
+ t_rowind[marker[col]] = j;
+ ++marker[col];
+ }
+
+
+ /* ----------------------------------------------------------------
+ compute B = T * A, where column j of B is:
+
+ Struct (B_*j) = UNION ( Struct (T_*k) )
+ A_kj != 0
+
+ do not include the diagonal entry
+
+ ( Partition A as: A = (A_*1, ..., A_*n)
+ Then B = T * A = (T * A_*1, ..., T * A_*n), where
+ T * A_*j = (T_*1, ..., T_*m) * A_*j. )
+ ---------------------------------------------------------------- */
+
+ /* Zero the diagonal flag */
+ for (i = 0; i < n; ++i) marker[i] = -1;
+
+ /* First pass determines number of nonzeros in B */
+ num_nz = 0;
+ for (j = 0; j < n; ++j) {
+ /* Flag the diagonal so it's not included in the B matrix */
+ marker[j] = j;
+
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ /* A_kj is nonzero, add pattern of column T_*k to B_*j */
+ k = rowind[i];
+ for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
+ trow = t_rowind[ti];
+ if ( marker[trow] != j ) {
+ marker[trow] = j;
+ num_nz++;
+ }
+ }
+ }
+ }
+ *atanz = num_nz;
+
+ /* Allocate storage for A'*A */
+ if ( !(*ata_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for ata_colptr[]");
+ if ( *atanz ) {
+ if ( !(*ata_rowind = (int*) SUPERLU_MALLOC( *atanz * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for ata_rowind[]");
+ }
+ b_colptr = *ata_colptr; /* aliasing */
+ b_rowind = *ata_rowind;
+
+ /* Zero the diagonal flag */
+ for (i = 0; i < n; ++i) marker[i] = -1;
+
+ /* Compute each column of B, one at a time */
+ num_nz = 0;
+ for (j = 0; j < n; ++j) {
+ b_colptr[j] = num_nz;
+
+ /* Flag the diagonal so it's not included in the B matrix */
+ marker[j] = j;
+
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ /* A_kj is nonzero, add pattern of column T_*k to B_*j */
+ k = rowind[i];
+ for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
+ trow = t_rowind[ti];
+ if ( marker[trow] != j ) {
+ marker[trow] = j;
+ b_rowind[num_nz++] = trow;
+ }
+ }
+ }
+ }
+ b_colptr[n] = num_nz;
+
+ SUPERLU_FREE(marker);
+ SUPERLU_FREE(t_colptr);
+ SUPERLU_FREE(t_rowind);
+}
+
+
+void
+at_plus_a(
+ const int n, /* number of columns in matrix A. */
+ const int nz, /* number of nonzeros in matrix A */
+ int *colptr, /* column pointer of size n+1 for matrix A. */
+ int *rowind, /* row indices of size nz for matrix A. */
+ int *bnz, /* out - on exit, returns the actual number of
+ nonzeros in matrix A'*A. */
+ int **b_colptr, /* out - size n+1 */
+ int **b_rowind /* out - size *bnz */
+ )
+{
+/*
+ * Purpose
+ * =======
+ *
+ * Form the structure of A'+A. A is an n-by-n matrix in column oriented
+ * format represented by (colptr, rowind). The output A'+A is in column
+ * oriented format (symmetrically, also row oriented), represented by
+ * (b_colptr, b_rowind).
+ *
+ */
+ register int i, j, k, col, num_nz;
+ int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
+ int *marker;
+
+ if ( !(marker = (int*) SUPERLU_MALLOC( n * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for marker[]");
+ if ( !(t_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for t_colptr[]");
+ if ( !(t_rowind = (int*) SUPERLU_MALLOC( nz * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails t_rowind[]");
+
+
+ /* Get counts of each column of T, and set up column pointers */
+ for (i = 0; i < n; ++i) marker[i] = 0;
+ for (j = 0; j < n; ++j) {
+ for (i = colptr[j]; i < colptr[j+1]; ++i)
+ ++marker[rowind[i]];
+ }
+ t_colptr[0] = 0;
+ for (i = 0; i < n; ++i) {
+ t_colptr[i+1] = t_colptr[i] + marker[i];
+ marker[i] = t_colptr[i];
+ }
+
+ /* Transpose the matrix from A to T */
+ for (j = 0; j < n; ++j)
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ col = rowind[i];
+ t_rowind[marker[col]] = j;
+ ++marker[col];
+ }
+
+
+ /* ----------------------------------------------------------------
+ compute B = A + T, where column j of B is:
+
+ Struct (B_*j) = Struct (A_*k) UNION Struct (T_*k)
+
+ do not include the diagonal entry
+ ---------------------------------------------------------------- */
+
+ /* Zero the diagonal flag */
+ for (i = 0; i < n; ++i) marker[i] = -1;
+
+ /* First pass determines number of nonzeros in B */
+ num_nz = 0;
+ for (j = 0; j < n; ++j) {
+ /* Flag the diagonal so it's not included in the B matrix */
+ marker[j] = j;
+
+ /* Add pattern of column A_*k to B_*j */
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ k = rowind[i];
+ if ( marker[k] != j ) {
+ marker[k] = j;
+ ++num_nz;
+ }
+ }
+
+ /* Add pattern of column T_*k to B_*j */
+ for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
+ k = t_rowind[i];
+ if ( marker[k] != j ) {
+ marker[k] = j;
+ ++num_nz;
+ }
+ }
+ }
+ *bnz = num_nz;
+
+ /* Allocate storage for A+A' */
+ if ( !(*b_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for b_colptr[]");
+ if ( *bnz) {
+ if ( !(*b_rowind = (int*) SUPERLU_MALLOC( *bnz * sizeof(int)) ) )
+ ABORT("SUPERLU_MALLOC fails for b_rowind[]");
+ }
+
+ /* Zero the diagonal flag */
+ for (i = 0; i < n; ++i) marker[i] = -1;
+
+ /* Compute each column of B, one at a time */
+ num_nz = 0;
+ for (j = 0; j < n; ++j) {
+ (*b_colptr)[j] = num_nz;
+
+ /* Flag the diagonal so it's not included in the B matrix */
+ marker[j] = j;
+
+ /* Add pattern of column A_*k to B_*j */
+ for (i = colptr[j]; i < colptr[j+1]; ++i) {
+ k = rowind[i];
+ if ( marker[k] != j ) {
+ marker[k] = j;
+ (*b_rowind)[num_nz++] = k;
+ }
+ }
+
+ /* Add pattern of column T_*k to B_*j */
+ for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
+ k = t_rowind[i];
+ if ( marker[k] != j ) {
+ marker[k] = j;
+ (*b_rowind)[num_nz++] = k;
+ }
+ }
+ }
+ (*b_colptr)[n] = num_nz;
+
+ SUPERLU_FREE(marker);
+ SUPERLU_FREE(t_colptr);
+ SUPERLU_FREE(t_rowind);
+}
+
+void
+get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
+/*
+ * Purpose
+ * =======
+ *
+ * GET_PERM_C obtains a permutation matrix Pc, by applying the multiple
+ * minimum degree ordering code by Joseph Liu to matrix A'*A or A+A'.
+ * or using approximate minimum degree column ordering by Davis et. al.
+ * The LU factorization of A*Pc tends to have less fill than the LU
+ * factorization of A.
+ *
+ * Arguments
+ * =========
+ *
+ * ispec (input) int
+ * Specifies the type of column ordering to reduce fill:
+ * = 1: minimum degree on the structure of A^T * A
+ * = 2: minimum degree on the structure of A^T + A
+ * = 3: approximate minimum degree for unsymmetric matrices
+ * If ispec == 0, the natural ordering (i.e., Pc = I) is returned.
+ *
+ * A (input) SuperMatrix*
+ * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ * of the linear equations is A->nrow. Currently, the type of A
+ * can be: Stype = NC; Dtype = _D; Mtype = GE. In the future,
+ * more general A can be handled.
+ *
+ * perm_c (output) int*
+ * Column permutation vector of size A->ncol, which defines the
+ * permutation matrix Pc; perm_c[i] = j means column i of A is
+ * in position j in A*Pc.
+ *
+ */
+{
+ NCformat *Astore = A->Store;
+ int m, n, bnz, *b_colptr, i;
+ int delta, maxint, nofsub, *invp;
+ int *b_rowind, *dhead, *qsize, *llist, *marker;
+ double t, SuperLU_timer_();
+
+ m = A->nrow;
+ n = A->ncol;
+
+ t = SuperLU_timer_();
+ switch ( ispec ) {
+ case 0: /* Natural ordering */
+ for (i = 0; i < n; ++i) perm_c[i] = i;
+#if ( PRNTlevel>=1 )
+ printf("Use natural column ordering.\n");
+#endif
+ return;
+ case 1: /* Minimum degree ordering on A'*A */
+ getata(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
+ &bnz, &b_colptr, &b_rowind);
+#if ( PRNTlevel>=1 )
+ printf("Use minimum degree ordering on A'*A.\n");
+#endif
+ t = SuperLU_timer_() - t;
+ /*printf("Form A'*A time = %8.3f\n", t);*/
+ break;
+ case 2: /* Minimum degree ordering on A'+A */
+ if ( m != n ) ABORT("Matrix is not square");
+ at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
+ &bnz, &b_colptr, &b_rowind);
+#if ( PRNTlevel>=1 )
+ printf("Use minimum degree ordering on A'+A.\n");
+#endif
+ t = SuperLU_timer_() - t;
+ /*printf("Form A'+A time = %8.3f\n", t);*/
+ break;
+ case 3: /* Approximate minimum degree column ordering. */
+ get_colamd(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
+ perm_c);
+#if ( PRNTlevel>=1 )
+ printf(".. Use approximate minimum degree column ordering.\n");
+#endif
+ return;
+ default:
+ ABORT("Invalid ISPEC");
+ }
+
+ if ( bnz != 0 ) {
+ t = SuperLU_timer_();
+
+ /* Initialize and allocate storage for GENMMD. */
+ delta = 1; /* DELTA is a parameter to allow the choice of nodes
+ whose degree <= min-degree + DELTA. */
+ maxint = 2147483647; /* 2**31 - 1 */
+ invp = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
+ if ( !invp ) ABORT("SUPERLU_MALLOC fails for invp.");
+ dhead = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
+ if ( !dhead ) ABORT("SUPERLU_MALLOC fails for dhead.");
+ qsize = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
+ if ( !qsize ) ABORT("SUPERLU_MALLOC fails for qsize.");
+ llist = (int *) SUPERLU_MALLOC(n*sizeof(int));
+ if ( !llist ) ABORT("SUPERLU_MALLOC fails for llist.");
+ marker = (int *) SUPERLU_MALLOC(n*sizeof(int));
+ if ( !marker ) ABORT("SUPERLU_MALLOC fails for marker.");
+
+ /* Transform adjacency list into 1-based indexing required by GENMMD.*/
+ for (i = 0; i <= n; ++i) ++b_colptr[i];
+ for (i = 0; i < bnz; ++i) ++b_rowind[i];
+
+ genmmd_(&n, b_colptr, b_rowind, perm_c, invp, &delta, dhead,
+ qsize, llist, marker, &maxint, &nofsub);
+
+ /* Transform perm_c into 0-based indexing. */
+ for (i = 0; i < n; ++i) --perm_c[i];
+
+ SUPERLU_FREE(b_colptr);
+ SUPERLU_FREE(b_rowind);
+ SUPERLU_FREE(invp);
+ SUPERLU_FREE(dhead);
+ SUPERLU_FREE(qsize);
+ SUPERLU_FREE(llist);
+ SUPERLU_FREE(marker);
+
+ t = SuperLU_timer_() - t;
+ /* printf("call GENMMD time = %8.3f\n", t);*/
+
+ } else { /* Empty adjacency structure */
+ for (i = 0; i < n; ++i) perm_c[i] = i;
+ }
+
+}
diff --git a/intern/opennl/superlu/heap_relax_snode.c b/intern/opennl/superlu/heap_relax_snode.c
new file mode 100644
index 00000000000..86971f59571
--- /dev/null
+++ b/intern/opennl/superlu/heap_relax_snode.c
@@ -0,0 +1,116 @@
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void
+heap_relax_snode (
+ const int n,
+ int *et, /* column elimination tree */
+ const int relax_columns, /* max no of columns allowed in a
+ relaxed snode */
+ int *descendants, /* no of descendants of each node
+ in the etree */
+ int *relax_end /* last column in a supernode */
+ )
+{
+/*
+ * Purpose
+ * =======
+ * relax_snode() - Identify the initial relaxed supernodes, assuming that
+ * the matrix has been reordered according to the postorder of the etree.
+ *
+ */
+ register int i, j, k, l, parent;
+ register int snode_start; /* beginning of a snode */
+ int *et_save, *post, *inv_post, *iwork;
+ int nsuper_et = 0, nsuper_et_post = 0;
+
+ /* The etree may not be postordered, but is heap ordered. */
+
+ iwork = (int*) intMalloc(3*n+2);
+ if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
+ inv_post = iwork + n+1;
+ et_save = inv_post + n+1;
+
+ /* Post order etree */
+ post = (int *) TreePostorder(n, et);
+ for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
+
+ /* Renumber etree in postorder */
+ for (i = 0; i < n; ++i) {
+ iwork[post[i]] = post[et[i]];
+ et_save[i] = et[i]; /* Save the original etree */
+ }
+ for (i = 0; i < n; ++i) et[i] = iwork[i];
+
+ /* Compute the number of descendants of each node in the etree */
+ ifill (relax_end, n, EMPTY);
+ for (j = 0; j < n; j++) descendants[j] = 0;
+ for (j = 0; j < n; j++) {
+ parent = et[j];
+ if ( parent != n ) /* not the dummy root */
+ descendants[parent] += descendants[j] + 1;
+ }
+
+ /* Identify the relaxed supernodes by postorder traversal of the etree. */
+ for (j = 0; j < n; ) {
+ parent = et[j];
+ snode_start = j;
+ while ( parent != n && descendants[parent] < relax_columns ) {
+ j = parent;
+ parent = et[j];
+ }
+ /* Found a supernode in postordered etree; j is the last column. */
+ ++nsuper_et_post;
+ k = n;
+ for (i = snode_start; i <= j; ++i)
+ k = SUPERLU_MIN(k, inv_post[i]);
+ l = inv_post[j];
+ if ( (l - k) == (j - snode_start) ) {
+ /* It's also a supernode in the original etree */
+ relax_end[k] = l; /* Last column is recorded */
+ ++nsuper_et;
+ } else {
+ for (i = snode_start; i <= j; ++i) {
+ l = inv_post[i];
+ if ( descendants[i] == 0 ) relax_end[l] = l;
+ }
+ }
+ j++;
+ /* Search for a new leaf */
+ while ( descendants[j] != 0 && j < n ) j++;
+ }
+
+#if ( PRNTlevel>=1 )
+ printf(".. heap_snode_relax:\n"
+ "\tNo of relaxed snodes in postordered etree:\t%d\n"
+ "\tNo of relaxed snodes in original etree:\t%d\n",
+ nsuper_et_post, nsuper_et);
+#endif
+
+ /* Recover the original etree */
+ for (i = 0; i < n; ++i) et[i] = et_save[i];
+
+ SUPERLU_FREE(post);
+ SUPERLU_FREE(iwork);
+}
+
+
diff --git a/intern/opennl/superlu/lsame.c b/intern/opennl/superlu/lsame.c
new file mode 100644
index 00000000000..29f27d38fa9
--- /dev/null
+++ b/intern/opennl/superlu/lsame.c
@@ -0,0 +1,70 @@
+int lsame_(char *ca, char *cb)
+{
+/* -- LAPACK auxiliary routine (version 2.0) --
+ Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+ Courant Institute, Argonne National Lab, and Rice University
+ September 30, 1994
+
+ Purpose
+ =======
+
+ LSAME returns .TRUE. if CA is the same letter as CB regardless of case.
+
+ Arguments
+ =========
+
+ CA (input) CHARACTER*1
+ CB (input) CHARACTER*1
+ CA and CB specify the single characters to be compared.
+
+ =====================================================================
+*/
+
+ /* System generated locals */
+ int ret_val;
+
+ /* Local variables */
+ int inta, intb, zcode;
+
+ ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
+ if (ret_val) {
+ return ret_val;
+ }
+
+ /* Now test for equivalence if both characters are alphabetic. */
+
+ zcode = 'Z';
+
+ /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime
+ machines, on which ICHAR returns a value with bit 8 set.
+ ICHAR('A') on Prime machines returns 193 which is the same as
+ ICHAR('A') on an EBCDIC machine. */
+
+ inta = *(unsigned char *)ca;
+ intb = *(unsigned char *)cb;
+
+ if (zcode == 90 || zcode == 122) {
+ /* ASCII is assumed - ZCODE is the ASCII code of either lower or
+ upper case 'Z'. */
+ if (inta >= 97 && inta <= 122) inta += -32;
+ if (intb >= 97 && intb <= 122) intb += -32;
+
+ } else if (zcode == 233 || zcode == 169) {
+ /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
+ upper case 'Z'. */
+ if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta
+ >= 162 && inta <= 169))
+ inta += 64;
+ if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb
+ >= 162 && intb <= 169))
+ intb += 64;
+ } else if (zcode == 218 || zcode == 250) {
+ /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code
+ plus 128 of either lower or upper case 'Z'. */
+ if (inta >= 225 && inta <= 250) inta += -32;
+ if (intb >= 225 && intb <= 250) intb += -32;
+ }
+ ret_val = inta == intb;
+ return ret_val;
+
+} /* lsame_ */
diff --git a/intern/opennl/superlu/memory.c b/intern/opennl/superlu/memory.c
new file mode 100644
index 00000000000..54d863ea9e9
--- /dev/null
+++ b/intern/opennl/superlu/memory.c
@@ -0,0 +1,207 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/** Precision-independent memory-related routines.
+ (Shared by [sdcz]memory.c) **/
+
+#include "ssp_defs.h"
+
+
+#if ( DEBUGlevel>=1 ) /* Debug malloc/free. */
+int superlu_malloc_total = 0;
+
+#define PAD_FACTOR 2
+#define DWORD (sizeof(double)) /* Be sure it's no smaller than double. */
+
+void *superlu_malloc(size_t size)
+{
+ char *buf;
+
+ buf = (char *) malloc(size + DWORD);
+ if ( !buf ) {
+ printf("superlu_malloc fails: malloc_total %.0f MB, size %d\n",
+ superlu_malloc_total*1e-6, size);
+ ABORT("superlu_malloc: out of memory");
+ }
+
+ ((int_t *) buf)[0] = size;
+#if 0
+ superlu_malloc_total += size + DWORD;
+#else
+ superlu_malloc_total += size;
+#endif
+ return (void *) (buf + DWORD);
+}
+
+void superlu_free(void *addr)
+{
+ char *p = ((char *) addr) - DWORD;
+
+ if ( !addr )
+ ABORT("superlu_free: tried to free NULL pointer");
+
+ if ( !p )
+ ABORT("superlu_free: tried to free NULL+DWORD pointer");
+
+ {
+ int_t n = ((int_t *) p)[0];
+
+ if ( !n )
+ ABORT("superlu_free: tried to free a freed pointer");
+ *((int_t *) p) = 0; /* Set to zero to detect duplicate free's. */
+#if 0
+ superlu_malloc_total -= (n + DWORD);
+#else
+ superlu_malloc_total -= n;
+#endif
+
+ if ( superlu_malloc_total < 0 )
+ ABORT("superlu_malloc_total went negative!");
+
+ /*free (addr);*/
+ free (p);
+ }
+
+}
+
+#else /* production mode */
+
+void *superlu_malloc(size_t size)
+{
+ void *buf;
+ buf = (void *) malloc(size);
+ return (buf);
+}
+
+void superlu_free(void *addr)
+{
+ free (addr);
+}
+
+#endif
+
+
+/*
+ * Set up pointers for integer working arrays.
+ */
+void
+SetIWork(int m, int n, int panel_size, int *iworkptr, int **segrep,
+ int **parent, int **xplore, int **repfnz, int **panel_lsub,
+ int **xprune, int **marker)
+{
+ *segrep = iworkptr;
+ *parent = iworkptr + m;
+ *xplore = *parent + m;
+ *repfnz = *xplore + m;
+ *panel_lsub = *repfnz + panel_size * m;
+ *xprune = *panel_lsub + panel_size * m;
+ *marker = *xprune + n;
+ ifill (*repfnz, m * panel_size, EMPTY);
+ ifill (*panel_lsub, m * panel_size, EMPTY);
+}
+
+
+void
+copy_mem_int(int howmany, void *old, void *new)
+{
+ register int i;
+ int *iold = old;
+ int *inew = new;
+ for (i = 0; i < howmany; i++) inew[i] = iold[i];
+}
+
+
+void
+user_bcopy(char *src, char *dest, int bytes)
+{
+ char *s_ptr, *d_ptr;
+
+ s_ptr = src + bytes - 1;
+ d_ptr = dest + bytes - 1;
+ for (; d_ptr >= dest; --s_ptr, --d_ptr ) *d_ptr = *s_ptr;
+}
+
+
+
+int *intMalloc(int n)
+{
+ int *buf;
+ buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
+ if ( !buf ) {
+ ABORT("SUPERLU_MALLOC fails for buf in intMalloc()");
+ }
+ return (buf);
+}
+
+int *intCalloc(int n)
+{
+ int *buf;
+ register int i;
+ buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
+ if ( !buf ) {
+ ABORT("SUPERLU_MALLOC fails for buf in intCalloc()");
+ }
+ for (i = 0; i < n; ++i) buf[i] = 0;
+ return (buf);
+}
+
+
+
+#if 0
+check_expanders()
+{
+ int p;
+ printf("Check expanders:\n");
+ for (p = 0; p < NO_MEMTYPE; p++) {
+ printf("type %d, size %d, mem %d\n",
+ p, expanders[p].size, (int)expanders[p].mem);
+ }
+
+ return 0;
+}
+
+
+StackInfo()
+{
+ printf("Stack: size %d, used %d, top1 %d, top2 %d\n",
+ stack.size, stack.used, stack.top1, stack.top2);
+ return 0;
+}
+
+
+
+PrintStack(char *msg, GlobalLU_t *Glu)
+{
+ int i;
+ int *xlsub, *lsub, *xusub, *usub;
+
+ xlsub = Glu->xlsub;
+ lsub = Glu->lsub;
+ xusub = Glu->xusub;
+ usub = Glu->usub;
+
+ printf("%s\n", msg);
+
+/* printf("\nUCOL: ");
+ for (i = 0; i < xusub[ndim]; ++i)
+ printf("%f ", ucol[i]);
+
+ printf("\nLSUB: ");
+ for (i = 0; i < xlsub[ndim]; ++i)
+ printf("%d ", lsub[i]);
+
+ printf("\nUSUB: ");
+ for (i = 0; i < xusub[ndim]; ++i)
+ printf("%d ", usub[i]);
+
+ printf("\n");*/
+ return 0;
+}
+#endif
+
+
+
diff --git a/intern/opennl/superlu/mmd.c b/intern/opennl/superlu/mmd.c
new file mode 100644
index 00000000000..05f26ce0995
--- /dev/null
+++ b/intern/opennl/superlu/mmd.c
@@ -0,0 +1,1012 @@
+
+typedef int shortint;
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* **** GENMMD ..... MULTIPLE MINIMUM EXTERNAL DEGREE **** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/* AUTHOR - JOSEPH W.H. LIU */
+/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/* PURPOSE - THIS ROUTINE IMPLEMENTS THE MINIMUM DEGREE */
+/* ALGORITHM. IT MAKES USE OF THE IMPLICIT REPRESENTATION */
+/* OF ELIMINATION GRAPHS BY QUOTIENT GRAPHS, AND THE */
+/* NOTION OF INDISTINGUISHABLE NODES. IT ALSO IMPLEMENTS */
+/* THE MODIFICATIONS BY MULTIPLE ELIMINATION AND MINIMUM */
+/* EXTERNAL DEGREE. */
+/* --------------------------------------------- */
+/* CAUTION - THE ADJACENCY VECTOR ADJNCY WILL BE */
+/* DESTROYED. */
+/* --------------------------------------------- */
+
+/* INPUT PARAMETERS - */
+/* NEQNS - NUMBER OF EQUATIONS. */
+/* (XADJ,ADJNCY) - THE ADJACENCY STRUCTURE. */
+/* DELTA - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */
+/* MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) INTEGER */
+/* (ANY SMALLER ESTIMATE WILL DO) FOR MARKING */
+/* NODES. */
+
+/* OUTPUT PARAMETERS - */
+/* PERM - THE MINIMUM DEGREE ORDERING. */
+/* INVP - THE INVERSE OF PERM. */
+/* NOFSUB - AN UPPER BOUND ON THE NUMBER OF NONZERO */
+/* SUBSCRIPTS FOR THE COMPRESSED STORAGE SCHEME. */
+
+/* WORKING PARAMETERS - */
+/* DHEAD - VECTOR FOR HEAD OF DEGREE LISTS. */
+/* INVP - USED TEMPORARILY FOR DEGREE FORWARD LINK. */
+/* PERM - USED TEMPORARILY FOR DEGREE BACKWARD LINK. */
+/* QSIZE - VECTOR FOR SIZE OF SUPERNODES. */
+/* LLIST - VECTOR FOR TEMPORARY LINKED LISTS. */
+/* MARKER - A TEMPORARY MARKER VECTOR. */
+
+/* PROGRAM SUBROUTINES - */
+/* MMDELM, MMDINT, MMDNUM, MMDUPD. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int genmmd_(int *neqns, int *xadj, shortint *adjncy,
+ shortint *invp, shortint *perm, int *delta, shortint *dhead,
+ shortint *qsize, shortint *llist, shortint *marker, int *maxint,
+ int *nofsub)
+{
+ /* System generated locals */
+ int i__1;
+
+ /* Local variables */
+ static int mdeg, ehead, i, mdlmt, mdnode;
+ extern /* Subroutine */ int mmdelm_(int *, int *, shortint *,
+ shortint *, shortint *, shortint *, shortint *, shortint *,
+ shortint *, int *, int *), mmdupd_(int *, int *,
+ int *, shortint *, int *, int *, shortint *, shortint
+ *, shortint *, shortint *, shortint *, shortint *, int *,
+ int *), mmdint_(int *, int *, shortint *, shortint *,
+ shortint *, shortint *, shortint *, shortint *, shortint *),
+ mmdnum_(int *, shortint *, shortint *, shortint *);
+ static int nextmd, tag, num;
+
+
+/* *************************************************************** */
+
+
+/* *************************************************************** */
+
+ /* Parameter adjustments */
+ --marker;
+ --llist;
+ --qsize;
+ --dhead;
+ --perm;
+ --invp;
+ --adjncy;
+ --xadj;
+
+ /* Function Body */
+ if (*neqns <= 0) {
+ return 0;
+ }
+
+/* ------------------------------------------------ */
+/* INITIALIZATION FOR THE MINIMUM DEGREE ALGORITHM. */
+/* ------------------------------------------------ */
+ *nofsub = 0;
+ mmdint_(neqns, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], &
+ qsize[1], &llist[1], &marker[1]);
+
+/* ---------------------------------------------- */
+/* NUM COUNTS THE NUMBER OF ORDERED NODES PLUS 1. */
+/* ---------------------------------------------- */
+ num = 1;
+
+/* ----------------------------- */
+/* ELIMINATE ALL ISOLATED NODES. */
+/* ----------------------------- */
+ nextmd = dhead[1];
+L100:
+ if (nextmd <= 0) {
+ goto L200;
+ }
+ mdnode = nextmd;
+ nextmd = invp[mdnode];
+ marker[mdnode] = *maxint;
+ invp[mdnode] = -num;
+ ++num;
+ goto L100;
+
+L200:
+/* ---------------------------------------- */
+/* SEARCH FOR NODE OF THE MINIMUM DEGREE. */
+/* MDEG IS THE CURRENT MINIMUM DEGREE; */
+/* TAG IS USED TO FACILITATE MARKING NODES. */
+/* ---------------------------------------- */
+ if (num > *neqns) {
+ goto L1000;
+ }
+ tag = 1;
+ dhead[1] = 0;
+ mdeg = 2;
+L300:
+ if (dhead[mdeg] > 0) {
+ goto L400;
+ }
+ ++mdeg;
+ goto L300;
+L400:
+/* ------------------------------------------------- */
+/* USE VALUE OF DELTA TO SET UP MDLMT, WHICH GOVERNS */
+/* WHEN A DEGREE UPDATE IS TO BE PERFORMED. */
+/* ------------------------------------------------- */
+ mdlmt = mdeg + *delta;
+ ehead = 0;
+
+L500:
+ mdnode = dhead[mdeg];
+ if (mdnode > 0) {
+ goto L600;
+ }
+ ++mdeg;
+ if (mdeg > mdlmt) {
+ goto L900;
+ }
+ goto L500;
+L600:
+/* ---------------------------------------- */
+/* REMOVE MDNODE FROM THE DEGREE STRUCTURE. */
+/* ---------------------------------------- */
+ nextmd = invp[mdnode];
+ dhead[mdeg] = nextmd;
+ if (nextmd > 0) {
+ perm[nextmd] = -mdeg;
+ }
+ invp[mdnode] = -num;
+ *nofsub = *nofsub + mdeg + qsize[mdnode] - 2;
+ if (num + qsize[mdnode] > *neqns) {
+ goto L1000;
+ }
+/* ---------------------------------------------- */
+/* ELIMINATE MDNODE AND PERFORM QUOTIENT GRAPH */
+/* TRANSFORMATION. RESET TAG VALUE IF NECESSARY. */
+/* ---------------------------------------------- */
+ ++tag;
+ if (tag < *maxint) {
+ goto L800;
+ }
+ tag = 1;
+ i__1 = *neqns;
+ for (i = 1; i <= i__1; ++i) {
+ if (marker[i] < *maxint) {
+ marker[i] = 0;
+ }
+/* L700: */
+ }
+L800:
+ mmdelm_(&mdnode, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], &
+ qsize[1], &llist[1], &marker[1], maxint, &tag);
+ num += qsize[mdnode];
+ llist[mdnode] = ehead;
+ ehead = mdnode;
+ if (*delta >= 0) {
+ goto L500;
+ }
+L900:
+/* ------------------------------------------- */
+/* UPDATE DEGREES OF THE NODES INVOLVED IN THE */
+/* MINIMUM DEGREE NODES ELIMINATION. */
+/* ------------------------------------------- */
+ if (num > *neqns) {
+ goto L1000;
+ }
+ mmdupd_(&ehead, neqns, &xadj[1], &adjncy[1], delta, &mdeg, &dhead[1], &
+ invp[1], &perm[1], &qsize[1], &llist[1], &marker[1], maxint, &tag)
+ ;
+ goto L300;
+
+L1000:
+ mmdnum_(neqns, &perm[1], &invp[1], &qsize[1]);
+ return 0;
+
+} /* genmmd_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* *** MMDINT ..... MULT MINIMUM DEGREE INITIALIZATION *** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/* AUTHOR - JOSEPH W.H. LIU */
+/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/* PURPOSE - THIS ROUTINE PERFORMS INITIALIZATION FOR THE */
+/* MULTIPLE ELIMINATION VERSION OF THE MINIMUM DEGREE */
+/* ALGORITHM. */
+
+/* INPUT PARAMETERS - */
+/* NEQNS - NUMBER OF EQUATIONS. */
+/* (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */
+
+/* OUTPUT PARAMETERS - */
+/* (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
+/* QSIZE - SIZE OF SUPERNODE (INITIALIZED TO ONE). */
+/* LLIST - LINKED LIST. */
+/* MARKER - MARKER VECTOR. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdint_(int *neqns, int *xadj, shortint *adjncy,
+ shortint *dhead, shortint *dforw, shortint *dbakw, shortint *qsize,
+ shortint *llist, shortint *marker)
+{
+ /* System generated locals */
+ int i__1;
+
+ /* Local variables */
+ static int ndeg, node, fnode;
+
+
+/* *************************************************************** */
+
+
+/* *************************************************************** */
+
+ /* Parameter adjustments */
+ --marker;
+ --llist;
+ --qsize;
+ --dbakw;
+ --dforw;
+ --dhead;
+ --adjncy;
+ --xadj;
+
+ /* Function Body */
+ i__1 = *neqns;
+ for (node = 1; node <= i__1; ++node) {
+ dhead[node] = 0;
+ qsize[node] = 1;
+ marker[node] = 0;
+ llist[node] = 0;
+/* L100: */
+ }
+/* ------------------------------------------ */
+/* INITIALIZE THE DEGREE DOUBLY LINKED LISTS. */
+/* ------------------------------------------ */
+ i__1 = *neqns;
+ for (node = 1; node <= i__1; ++node) {
+ ndeg = xadj[node + 1] - xadj[node] + 1;
+ fnode = dhead[ndeg];
+ dforw[node] = fnode;
+ dhead[ndeg] = node;
+ if (fnode > 0) {
+ dbakw[fnode] = node;
+ }
+ dbakw[node] = -ndeg;
+/* L200: */
+ }
+ return 0;
+
+} /* mmdint_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* ** MMDELM ..... MULTIPLE MINIMUM DEGREE ELIMINATION *** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/* AUTHOR - JOSEPH W.H. LIU */
+/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/* PURPOSE - THIS ROUTINE ELIMINATES THE NODE MDNODE OF */
+/* MINIMUM DEGREE FROM THE ADJACENCY STRUCTURE, WHICH */
+/* IS STORED IN THE QUOTIENT GRAPH FORMAT. IT ALSO */
+/* TRANSFORMS THE QUOTIENT GRAPH REPRESENTATION OF THE */
+/* ELIMINATION GRAPH. */
+
+/* INPUT PARAMETERS - */
+/* MDNODE - NODE OF MINIMUM DEGREE. */
+/* MAXINT - ESTIMATE OF MAXIMUM REPRESENTABLE (SHORT) */
+/* INT. */
+/* TAG - TAG VALUE. */
+
+/* UPDATED PARAMETERS - */
+/* (XADJ,ADJNCY) - UPDATED ADJACENCY STRUCTURE. */
+/* (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
+/* QSIZE - SIZE OF SUPERNODE. */
+/* MARKER - MARKER VECTOR. */
+/* LLIST - TEMPORARY LINKED LIST OF ELIMINATED NABORS. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdelm_(int *mdnode, int *xadj, shortint *adjncy,
+ shortint *dhead, shortint *dforw, shortint *dbakw, shortint *qsize,
+ shortint *llist, shortint *marker, int *maxint, int *tag)
+{
+ /* System generated locals */
+ int i__1, i__2;
+
+ /* Local variables */
+ static int node, link, rloc, rlmt, i, j, nabor, rnode, elmnt, xqnbr,
+ istop, jstop, istrt, jstrt, nxnode, pvnode, nqnbrs, npv;
+
+
+/* *************************************************************** */
+
+
+/* *************************************************************** */
+
+/* ----------------------------------------------- */
+/* FIND REACHABLE SET AND PLACE IN DATA STRUCTURE. */
+/* ----------------------------------------------- */
+ /* Parameter adjustments */
+ --marker;
+ --llist;
+ --qsize;
+ --dbakw;
+ --dforw;
+ --dhead;
+ --adjncy;
+ --xadj;
+
+ /* Function Body */
+ marker[*mdnode] = *tag;
+ istrt = xadj[*mdnode];
+ istop = xadj[*mdnode + 1] - 1;
+/* ------------------------------------------------------- */
+/* ELMNT POINTS TO THE BEGINNING OF THE LIST OF ELIMINATED */
+/* NABORS OF MDNODE, AND RLOC GIVES THE STORAGE LOCATION */
+/* FOR THE NEXT REACHABLE NODE. */
+/* ------------------------------------------------------- */
+ elmnt = 0;
+ rloc = istrt;
+ rlmt = istop;
+ i__1 = istop;
+ for (i = istrt; i <= i__1; ++i) {
+ nabor = adjncy[i];
+ if (nabor == 0) {
+ goto L300;
+ }
+ if (marker[nabor] >= *tag) {
+ goto L200;
+ }
+ marker[nabor] = *tag;
+ if (dforw[nabor] < 0) {
+ goto L100;
+ }
+ adjncy[rloc] = nabor;
+ ++rloc;
+ goto L200;
+L100:
+ llist[nabor] = elmnt;
+ elmnt = nabor;
+L200:
+ ;
+ }
+L300:
+/* ----------------------------------------------------- */
+/* MERGE WITH REACHABLE NODES FROM GENERALIZED ELEMENTS. */
+/* ----------------------------------------------------- */
+ if (elmnt <= 0) {
+ goto L1000;
+ }
+ adjncy[rlmt] = -elmnt;
+ link = elmnt;
+L400:
+ jstrt = xadj[link];
+ jstop = xadj[link + 1] - 1;
+ i__1 = jstop;
+ for (j = jstrt; j <= i__1; ++j) {
+ node = adjncy[j];
+ link = -node;
+ if (node < 0) {
+ goto L400;
+ } else if (node == 0) {
+ goto L900;
+ } else {
+ goto L500;
+ }
+L500:
+ if (marker[node] >= *tag || dforw[node] < 0) {
+ goto L800;
+ }
+ marker[node] = *tag;
+/* --------------------------------- */
+/* USE STORAGE FROM ELIMINATED NODES */
+/* IF NECESSARY. */
+/* --------------------------------- */
+L600:
+ if (rloc < rlmt) {
+ goto L700;
+ }
+ link = -adjncy[rlmt];
+ rloc = xadj[link];
+ rlmt = xadj[link + 1] - 1;
+ goto L600;
+L700:
+ adjncy[rloc] = node;
+ ++rloc;
+L800:
+ ;
+ }
+L900:
+ elmnt = llist[elmnt];
+ goto L300;
+L1000:
+ if (rloc <= rlmt) {
+ adjncy[rloc] = 0;
+ }
+/* -------------------------------------------------------- */
+/* FOR EACH NODE IN THE REACHABLE SET, DO THE FOLLOWING ... */
+/* -------------------------------------------------------- */
+ link = *mdnode;
+L1100:
+ istrt = xadj[link];
+ istop = xadj[link + 1] - 1;
+ i__1 = istop;
+ for (i = istrt; i <= i__1; ++i) {
+ rnode = adjncy[i];
+ link = -rnode;
+ if (rnode < 0) {
+ goto L1100;
+ } else if (rnode == 0) {
+ goto L1800;
+ } else {
+ goto L1200;
+ }
+L1200:
+/* -------------------------------------------- */
+/* IF RNODE IS IN THE DEGREE LIST STRUCTURE ... */
+/* -------------------------------------------- */
+ pvnode = dbakw[rnode];
+ if (pvnode == 0 || pvnode == -(*maxint)) {
+ goto L1300;
+ }
+/* ------------------------------------- */
+/* THEN REMOVE RNODE FROM THE STRUCTURE. */
+/* ------------------------------------- */
+ nxnode = dforw[rnode];
+ if (nxnode > 0) {
+ dbakw[nxnode] = pvnode;
+ }
+ if (pvnode > 0) {
+ dforw[pvnode] = nxnode;
+ }
+ npv = -pvnode;
+ if (pvnode < 0) {
+ dhead[npv] = nxnode;
+ }
+L1300:
+/* ---------------------------------------- */
+/* PURGE INACTIVE QUOTIENT NABORS OF RNODE. */
+/* ---------------------------------------- */
+ jstrt = xadj[rnode];
+ jstop = xadj[rnode + 1] - 1;
+ xqnbr = jstrt;
+ i__2 = jstop;
+ for (j = jstrt; j <= i__2; ++j) {
+ nabor = adjncy[j];
+ if (nabor == 0) {
+ goto L1500;
+ }
+ if (marker[nabor] >= *tag) {
+ goto L1400;
+ }
+ adjncy[xqnbr] = nabor;
+ ++xqnbr;
+L1400:
+ ;
+ }
+L1500:
+/* ---------------------------------------- */
+/* IF NO ACTIVE NABOR AFTER THE PURGING ... */
+/* ---------------------------------------- */
+ nqnbrs = xqnbr - jstrt;
+ if (nqnbrs > 0) {
+ goto L1600;
+ }
+/* ----------------------------- */
+/* THEN MERGE RNODE WITH MDNODE. */
+/* ----------------------------- */
+ qsize[*mdnode] += qsize[rnode];
+ qsize[rnode] = 0;
+ marker[rnode] = *maxint;
+ dforw[rnode] = -(*mdnode);
+ dbakw[rnode] = -(*maxint);
+ goto L1700;
+L1600:
+/* -------------------------------------- */
+/* ELSE FLAG RNODE FOR DEGREE UPDATE, AND */
+/* ADD MDNODE AS A NABOR OF RNODE. */
+/* -------------------------------------- */
+ dforw[rnode] = nqnbrs + 1;
+ dbakw[rnode] = 0;
+ adjncy[xqnbr] = *mdnode;
+ ++xqnbr;
+ if (xqnbr <= jstop) {
+ adjncy[xqnbr] = 0;
+ }
+
+L1700:
+ ;
+ }
+L1800:
+ return 0;
+
+} /* mmdelm_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* ***** MMDUPD ..... MULTIPLE MINIMUM DEGREE UPDATE ***** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/* AUTHOR - JOSEPH W.H. LIU */
+/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/* PURPOSE - THIS ROUTINE UPDATES THE DEGREES OF NODES */
+/* AFTER A MULTIPLE ELIMINATION STEP. */
+
+/* INPUT PARAMETERS - */
+/* EHEAD - THE BEGINNING OF THE LIST OF ELIMINATED */
+/* NODES (I.E., NEWLY FORMED ELEMENTS). */
+/* NEQNS - NUMBER OF EQUATIONS. */
+/* (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */
+/* DELTA - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */
+/* MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) */
+/* INTEGER. */
+
+/* UPDATED PARAMETERS - */
+/* MDEG - NEW MINIMUM DEGREE AFTER DEGREE UPDATE. */
+/* (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
+/* QSIZE - SIZE OF SUPERNODE. */
+/* LLIST - WORKING LINKED LIST. */
+/* MARKER - MARKER VECTOR FOR DEGREE UPDATE. */
+/* TAG - TAG VALUE. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdupd_(int *ehead, int *neqns, int *xadj,
+ shortint *adjncy, int *delta, int *mdeg, shortint *dhead,
+ shortint *dforw, shortint *dbakw, shortint *qsize, shortint *llist,
+ shortint *marker, int *maxint, int *tag)
+{
+ /* System generated locals */
+ int i__1, i__2;
+
+ /* Local variables */
+ static int node, mtag, link, mdeg0, i, j, enode, fnode, nabor, elmnt,
+ istop, jstop, q2head, istrt, jstrt, qxhead, iq2, deg, deg0;
+
+
+/* *************************************************************** */
+
+
+/* *************************************************************** */
+
+ /* Parameter adjustments */
+ --marker;
+ --llist;
+ --qsize;
+ --dbakw;
+ --dforw;
+ --dhead;
+ --adjncy;
+ --xadj;
+
+ /* Function Body */
+ mdeg0 = *mdeg + *delta;
+ elmnt = *ehead;
+L100:
+/* ------------------------------------------------------- */
+/* FOR EACH OF THE NEWLY FORMED ELEMENT, DO THE FOLLOWING. */
+/* (RESET TAG VALUE IF NECESSARY.) */
+/* ------------------------------------------------------- */
+ if (elmnt <= 0) {
+ return 0;
+ }
+ mtag = *tag + mdeg0;
+ if (mtag < *maxint) {
+ goto L300;
+ }
+ *tag = 1;
+ i__1 = *neqns;
+ for (i = 1; i <= i__1; ++i) {
+ if (marker[i] < *maxint) {
+ marker[i] = 0;
+ }
+/* L200: */
+ }
+ mtag = *tag + mdeg0;
+L300:
+/* --------------------------------------------- */
+/* CREATE TWO LINKED LISTS FROM NODES ASSOCIATED */
+/* WITH ELMNT: ONE WITH TWO NABORS (Q2HEAD) IN */
+/* ADJACENCY STRUCTURE, AND THE OTHER WITH MORE */
+/* THAN TWO NABORS (QXHEAD). ALSO COMPUTE DEG0, */
+/* NUMBER OF NODES IN THIS ELEMENT. */
+/* --------------------------------------------- */
+ q2head = 0;
+ qxhead = 0;
+ deg0 = 0;
+ link = elmnt;
+L400:
+ istrt = xadj[link];
+ istop = xadj[link + 1] - 1;
+ i__1 = istop;
+ for (i = istrt; i <= i__1; ++i) {
+ enode = adjncy[i];
+ link = -enode;
+ if (enode < 0) {
+ goto L400;
+ } else if (enode == 0) {
+ goto L800;
+ } else {
+ goto L500;
+ }
+
+L500:
+ if (qsize[enode] == 0) {
+ goto L700;
+ }
+ deg0 += qsize[enode];
+ marker[enode] = mtag;
+/* ---------------------------------- */
+/* IF ENODE REQUIRES A DEGREE UPDATE, */
+/* THEN DO THE FOLLOWING. */
+/* ---------------------------------- */
+ if (dbakw[enode] != 0) {
+ goto L700;
+ }
+/* ---------------------------------------
+*/
+/* PLACE EITHER IN QXHEAD OR Q2HEAD LISTS.
+*/
+/* ---------------------------------------
+*/
+ if (dforw[enode] == 2) {
+ goto L600;
+ }
+ llist[enode] = qxhead;
+ qxhead = enode;
+ goto L700;
+L600:
+ llist[enode] = q2head;
+ q2head = enode;
+L700:
+ ;
+ }
+L800:
+/* -------------------------------------------- */
+/* FOR EACH ENODE IN Q2 LIST, DO THE FOLLOWING. */
+/* -------------------------------------------- */
+ enode = q2head;
+ iq2 = 1;
+L900:
+ if (enode <= 0) {
+ goto L1500;
+ }
+ if (dbakw[enode] != 0) {
+ goto L2200;
+ }
+ ++(*tag);
+ deg = deg0;
+/* ------------------------------------------ */
+/* IDENTIFY THE OTHER ADJACENT ELEMENT NABOR. */
+/* ------------------------------------------ */
+ istrt = xadj[enode];
+ nabor = adjncy[istrt];
+ if (nabor == elmnt) {
+ nabor = adjncy[istrt + 1];
+ }
+/* ------------------------------------------------ */
+/* IF NABOR IS UNELIMINATED, INCREASE DEGREE COUNT. */
+/* ------------------------------------------------ */
+ link = nabor;
+ if (dforw[nabor] < 0) {
+ goto L1000;
+ }
+ deg += qsize[nabor];
+ goto L2100;
+L1000:
+/* -------------------------------------------- */
+/* OTHERWISE, FOR EACH NODE IN THE 2ND ELEMENT, */
+/* DO THE FOLLOWING. */
+/* -------------------------------------------- */
+ istrt = xadj[link];
+ istop = xadj[link + 1] - 1;
+ i__1 = istop;
+ for (i = istrt; i <= i__1; ++i) {
+ node = adjncy[i];
+ link = -node;
+ if (node == enode) {
+ goto L1400;
+ }
+ if (node < 0) {
+ goto L1000;
+ } else if (node == 0) {
+ goto L2100;
+ } else {
+ goto L1100;
+ }
+
+L1100:
+ if (qsize[node] == 0) {
+ goto L1400;
+ }
+ if (marker[node] >= *tag) {
+ goto L1200;
+ }
+/* -----------------------------------
+-- */
+/* CASE WHEN NODE IS NOT YET CONSIDERED
+. */
+/* -----------------------------------
+-- */
+ marker[node] = *tag;
+ deg += qsize[node];
+ goto L1400;
+L1200:
+/* ----------------------------------------
+ */
+/* CASE WHEN NODE IS INDISTINGUISHABLE FROM
+ */
+/* ENODE. MERGE THEM INTO A NEW SUPERNODE.
+ */
+/* ----------------------------------------
+ */
+ if (dbakw[node] != 0) {
+ goto L1400;
+ }
+ if (dforw[node] != 2) {
+ goto L1300;
+ }
+ qsize[enode] += qsize[node];
+ qsize[node] = 0;
+ marker[node] = *maxint;
+ dforw[node] = -enode;
+ dbakw[node] = -(*maxint);
+ goto L1400;
+L1300:
+/* --------------------------------------
+*/
+/* CASE WHEN NODE IS OUTMATCHED BY ENODE.
+*/
+/* --------------------------------------
+*/
+ if (dbakw[node] == 0) {
+ dbakw[node] = -(*maxint);
+ }
+L1400:
+ ;
+ }
+ goto L2100;
+L1500:
+/* ------------------------------------------------ */
+/* FOR EACH ENODE IN THE QX LIST, DO THE FOLLOWING. */
+/* ------------------------------------------------ */
+ enode = qxhead;
+ iq2 = 0;
+L1600:
+ if (enode <= 0) {
+ goto L2300;
+ }
+ if (dbakw[enode] != 0) {
+ goto L2200;
+ }
+ ++(*tag);
+ deg = deg0;
+/* --------------------------------- */
+/* FOR EACH UNMARKED NABOR OF ENODE, */
+/* DO THE FOLLOWING. */
+/* --------------------------------- */
+ istrt = xadj[enode];
+ istop = xadj[enode + 1] - 1;
+ i__1 = istop;
+ for (i = istrt; i <= i__1; ++i) {
+ nabor = adjncy[i];
+ if (nabor == 0) {
+ goto L2100;
+ }
+ if (marker[nabor] >= *tag) {
+ goto L2000;
+ }
+ marker[nabor] = *tag;
+ link = nabor;
+/* ------------------------------ */
+/* IF UNELIMINATED, INCLUDE IT IN */
+/* DEG COUNT. */
+/* ------------------------------ */
+ if (dforw[nabor] < 0) {
+ goto L1700;
+ }
+ deg += qsize[nabor];
+ goto L2000;
+L1700:
+/* -------------------------------
+*/
+/* IF ELIMINATED, INCLUDE UNMARKED
+*/
+/* NODES IN THIS ELEMENT INTO THE
+*/
+/* DEGREE COUNT. */
+/* -------------------------------
+*/
+ jstrt = xadj[link];
+ jstop = xadj[link + 1] - 1;
+ i__2 = jstop;
+ for (j = jstrt; j <= i__2; ++j) {
+ node = adjncy[j];
+ link = -node;
+ if (node < 0) {
+ goto L1700;
+ } else if (node == 0) {
+ goto L2000;
+ } else {
+ goto L1800;
+ }
+
+L1800:
+ if (marker[node] >= *tag) {
+ goto L1900;
+ }
+ marker[node] = *tag;
+ deg += qsize[node];
+L1900:
+ ;
+ }
+L2000:
+ ;
+ }
+L2100:
+/* ------------------------------------------- */
+/* UPDATE EXTERNAL DEGREE OF ENODE IN DEGREE */
+/* STRUCTURE, AND MDEG (MIN DEG) IF NECESSARY. */
+/* ------------------------------------------- */
+ deg = deg - qsize[enode] + 1;
+ fnode = dhead[deg];
+ dforw[enode] = fnode;
+ dbakw[enode] = -deg;
+ if (fnode > 0) {
+ dbakw[fnode] = enode;
+ }
+ dhead[deg] = enode;
+ if (deg < *mdeg) {
+ *mdeg = deg;
+ }
+L2200:
+/* ---------------------------------- */
+/* GET NEXT ENODE IN CURRENT ELEMENT. */
+/* ---------------------------------- */
+ enode = llist[enode];
+ if (iq2 == 1) {
+ goto L900;
+ }
+ goto L1600;
+L2300:
+/* ----------------------------- */
+/* GET NEXT ELEMENT IN THE LIST. */
+/* ----------------------------- */
+ *tag = mtag;
+ elmnt = llist[elmnt];
+ goto L100;
+
+} /* mmdupd_ */
+
+/* *************************************************************** */
+/* *************************************************************** */
+/* ***** MMDNUM ..... MULTI MINIMUM DEGREE NUMBERING ***** */
+/* *************************************************************** */
+/* *************************************************************** */
+
+/* AUTHOR - JOSEPH W.H. LIU */
+/* DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
+
+/* PURPOSE - THIS ROUTINE PERFORMS THE FINAL STEP IN */
+/* PRODUCING THE PERMUTATION AND INVERSE PERMUTATION */
+/* VECTORS IN THE MULTIPLE ELIMINATION VERSION OF THE */
+/* MINIMUM DEGREE ORDERING ALGORITHM. */
+
+/* INPUT PARAMETERS - */
+/* NEQNS - NUMBER OF EQUATIONS. */
+/* QSIZE - SIZE OF SUPERNODES AT ELIMINATION. */
+
+/* UPDATED PARAMETERS - */
+/* INVP - INVERSE PERMUTATION VECTOR. ON INPUT, */
+/* IF QSIZE(NODE)=0, THEN NODE HAS BEEN MERGED */
+/* INTO THE NODE -INVP(NODE); OTHERWISE, */
+/* -INVP(NODE) IS ITS INVERSE LABELLING. */
+
+/* OUTPUT PARAMETERS - */
+/* PERM - THE PERMUTATION VECTOR. */
+
+/* *************************************************************** */
+
+/* Subroutine */ int mmdnum_(int *neqns, shortint *perm, shortint *invp,
+ shortint *qsize)
+{
+ /* System generated locals */
+ int i__1;
+
+ /* Local variables */
+ static int node, root, nextf, father, nqsize, num;
+
+
+/* *************************************************************** */
+
+
+/* *************************************************************** */
+
+ /* Parameter adjustments */
+ --qsize;
+ --invp;
+ --perm;
+
+ /* Function Body */
+ i__1 = *neqns;
+ for (node = 1; node <= i__1; ++node) {
+ nqsize = qsize[node];
+ if (nqsize <= 0) {
+ perm[node] = invp[node];
+ }
+ if (nqsize > 0) {
+ perm[node] = -invp[node];
+ }
+/* L100: */
+ }
+/* ------------------------------------------------------ */
+/* FOR EACH NODE WHICH HAS BEEN MERGED, DO THE FOLLOWING. */
+/* ------------------------------------------------------ */
+ i__1 = *neqns;
+ for (node = 1; node <= i__1; ++node) {
+ if (perm[node] > 0) {
+ goto L500;
+ }
+/* ----------------------------------------- */
+/* TRACE THE MERGED TREE UNTIL ONE WHICH HAS */
+/* NOT BEEN MERGED, CALL IT ROOT. */
+/* ----------------------------------------- */
+ father = node;
+L200:
+ if (perm[father] > 0) {
+ goto L300;
+ }
+ father = -perm[father];
+ goto L200;
+L300:
+/* ----------------------- */
+/* NUMBER NODE AFTER ROOT. */
+/* ----------------------- */
+ root = father;
+ num = perm[root] + 1;
+ invp[node] = -num;
+ perm[root] = num;
+/* ------------------------ */
+/* SHORTEN THE MERGED TREE. */
+/* ------------------------ */
+ father = node;
+L400:
+ nextf = -perm[father];
+ if (nextf <= 0) {
+ goto L500;
+ }
+ perm[father] = -root;
+ father = nextf;
+ goto L400;
+L500:
+ ;
+ }
+/* ---------------------- */
+/* READY TO COMPUTE PERM. */
+/* ---------------------- */
+ i__1 = *neqns;
+ for (node = 1; node <= i__1; ++node) {
+ num = -invp[node];
+ invp[node] = num;
+ perm[num] = node;
+/* L600: */
+ }
+ return 0;
+
+} /* mmdnum_ */
+
diff --git a/intern/opennl/superlu/relax_snode.c b/intern/opennl/superlu/relax_snode.c
new file mode 100644
index 00000000000..549f3fcf873
--- /dev/null
+++ b/intern/opennl/superlu/relax_snode.c
@@ -0,0 +1,71 @@
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void
+relax_snode (
+ const int n,
+ int *et, /* column elimination tree */
+ const int relax_columns, /* max no of columns allowed in a
+ relaxed snode */
+ int *descendants, /* no of descendants of each node
+ in the etree */
+ int *relax_end /* last column in a supernode */
+ )
+{
+/*
+ * Purpose
+ * =======
+ * relax_snode() - Identify the initial relaxed supernodes, assuming that
+ * the matrix has been reordered according to the postorder of the etree.
+ *
+ */
+ register int j, parent;
+ register int snode_start; /* beginning of a snode */
+
+ ifill (relax_end, n, EMPTY);
+ for (j = 0; j < n; j++) descendants[j] = 0;
+
+ /* Compute the number of descendants of each node in the etree */
+ for (j = 0; j < n; j++) {
+ parent = et[j];
+ if ( parent != n ) /* not the dummy root */
+ descendants[parent] += descendants[j] + 1;
+ }
+
+ /* Identify the relaxed supernodes by postorder traversal of the etree. */
+ for (j = 0; j < n; ) {
+ parent = et[j];
+ snode_start = j;
+ while ( parent != n && descendants[parent] < relax_columns ) {
+ j = parent;
+ parent = et[j];
+ }
+ /* Found a supernode with j being the last column. */
+ relax_end[snode_start] = j; /* Last column is recorded */
+ j++;
+ /* Search for a new leaf */
+ while ( descendants[j] != 0 && j < n ) j++;
+ }
+
+ /*printf("No of relaxed snodes: %d; relaxed columns: %d\n",
+ nsuper, no_relaxed_col); */
+}
diff --git a/intern/opennl/superlu/scolumn_bmod.c b/intern/opennl/superlu/scolumn_bmod.c
new file mode 100644
index 00000000000..c877a27dd53
--- /dev/null
+++ b/intern/opennl/superlu/scolumn_bmod.c
@@ -0,0 +1,353 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+/*
+ * Function prototypes
+ */
+void susolve(int, int, float*, float*);
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+
+
+
+/* Return value: 0 - successful return
+ * > 0 - number of bytes allocated when run out of space
+ */
+int
+scolumn_bmod (
+ const int jcol, /* in */
+ const int nseg, /* in */
+ float *dense, /* in */
+ float *tempv, /* working array */
+ int *segrep, /* in */
+ int *repfnz, /* in */
+ int fpanelc, /* in -- first column in the current panel */
+ GlobalLU_t *Glu, /* modified */
+ SuperLUStat_t *stat /* output */
+ )
+{
+/*
+ * Purpose:
+ * ========
+ * Performs numeric block updates (sup-col) in topological order.
+ * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ * Special processing on the supernodal portion of L\U[*,j]
+ *
+ */
+#ifdef _CRAY
+ _fcd ftcs1 = _cptofcd("L", strlen("L")),
+ ftcs2 = _cptofcd("N", strlen("N")),
+ ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+
+#ifdef USE_VENDOR_BLAS
+ int incx = 1, incy = 1;
+ float alpha, beta;
+#endif
+
+ /* krep = representative of current k-th supernode
+ * fsupc = first supernodal column
+ * nsupc = no of columns in supernode
+ * nsupr = no of rows in supernode (used as leading dimension)
+ * luptr = location of supernodal LU-block in storage
+ * kfnz = first nonz in the k-th supernodal segment
+ * no_zeros = no of leading zeros in a supernodal U-segment
+ */
+ float ukj, ukj1, ukj2;
+ int luptr, luptr1, luptr2;
+ int fsupc, nsupc, nsupr, segsze;
+ int nrow; /* No of rows in the matrix of matrix-vector */
+ int jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
+ register int lptr, kfnz, isub, irow, i;
+ register int no_zeros, new_next;
+ int ufirst, nextlu;
+ int fst_col; /* First column within small LU update */
+ int d_fsupc; /* Distance between the first column of the current
+ panel and the first column of the current snode. */
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ int nzlumax;
+ float *tempv1;
+ float zero = 0.0;
+#ifdef USE_VENDOR_BLAS
+ float one = 1.0;
+ float none = -1.0;
+#endif
+ int mem_error;
+ flops_t *ops = stat->ops;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+ nzlumax = Glu->nzlumax;
+ jcolp1 = jcol + 1;
+ jsupno = supno[jcol];
+
+ /*
+ * For each nonz supernode segment of U[*,j] in topological order
+ */
+ k = nseg - 1;
+ for (ksub = 0; ksub < nseg; ksub++) {
+
+ krep = segrep[k];
+ k--;
+ ksupno = supno[krep];
+ if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
+
+ fsupc = xsup[ksupno];
+ fst_col = SUPERLU_MAX ( fsupc, fpanelc );
+
+ /* Distance from the current supernode to the current panel;
+ d_fsupc=0 if fsupc > fpanelc. */
+ d_fsupc = fst_col - fsupc;
+
+ luptr = xlusup[fst_col] + d_fsupc;
+ lptr = xlsub[fsupc] + d_fsupc;
+
+ kfnz = repfnz[krep];
+ kfnz = SUPERLU_MAX ( kfnz, fpanelc );
+
+ segsze = krep - kfnz + 1;
+ nsupc = krep - fst_col + 1;
+ nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
+ nrow = nsupr - d_fsupc - nsupc;
+ krep_ind = lptr + nsupc - 1;
+
+ ops[TRSV] += segsze * (segsze - 1);
+ ops[GEMV] += 2 * nrow * segsze;
+
+
+ /*
+ * Case 1: Update U-segment of size 1 -- col-col update
+ */
+ if ( segsze == 1 ) {
+ ukj = dense[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc;
+
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ dense[irow] -= ukj*lusup[luptr];
+ luptr++;
+ }
+
+ } else if ( segsze <= 3 ) {
+ ukj = dense[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc-1;
+ ukj1 = dense[lsub[krep_ind - 1]];
+ luptr1 = luptr - nsupr;
+
+ if ( segsze == 2 ) { /* Case 2: 2cols-col update */
+ ukj -= ukj1 * lusup[luptr1];
+ dense[lsub[krep_ind]] = ukj;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++;
+ luptr1++;
+ dense[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] );
+ }
+ } else { /* Case 3: 3cols-col update */
+ ukj2 = dense[lsub[krep_ind - 2]];
+ luptr2 = luptr1 - nsupr;
+ ukj1 -= ukj2 * lusup[luptr2-1];
+ ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+ dense[lsub[krep_ind]] = ukj;
+ dense[lsub[krep_ind-1]] = ukj1;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++;
+ luptr1++;
+ luptr2++;
+ dense[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+ }
+ }
+
+
+
+ } else {
+ /*
+ * Case: sup-col update
+ * Perform a triangular solve and block update,
+ * then scatter the result of sup-col update to dense
+ */
+
+ no_zeros = kfnz - fst_col;
+
+ /* Copy U[*,j] segment from dense[*] to tempv[*] */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ tempv[i] = dense[irow];
+ ++isub;
+ }
+
+ /* Dense triangular solve -- start effective triangle */
+ luptr += nsupr * no_zeros + no_zeros;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#else
+ strsv_( "L", "N", "U", &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#endif
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ alpha = one;
+ beta = zero;
+#ifdef _CRAY
+ SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#else
+ sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#endif
+#else
+ slsolve ( nsupr, segsze, &lusup[luptr], tempv );
+
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
+#endif
+
+
+ /* Scatter tempv[] into SPA dense[] as a temporary storage */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ dense[irow] = tempv[i];
+ tempv[i] = zero;
+ ++isub;
+ }
+
+ /* Scatter tempv1[] into SPA dense[] */
+ for (i = 0; i < nrow; i++) {
+ irow = lsub[isub];
+ dense[irow] -= tempv1[i];
+ tempv1[i] = zero;
+ ++isub;
+ }
+ }
+
+ } /* if jsupno ... */
+
+ } /* for each segment... */
+
+ /*
+ * Process the supernodal portion of L\U[*,j]
+ */
+ nextlu = xlusup[jcol];
+ fsupc = xsup[jsupno];
+
+ /* Copy the SPA dense into L\U[*,j] */
+ new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
+ while ( new_next > nzlumax ) {
+ if ((mem_error = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu)))
+ return (mem_error);
+ lusup = Glu->lusup;
+ lsub = Glu->lsub;
+ }
+
+ for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
+ irow = lsub[isub];
+ lusup[nextlu] = dense[irow];
+ dense[irow] = zero;
+ ++nextlu;
+ }
+
+ xlusup[jcolp1] = nextlu; /* Close L\U[*,jcol] */
+
+ /* For more updates within the panel (also within the current supernode),
+ * should start from the first column of the panel, or the first column
+ * of the supernode, whichever is bigger. There are 2 cases:
+ * 1) fsupc < fpanelc, then fst_col := fpanelc
+ * 2) fsupc >= fpanelc, then fst_col := fsupc
+ */
+ fst_col = SUPERLU_MAX ( fsupc, fpanelc );
+
+ if ( fst_col < jcol ) {
+
+ /* Distance between the current supernode and the current panel.
+ d_fsupc=0 if fsupc >= fpanelc. */
+ d_fsupc = fst_col - fsupc;
+
+ lptr = xlsub[fsupc] + d_fsupc;
+ luptr = xlusup[fst_col] + d_fsupc;
+ nsupr = xlsub[fsupc+1] - xlsub[fsupc]; /* Leading dimension */
+ nsupc = jcol - fst_col; /* Excluding jcol */
+ nrow = nsupr - d_fsupc - nsupc;
+
+ /* Points to the beginning of jcol in snode L\U(jsupno) */
+ ufirst = xlusup[jcol] + d_fsupc;
+
+ ops[TRSV] += nsupc * (nsupc - 1);
+ ops[GEMV] += 2 * nrow * nsupc;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr],
+ &nsupr, &lusup[ufirst], &incx );
+#else
+ strsv_( "L", "N", "U", &nsupc, &lusup[luptr],
+ &nsupr, &lusup[ufirst], &incx );
+#endif
+
+ alpha = none; beta = one; /* y := beta*y + alpha*A*x */
+
+#ifdef _CRAY
+ SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+ &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#else
+ sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+ &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#endif
+#else
+ slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
+
+ smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
+ &lusup[ufirst], tempv );
+
+ /* Copy updates from tempv[*] into lusup[*] */
+ isub = ufirst + nsupc;
+ for (i = 0; i < nrow; i++) {
+ lusup[isub] -= tempv[i];
+ tempv[i] = 0.0;
+ ++isub;
+ }
+
+#endif
+
+
+ } /* if fst_col < jcol ... */
+
+ return 0;
+}
diff --git a/intern/opennl/superlu/scolumn_dfs.c b/intern/opennl/superlu/scolumn_dfs.c
new file mode 100644
index 00000000000..ecfb5c3b839
--- /dev/null
+++ b/intern/opennl/superlu/scolumn_dfs.c
@@ -0,0 +1,270 @@
+
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+/* What type of supernodes we want */
+#define T2_SUPER
+
+int
+scolumn_dfs(
+ const int m, /* in - number of rows in the matrix */
+ const int jcol, /* in */
+ int *perm_r, /* in */
+ int *nseg, /* modified - with new segments appended */
+ int *lsub_col, /* in - defines the RHS vector to start the dfs */
+ int *segrep, /* modified - with new segments appended */
+ int *repfnz, /* modified */
+ int *xprune, /* modified */
+ int *marker, /* modified */
+ int *parent, /* working array */
+ int *xplore, /* working array */
+ GlobalLU_t *Glu /* modified */
+ )
+{
+/*
+ * Purpose
+ * =======
+ * "column_dfs" performs a symbolic factorization on column jcol, and
+ * decide the supernode boundary.
+ *
+ * This routine does not use numeric values, but only use the RHS
+ * row indices to start the dfs.
+ *
+ * A supernode representative is the last column of a supernode.
+ * The nonzeros in U[*,j] are segments that end at supernodal
+ * representatives. The routine returns a list of such supernodal
+ * representatives in topological order of the dfs that generates them.
+ * The location of the first nonzero in each such supernodal segment
+ * (supernodal entry location) is also returned.
+ *
+ * Local parameters
+ * ================
+ * nseg: no of segments in current U[*,j]
+ * jsuper: jsuper=EMPTY if column j does not belong to the same
+ * supernode as j-1. Otherwise, jsuper=nsuper.
+ *
+ * marker2: A-row --> A-row/col (0/1)
+ * repfnz: SuperA-col --> PA-row
+ * parent: SuperA-col --> SuperA-col
+ * xplore: SuperA-col --> index to L-structure
+ *
+ * Return value
+ * ============
+ * 0 success;
+ * > 0 number of bytes allocated when run out of space.
+ *
+ */
+ int jcolp1, jcolm1, jsuper, nsuper, nextl;
+ int k, krep, krow, kmark, kperm;
+ int *marker2; /* Used for small panel LU */
+ int fsupc; /* First column of a snode */
+ int myfnz; /* First nonz column of a U-segment */
+ int chperm, chmark, chrep, kchild;
+ int xdfs, maxdfs, kpar, oldrep;
+ int jptr, jm1ptr;
+ int ito, ifrom, istop; /* Used to compress row subscripts */
+ int mem_error;
+ int *xsup, *supno, *lsub, *xlsub;
+ int nzlmax;
+ static int first = 1, maxsuper;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ nzlmax = Glu->nzlmax;
+
+ if ( first ) {
+ maxsuper = sp_ienv(3);
+ first = 0;
+ }
+ jcolp1 = jcol + 1;
+ jcolm1 = jcol - 1;
+ nsuper = supno[jcol];
+ jsuper = nsuper;
+ nextl = xlsub[jcol];
+ marker2 = &marker[2*m];
+
+
+ /* For each nonzero in A[*,jcol] do dfs */
+ for (k = 0; lsub_col[k] != EMPTY; k++) {
+
+ krow = lsub_col[k];
+ lsub_col[k] = EMPTY;
+ kmark = marker2[krow];
+
+ /* krow was visited before, go to the next nonz */
+ if ( kmark == jcol ) continue;
+
+ /* For each unmarked nbr krow of jcol
+ * krow is in L: place it in structure of L[*,jcol]
+ */
+ marker2[krow] = jcol;
+ kperm = perm_r[krow];
+
+ if ( kperm == EMPTY ) {
+ lsub[nextl++] = krow; /* krow is indexed into A */
+ if ( nextl >= nzlmax ) {
+ if ((mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)))
+ return (mem_error);
+ lsub = Glu->lsub;
+ }
+ if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */
+ } else {
+ /* krow is in U: if its supernode-rep krep
+ * has been explored, update repfnz[*]
+ */
+ krep = xsup[supno[kperm]+1] - 1;
+ myfnz = repfnz[krep];
+
+ if ( myfnz != EMPTY ) { /* Visited before */
+ if ( myfnz > kperm ) repfnz[krep] = kperm;
+ /* continue; */
+ }
+ else {
+ /* Otherwise, perform dfs starting at krep */
+ oldrep = EMPTY;
+ parent[krep] = oldrep;
+ repfnz[krep] = kperm;
+ xdfs = xlsub[krep];
+ maxdfs = xprune[krep];
+
+ do {
+ /*
+ * For each unmarked kchild of krep
+ */
+ while ( xdfs < maxdfs ) {
+
+ kchild = lsub[xdfs];
+ xdfs++;
+ chmark = marker2[kchild];
+
+ if ( chmark != jcol ) { /* Not reached yet */
+ marker2[kchild] = jcol;
+ chperm = perm_r[kchild];
+
+ /* Case kchild is in L: place it in L[*,k] */
+ if ( chperm == EMPTY ) {
+ lsub[nextl++] = kchild;
+ if ( nextl >= nzlmax ) {
+ if ((mem_error =
+ sLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu)))
+ return (mem_error);
+ lsub = Glu->lsub;
+ }
+ if ( chmark != jcolm1 ) jsuper = EMPTY;
+ } else {
+ /* Case kchild is in U:
+ * chrep = its supernode-rep. If its rep has
+ * been explored, update its repfnz[*]
+ */
+ chrep = xsup[supno[chperm]+1] - 1;
+ myfnz = repfnz[chrep];
+ if ( myfnz != EMPTY ) { /* Visited before */
+ if ( myfnz > chperm )
+ repfnz[chrep] = chperm;
+ } else {
+ /* Continue dfs at super-rep of kchild */
+ xplore[krep] = xdfs;
+ oldrep = krep;
+ krep = chrep; /* Go deeper down G(L^t) */
+ parent[krep] = oldrep;
+ repfnz[krep] = chperm;
+ xdfs = xlsub[krep];
+ maxdfs = xprune[krep];
+ } /* else */
+
+ } /* else */
+
+ } /* if */
+
+ } /* while */
+
+ /* krow has no more unexplored nbrs;
+ * place supernode-rep krep in postorder DFS.
+ * backtrack dfs to its parent
+ */
+ segrep[*nseg] = krep;
+ ++(*nseg);
+ kpar = parent[krep]; /* Pop from stack, mimic recursion */
+ if ( kpar == EMPTY ) break; /* dfs done */
+ krep = kpar;
+ xdfs = xplore[krep];
+ maxdfs = xprune[krep];
+
+ } while ( kpar != EMPTY ); /* Until empty stack */
+
+ } /* else */
+
+ } /* else */
+
+ } /* for each nonzero ... */
+
+ /* Check to see if j belongs in the same supernode as j-1 */
+ if ( jcol == 0 ) { /* Do nothing for column 0 */
+ nsuper = supno[0] = 0;
+ } else {
+ fsupc = xsup[nsuper];
+ jptr = xlsub[jcol]; /* Not compressed yet */
+ jm1ptr = xlsub[jcolm1];
+
+#ifdef T2_SUPER
+ if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
+#endif
+ /* Make sure the number of columns in a supernode doesn't
+ exceed threshold. */
+ if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
+
+ /* If jcol starts a new supernode, reclaim storage space in
+ * lsub from the previous supernode. Note we only store
+ * the subscript set of the first and last columns of
+ * a supernode. (first for num values, last for pruning)
+ */
+ if ( jsuper == EMPTY ) { /* starts a new supernode */
+ if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
+#ifdef CHK_COMPRESS
+ printf(" Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
+#endif
+ ito = xlsub[fsupc+1];
+ xlsub[jcolm1] = ito;
+ istop = ito + jptr - jm1ptr;
+ xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
+ xlsub[jcol] = istop;
+ for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
+ lsub[ito] = lsub[ifrom];
+ nextl = ito; /* = istop + length(jcol) */
+ }
+ nsuper++;
+ supno[jcol] = nsuper;
+ } /* if a new supernode */
+
+ } /* else: jcol > 0 */
+
+ /* Tidy up the pointers before exit */
+ xsup[nsuper+1] = jcolp1;
+ supno[jcolp1] = nsuper;
+ xprune[jcol] = nextl; /* Initialize upper bound for pruning */
+ xlsub[jcolp1] = nextl;
+
+ return 0;
+}
diff --git a/intern/opennl/superlu/scopy_to_ucol.c b/intern/opennl/superlu/scopy_to_ucol.c
new file mode 100644
index 00000000000..fd97352923f
--- /dev/null
+++ b/intern/opennl/superlu/scopy_to_ucol.c
@@ -0,0 +1,105 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+scopy_to_ucol(
+ int jcol, /* in */
+ int nseg, /* in */
+ int *segrep, /* in */
+ int *repfnz, /* in */
+ int *perm_r, /* in */
+ float *dense, /* modified - reset to zero on return */
+ GlobalLU_t *Glu /* modified */
+ )
+{
+/*
+ * Gather from SPA dense[*] to global ucol[*].
+ */
+ int ksub, krep, ksupno;
+ int i, k, kfnz, segsze;
+ int fsupc, isub, irow;
+ int jsupno, nextu;
+ int new_next, mem_error;
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *ucol;
+ int *usub, *xusub;
+ int nzumax;
+
+ float zero = 0.0;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ ucol = Glu->ucol;
+ usub = Glu->usub;
+ xusub = Glu->xusub;
+ nzumax = Glu->nzumax;
+
+ jsupno = supno[jcol];
+ nextu = xusub[jcol];
+ k = nseg - 1;
+ for (ksub = 0; ksub < nseg; ksub++) {
+ krep = segrep[k--];
+ ksupno = supno[krep];
+
+ if ( ksupno != jsupno ) { /* Should go into ucol[] */
+ kfnz = repfnz[krep];
+ if ( kfnz != EMPTY ) { /* Nonzero U-segment */
+
+ fsupc = xsup[ksupno];
+ isub = xlsub[fsupc] + kfnz - fsupc;
+ segsze = krep - kfnz + 1;
+
+ new_next = nextu + segsze;
+ while ( new_next > nzumax ) {
+ if ((mem_error = sLUMemXpand(jcol, nextu, UCOL, &nzumax, Glu)))
+ return (mem_error);
+ ucol = Glu->ucol;
+ if ((mem_error = sLUMemXpand(jcol, nextu, USUB, &nzumax, Glu)))
+ return (mem_error);
+ usub = Glu->usub;
+ lsub = Glu->lsub;
+ }
+
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ usub[nextu] = perm_r[irow];
+ ucol[nextu] = dense[irow];
+ dense[irow] = zero;
+ nextu++;
+ isub++;
+ }
+
+ }
+
+ }
+
+ } /* for each segment... */
+
+ xusub[jcol + 1] = nextu; /* Close U[*,jcol] */
+ return 0;
+}
diff --git a/intern/opennl/superlu/sgssv.c b/intern/opennl/superlu/sgssv.c
new file mode 100644
index 00000000000..ede3dc83907
--- /dev/null
+++ b/intern/opennl/superlu/sgssv.c
@@ -0,0 +1,221 @@
+
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+#include "ssp_defs.h"
+
+void
+sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
+ SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
+ SuperLUStat_t *stat, int *info )
+{
+/*
+ * Purpose
+ * =======
+ *
+ * SGSSV solves the system of linear equations A*X=B, using the
+ * LU factorization from SGSTRF. It performs the following steps:
+ *
+ * 1. If A is stored column-wise (A->Stype = SLU_NC):
+ *
+ * 1.1. Permute the columns of A, forming A*Pc, where Pc
+ * is a permutation matrix. For more details of this step,
+ * see sp_preorder.c.
+ *
+ * 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
+ * by Gaussian elimination with partial pivoting.
+ * L is unit lower triangular with offdiagonal entries
+ * bounded by 1 in magnitude, and U is upper triangular.
+ *
+ * 1.3. Solve the system of equations A*X=B using the factored
+ * form of A.
+ *
+ * 2. If A is stored row-wise (A->Stype = SLU_NR), apply the
+ * above algorithm to the transpose of A:
+ *
+ * 2.1. Permute columns of transpose(A) (rows of A),
+ * forming transpose(A)*Pc, where Pc is a permutation matrix.
+ * For more details of this step, see sp_preorder.c.
+ *
+ * 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
+ * determined by Gaussian elimination with partial pivoting.
+ * L is unit lower triangular with offdiagonal entries
+ * bounded by 1 in magnitude, and U is upper triangular.
+ *
+ * 2.3. Solve the system of equations A*X=B using the factored
+ * form of A.
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ * The structure defines the input parameters to control
+ * how the LU decomposition will be performed and how the
+ * system will be solved.
+ *
+ * A (input) SuperMatrix*
+ * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ * of linear equations is A->nrow. Currently, the type of A can be:
+ * Stype = SLU_NC or SLU_NR; Dtype = SLU_S; Mtype = SLU_GE.
+ * In the future, more general A may be handled.
+ *
+ * perm_c (input/output) int*
+ * If A->Stype = SLU_NC, column permutation vector of size A->ncol
+ * which defines the permutation matrix Pc; perm_c[i] = j means
+ * column i of A is in position j in A*Pc.
+ * If A->Stype = SLU_NR, column permutation vector of size A->nrow
+ * which describes permutation of columns of transpose(A)
+ * (rows of A) as described above.
+ *
+ * If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
+ * options->Fact = SamePattern_SameRowPerm, it is an input argument.
+ * On exit, perm_c may be overwritten by the product of the input
+ * perm_c and a permutation that postorders the elimination tree
+ * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
+ * is already in postorder.
+ * Otherwise, it is an output argument.
+ *
+ * perm_r (input/output) int*
+ * If A->Stype = SLU_NC, row permutation vector of size A->nrow,
+ * which defines the permutation matrix Pr, and is determined
+ * by partial pivoting. perm_r[i] = j means row i of A is in
+ * position j in Pr*A.
+ * If A->Stype = SLU_NR, permutation vector of size A->ncol, which
+ * determines permutation of rows of transpose(A)
+ * (columns of A) as described above.
+ *
+ * If options->RowPerm = MY_PERMR or
+ * options->Fact = SamePattern_SameRowPerm, perm_r is an
+ * input argument.
+ * otherwise it is an output argument.
+ *
+ * L (output) SuperMatrix*
+ * The factor L from the factorization
+ * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or
+ * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
+ * Uses compressed row subscripts storage for supernodes, i.e.,
+ * L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ *
+ * U (output) SuperMatrix*
+ * The factor U from the factorization
+ * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or
+ * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
+ * Uses column-wise storage scheme, i.e., U has types:
+ * Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
+ *
+ * B (input/output) SuperMatrix*
+ * B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
+ * On entry, the right hand side matrix.
+ * On exit, the solution matrix if info = 0;
+ *
+ * stat (output) SuperLUStat_t*
+ * Record the statistics on runtime and floating-point operation count.
+ * See util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info (output) int*
+ * = 0: successful exit
+ * > 0: if info = i, and i is
+ * <= A->ncol: U(i,i) is exactly zero. The factorization has
+ * been completed, but the factor U is exactly singular,
+ * so the solution could not be computed.
+ * > A->ncol: number of bytes allocated when memory allocation
+ * failure occurred, plus A->ncol.
+ *
+ */
+ DNformat *Bstore;
+ SuperMatrix *AA = NULL;/* A in SLU_NC format used by the factorization routine.*/
+ SuperMatrix AC; /* Matrix postmultiplied by Pc */
+ int lwork = 0, *etree, i;
+
+ /* Set default values for some parameters */
+ int panel_size; /* panel size */
+ int relax; /* no of columns in a relaxed snodes */
+ int permc_spec;
+ trans_t trans = NOTRANS;
+ double *utime;
+ double t; /* Temporary time */
+
+ /* Test the input parameters ... */
+ *info = 0;
+ Bstore = B->Store;
+ if ( options->Fact != DOFACT ) *info = -1;
+ else if ( A->nrow != A->ncol || A->nrow < 0 ||
+ (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
+ A->Dtype != SLU_S || A->Mtype != SLU_GE )
+ *info = -2;
+ else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
+ B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
+ *info = -7;
+ if ( *info != 0 ) {
+ i = -(*info);
+ xerbla_("sgssv", &i);
+ return;
+ }
+
+ utime = stat->utime;
+
+ /* Convert A to SLU_NC format when necessary. */
+ if ( A->Stype == SLU_NR ) {
+ NRformat *Astore = A->Store;
+ AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
+ sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
+ Astore->nzval, Astore->colind, Astore->rowptr,
+ SLU_NC, A->Dtype, A->Mtype);
+ trans = TRANS;
+ } else {
+ if ( A->Stype == SLU_NC ) AA = A;
+ }
+
+ t = SuperLU_timer_();
+ /*
+ * Get column permutation vector perm_c[], according to permc_spec:
+ * permc_spec = NATURAL: natural ordering
+ * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
+ * permc_spec = MMD_ATA: minimum degree on structure of A'*A
+ * permc_spec = COLAMD: approximate minimum degree column ordering
+ * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
+ */
+ permc_spec = options->ColPerm;
+ if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
+ get_perm_c(permc_spec, AA, perm_c);
+ utime[COLPERM] = SuperLU_timer_() - t;
+
+ etree = intMalloc(A->ncol);
+
+ t = SuperLU_timer_();
+ sp_preorder(options, AA, perm_c, etree, &AC);
+ utime[ETREE] = SuperLU_timer_() - t;
+
+ panel_size = sp_ienv(1);
+ relax = sp_ienv(2);
+
+ /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n",
+ relax, panel_size, sp_ienv(3), sp_ienv(4));*/
+ t = SuperLU_timer_();
+ /* Compute the LU factorization of A. */
+ sgstrf(options, &AC, relax, panel_size,
+ etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
+ utime[FACT] = SuperLU_timer_() - t;
+
+ t = SuperLU_timer_();
+ if ( *info == 0 ) {
+ /* Solve the system A*X=B, overwriting B with X. */
+ sgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
+ }
+ utime[SOLVE] = SuperLU_timer_() - t;
+
+ SUPERLU_FREE (etree);
+ Destroy_CompCol_Permuted(&AC);
+ if ( A->Stype == SLU_NR ) {
+ Destroy_SuperMatrix_Store(AA);
+ SUPERLU_FREE(AA);
+ }
+
+}
diff --git a/intern/opennl/superlu/sgstrf.c b/intern/opennl/superlu/sgstrf.c
new file mode 100644
index 00000000000..42f8dc9d0ee
--- /dev/null
+++ b/intern/opennl/superlu/sgstrf.c
@@ -0,0 +1,433 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void
+sgstrf (superlu_options_t *options, SuperMatrix *A,
+ int relax, int panel_size, int *etree, void *work, int lwork,
+ int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
+ SuperLUStat_t *stat, int *info)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * SGSTRF computes an LU factorization of a general sparse m-by-n
+ * matrix A using partial pivoting with row interchanges.
+ * The factorization has the form
+ * Pr * A = L * U
+ * where Pr is a row permutation matrix, L is lower triangular with unit
+ * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper
+ * triangular (upper trapezoidal if A->nrow < A->ncol).
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ * The structure defines the input parameters to control
+ * how the LU decomposition will be performed.
+ *
+ * A (input) SuperMatrix*
+ * Original matrix A, permuted by columns, of dimension
+ * (A->nrow, A->ncol). The type of A can be:
+ * Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
+ *
+ * drop_tol (input) float (NOT IMPLEMENTED)
+ * Drop tolerance parameter. At step j of the Gaussian elimination,
+ * if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
+ * 0 <= drop_tol <= 1. The default value of drop_tol is 0.
+ *
+ * relax (input) int
+ * To control degree of relaxing supernodes. If the number
+ * of nodes (columns) in a subtree of the elimination tree is less
+ * than relax, this subtree is considered as one supernode,
+ * regardless of the row structures of those columns.
+ *
+ * panel_size (input) int
+ * A panel consists of at most panel_size consecutive columns.
+ *
+ * etree (input) int*, dimension (A->ncol)
+ * Elimination tree of A'*A.
+ * Note: etree is a vector of parent pointers for a forest whose
+ * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ * On input, the columns of A should be permuted so that the
+ * etree is in a certain postorder.
+ *
+ * work (input/output) void*, size (lwork) (in bytes)
+ * User-supplied work space and space for the output data structures.
+ * Not referenced if lwork = 0;
+ *
+ * lwork (input) int
+ * Specifies the size of work array in bytes.
+ * = 0: allocate space internally by system malloc;
+ * > 0: use user-supplied work array of length lwork in bytes,
+ * returns error if space runs out.
+ * = -1: the routine guesses the amount of space needed without
+ * performing the factorization, and returns it in
+ * *info; no other side effects.
+ *
+ * perm_c (input) int*, dimension (A->ncol)
+ * Column permutation vector, which defines the
+ * permutation matrix Pc; perm_c[i] = j means column i of A is
+ * in position j in A*Pc.
+ * When searching for diagonal, perm_c[*] is applied to the
+ * row subscripts of A, so that diagonal threshold pivoting
+ * can find the diagonal of A, rather than that of A*Pc.
+ *
+ * perm_r (input/output) int*, dimension (A->nrow)
+ * Row permutation vector which defines the permutation matrix Pr,
+ * perm_r[i] = j means row i of A is in position j in Pr*A.
+ * If options->Fact = SamePattern_SameRowPerm, the pivoting routine
+ * will try to use the input perm_r, unless a certain threshold
+ * criterion is violated. In that case, perm_r is overwritten by
+ * a new permutation determined by partial pivoting or diagonal
+ * threshold pivoting.
+ * Otherwise, perm_r is output argument;
+ *
+ * L (output) SuperMatrix*
+ * The factor L from the factorization Pr*A=L*U; use compressed row
+ * subscripts storage for supernodes, i.e., L has type:
+ * Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ *
+ * U (output) SuperMatrix*
+ * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
+ * storage scheme, i.e., U has types: Stype = SLU_NC,
+ * Dtype = SLU_S, Mtype = SLU_TRU.
+ *
+ * stat (output) SuperLUStat_t*
+ * Record the statistics on runtime and floating-point operation count.
+ * See util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info (output) int*
+ * = 0: successful exit
+ * < 0: if info = -i, the i-th argument had an illegal value
+ * > 0: if info = i, and i is
+ * <= A->ncol: U(i,i) is exactly zero. The factorization has
+ * been completed, but the factor U is exactly singular,
+ * and division by zero will occur if it is used to solve a
+ * system of equations.
+ * > A->ncol: number of bytes allocated when memory allocation
+ * failure occurred, plus A->ncol. If lwork = -1, it is
+ * the estimated amount of space needed, plus A->ncol.
+ *
+ * ======================================================================
+ *
+ * Local Working Arrays:
+ * ======================
+ * m = number of rows in the matrix
+ * n = number of columns in the matrix
+ *
+ * xprune[0:n-1]: xprune[*] points to locations in subscript
+ * vector lsub[*]. For column i, xprune[i] denotes the point where
+ * structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need
+ * to be traversed for symbolic factorization.
+ *
+ * marker[0:3*m-1]: marker[i] = j means that node i has been
+ * reached when working on column j.
+ * Storage: relative to original row subscripts
+ * NOTE: There are 3 of them: marker/marker1 are used for panel dfs,
+ * see spanel_dfs.c; marker2 is used for inner-factorization,
+ * see scolumn_dfs.c.
+ *
+ * parent[0:m-1]: parent vector used during dfs
+ * Storage: relative to new row subscripts
+ *
+ * xplore[0:m-1]: xplore[i] gives the location of the next (dfs)
+ * unexplored neighbor of i in lsub[*]
+ *
+ * segrep[0:nseg-1]: contains the list of supernodal representatives
+ * in topological order of the dfs. A supernode representative is the
+ * last column of a supernode.
+ * The maximum size of segrep[] is n.
+ *
+ * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a
+ * supernodal representative r, repfnz[r] is the location of the first
+ * nonzero in this segment. It is also used during the dfs: repfnz[r]>0
+ * indicates the supernode r has been explored.
+ * NOTE: There are W of them, each used for one column of a panel.
+ *
+ * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below
+ * the panel diagonal. These are filled in during spanel_dfs(), and are
+ * used later in the inner LU factorization within the panel.
+ * panel_lsub[]/dense[] pair forms the SPA data structure.
+ * NOTE: There are W of them.
+ *
+ * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
+ * NOTE: there are W of them.
+ *
+ * tempv[0:*]: real temporary used for dense numeric kernels;
+ * The size of this array is defined by NUM_TEMPV() in ssp_defs.h.
+ *
+ */
+ /* Local working arrays */
+ NCPformat *Astore;
+ int *iperm_r = NULL; /* inverse of perm_r;
+ used when options->Fact == SamePattern_SameRowPerm */
+ int *iperm_c; /* inverse of perm_c */
+ int *iwork;
+ float *swork;
+ int *segrep, *repfnz, *parent, *xplore;
+ int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
+ int *xprune;
+ int *marker;
+ float *dense, *tempv;
+ int *relax_end;
+ float *a;
+ int *asub;
+ int *xa_begin, *xa_end;
+ int *xsup, *supno;
+ int *xlsub, *xlusup, *xusub;
+ int nzlumax;
+ static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
+
+ /* Local scalars */
+ fact_t fact = options->Fact;
+ double diag_pivot_thresh = options->DiagPivotThresh;
+ int pivrow; /* pivotal row number in the original matrix A */
+ int nseg1; /* no of segments in U-column above panel row jcol */
+ int nseg; /* no of segments in each U-column */
+ register int jcol;
+ register int kcol; /* end column of a relaxed snode */
+ register int icol;
+ register int i, k, jj, new_next, iinfo;
+ int m, n, min_mn, jsupno, fsupc, nextlu, nextu;
+ int w_def; /* upper bound on panel width */
+ int usepr, iperm_r_allocated = 0;
+ int nnzL, nnzU;
+ int *panel_histo = stat->panel_histo;
+ flops_t *ops = stat->ops;
+
+ iinfo = 0;
+ m = A->nrow;
+ n = A->ncol;
+ min_mn = SUPERLU_MIN(m, n);
+ Astore = A->Store;
+ a = Astore->nzval;
+ asub = Astore->rowind;
+ xa_begin = Astore->colbeg;
+ xa_end = Astore->colend;
+
+ /* Allocate storage common to the factor routines */
+ *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz,
+ panel_size, L, U, &Glu, &iwork, &swork);
+ if ( *info ) return;
+
+ xsup = Glu.xsup;
+ supno = Glu.supno;
+ xlsub = Glu.xlsub;
+ xlusup = Glu.xlusup;
+ xusub = Glu.xusub;
+
+ SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
+ &repfnz, &panel_lsub, &xprune, &marker);
+ sSetRWork(m, panel_size, swork, &dense, &tempv);
+
+ usepr = (fact == SamePattern_SameRowPerm);
+ if ( usepr ) {
+ /* Compute the inverse of perm_r */
+ iperm_r = (int *) intMalloc(m);
+ for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
+ iperm_r_allocated = 1;
+ }
+ iperm_c = (int *) intMalloc(n);
+ for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
+
+ /* Identify relaxed snodes */
+ relax_end = (int *) intMalloc(n);
+ if ( options->SymmetricMode == YES ) {
+ heap_relax_snode(n, etree, relax, marker, relax_end);
+ } else {
+ relax_snode(n, etree, relax, marker, relax_end);
+ }
+
+ ifill (perm_r, m, EMPTY);
+ ifill (marker, m * NO_MARKER, EMPTY);
+ supno[0] = -1;
+ xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0;
+ w_def = panel_size;
+
+ /*
+ * Work on one "panel" at a time. A panel is one of the following:
+ * (a) a relaxed supernode at the bottom of the etree, or
+ * (b) panel_size contiguous columns, defined by the user
+ */
+ for (jcol = 0; jcol < min_mn; ) {
+
+ if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
+ kcol = relax_end[jcol]; /* end of the relaxed snode */
+ panel_histo[kcol-jcol+1]++;
+
+ /* --------------------------------------
+ * Factorize the relaxed supernode(jcol:kcol)
+ * -------------------------------------- */
+ /* Determine the union of the row structure of the snode */
+ if ( (*info = ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
+ xprune, marker, &Glu)) != 0 )
+ return;
+
+ nextu = xusub[jcol];
+ nextlu = xlusup[jcol];
+ jsupno = supno[jcol];
+ fsupc = xsup[jsupno];
+ new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
+ nzlumax = Glu.nzlumax;
+ while ( new_next > nzlumax ) {
+ if ( (*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) )
+ return;
+ }
+
+ for (icol = jcol; icol<= kcol; icol++) {
+ xusub[icol+1] = nextu;
+
+ /* Scatter into SPA dense[*] */
+ for (k = xa_begin[icol]; k < xa_end[icol]; k++)
+ dense[asub[k]] = a[k];
+
+ /* Numeric update within the snode */
+ ssnode_bmod(icol, fsupc, dense, tempv, &Glu, stat);
+
+ if ( (*info = spivotL(icol, diag_pivot_thresh, &usepr, perm_r,
+ iperm_r, iperm_c, &pivrow, &Glu, stat)) )
+ if ( iinfo == 0 ) iinfo = *info;
+
+#ifdef DEBUG
+ sprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
+#endif
+
+ }
+
+ jcol = icol;
+
+ } else { /* Work on one panel of panel_size columns */
+
+ /* Adjust panel_size so that a panel won't overlap with the next
+ * relaxed snode.
+ */
+ panel_size = w_def;
+ for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
+ if ( relax_end[k] != EMPTY ) {
+ panel_size = k - jcol;
+ break;
+ }
+ if ( k == min_mn ) panel_size = min_mn - jcol;
+ panel_histo[panel_size]++;
+
+ /* symbolic factor on a panel of columns */
+ spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
+ dense, panel_lsub, segrep, repfnz, xprune,
+ marker, parent, xplore, &Glu);
+
+ /* numeric sup-panel updates in topological order */
+ spanel_bmod(m, panel_size, jcol, nseg1, dense,
+ tempv, segrep, repfnz, &Glu, stat);
+
+ /* Sparse LU within the panel, and below panel diagonal */
+ for ( jj = jcol; jj < jcol + panel_size; jj++) {
+ k = (jj - jcol) * m; /* column index for w-wide arrays */
+
+ nseg = nseg1; /* Begin after all the panel segments */
+
+ if ((*info = scolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
+ segrep, &repfnz[k], xprune, marker,
+ parent, xplore, &Glu)) != 0) return;
+
+ /* Numeric updates */
+ if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
+ tempv, &segrep[nseg1], &repfnz[k],
+ jcol, &Glu, stat)) != 0) return;
+
+ /* Copy the U-segments to ucol[*] */
+ if ((*info = scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
+ perm_r, &dense[k], &Glu)) != 0)
+ return;
+
+ if ( (*info = spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
+ iperm_r, iperm_c, &pivrow, &Glu, stat)) )
+ if ( iinfo == 0 ) iinfo = *info;
+
+ /* Prune columns (0:jj-1) using column jj */
+ spruneL(jj, perm_r, pivrow, nseg, segrep,
+ &repfnz[k], xprune, &Glu);
+
+ /* Reset repfnz[] for this column */
+ resetrep_col (nseg, segrep, &repfnz[k]);
+
+#ifdef DEBUG
+ sprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
+#endif
+
+ }
+
+ jcol += panel_size; /* Move to the next panel */
+
+ } /* else */
+
+ } /* for */
+
+ *info = iinfo;
+
+ if ( m > n ) {
+ k = 0;
+ for (i = 0; i < m; ++i)
+ if ( perm_r[i] == EMPTY ) {
+ perm_r[i] = n + k;
+ ++k;
+ }
+ }
+
+ countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
+ fixupL(min_mn, perm_r, &Glu);
+
+ sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */
+
+ if ( fact == SamePattern_SameRowPerm ) {
+ /* L and U structures may have changed due to possibly different
+ pivoting, even though the storage is available.
+ There could also be memory expansions, so the array locations
+ may have changed, */
+ ((SCformat *)L->Store)->nnz = nnzL;
+ ((SCformat *)L->Store)->nsuper = Glu.supno[n];
+ ((SCformat *)L->Store)->nzval = Glu.lusup;
+ ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
+ ((SCformat *)L->Store)->rowind = Glu.lsub;
+ ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
+ ((NCformat *)U->Store)->nnz = nnzU;
+ ((NCformat *)U->Store)->nzval = Glu.ucol;
+ ((NCformat *)U->Store)->rowind = Glu.usub;
+ ((NCformat *)U->Store)->colptr = Glu.xusub;
+ } else {
+ sCreate_SuperNode_Matrix(L, A->nrow, A->ncol, nnzL, Glu.lusup,
+ Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
+ Glu.xsup, SLU_SC, SLU_S, SLU_TRLU);
+ sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
+ Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU);
+ }
+
+ ops[FACT] += ops[TRSV] + ops[GEMV];
+
+ if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
+ SUPERLU_FREE (iperm_c);
+ SUPERLU_FREE (relax_end);
+
+}
diff --git a/intern/opennl/superlu/sgstrs.c b/intern/opennl/superlu/sgstrs.c
new file mode 100644
index 00000000000..5f7b9b57195
--- /dev/null
+++ b/intern/opennl/superlu/sgstrs.c
@@ -0,0 +1,331 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+
+/*
+ * Function prototypes
+ */
+void susolve(int, int, float*, float*);
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+
+
+void
+sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
+ int *perm_c, int *perm_r, SuperMatrix *B,
+ SuperLUStat_t *stat, int *info)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * SGSTRS solves a system of linear equations A*X=B or A'*X=B
+ * with A sparse and B dense, using the LU factorization computed by
+ * SGSTRF.
+ *
+ * See supermatrix.h for the definition of 'SuperMatrix' structure.
+ *
+ * Arguments
+ * =========
+ *
+ * trans (input) trans_t
+ * Specifies the form of the system of equations:
+ * = NOTRANS: A * X = B (No transpose)
+ * = TRANS: A'* X = B (Transpose)
+ * = CONJ: A**H * X = B (Conjugate transpose)
+ *
+ * L (input) SuperMatrix*
+ * The factor L from the factorization Pr*A*Pc=L*U as computed by
+ * sgstrf(). Use compressed row subscripts storage for supernodes,
+ * i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
+ *
+ * U (input) SuperMatrix*
+ * The factor U from the factorization Pr*A*Pc=L*U as computed by
+ * sgstrf(). Use column-wise storage scheme, i.e., U has types:
+ * Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
+ *
+ * perm_c (input) int*, dimension (L->ncol)
+ * Column permutation vector, which defines the
+ * permutation matrix Pc; perm_c[i] = j means column i of A is
+ * in position j in A*Pc.
+ *
+ * perm_r (input) int*, dimension (L->nrow)
+ * Row permutation vector, which defines the permutation matrix Pr;
+ * perm_r[i] = j means row i of A is in position j in Pr*A.
+ *
+ * B (input/output) SuperMatrix*
+ * B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
+ * On entry, the right hand side matrix.
+ * On exit, the solution matrix if info = 0;
+ *
+ * stat (output) SuperLUStat_t*
+ * Record the statistics on runtime and floating-point operation count.
+ * See util.h for the definition of 'SuperLUStat_t'.
+ *
+ * info (output) int*
+ * = 0: successful exit
+ * < 0: if info = -i, the i-th argument had an illegal value
+ *
+ */
+#ifdef _CRAY
+ _fcd ftcs1, ftcs2, ftcs3, ftcs4;
+#endif
+#ifdef USE_VENDOR_BLAS
+ float alpha = 1.0, beta = 1.0;
+ float *work_col;
+#endif
+ DNformat *Bstore;
+ float *Bmat;
+ SCformat *Lstore;
+ NCformat *Ustore;
+ float *Lval, *Uval;
+ int fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
+ int i, j, k, iptr, jcol, n, ldb, nrhs;
+ float *work, *rhs_work, *soln;
+ flops_t solve_ops;
+ void sprint_soln();
+
+ /* Test input parameters ... */
+ *info = 0;
+ Bstore = B->Store;
+ ldb = Bstore->lda;
+ nrhs = B->ncol;
+ if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
+ else if ( L->nrow != L->ncol || L->nrow < 0 ||
+ L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU )
+ *info = -2;
+ else if ( U->nrow != U->ncol || U->nrow < 0 ||
+ U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU )
+ *info = -3;
+ else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
+ B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
+ *info = -6;
+ if ( *info ) {
+ i = -(*info);
+ xerbla_("sgstrs", &i);
+ return;
+ }
+
+ n = L->nrow;
+ work = floatCalloc(n * nrhs);
+ if ( !work ) ABORT("Malloc fails for local work[].");
+ soln = floatMalloc(n);
+ if ( !soln ) ABORT("Malloc fails for local soln[].");
+
+ Bmat = Bstore->nzval;
+ Lstore = L->Store;
+ Lval = Lstore->nzval;
+ Ustore = U->Store;
+ Uval = Ustore->nzval;
+ solve_ops = 0;
+
+ if ( trans == NOTRANS ) {
+ /* Permute right hand sides to form Pr*B */
+ for (i = 0; i < nrhs; i++) {
+ rhs_work = &Bmat[i*ldb];
+ for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
+ for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+ }
+
+ /* Forward solve PLy=Pb. */
+ for (k = 0; k <= Lstore->nsuper; k++) {
+ fsupc = L_FST_SUPC(k);
+ istart = L_SUB_START(fsupc);
+ nsupr = L_SUB_START(fsupc+1) - istart;
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ nrow = nsupr - nsupc;
+
+ solve_ops += nsupc * (nsupc - 1) * nrhs;
+ solve_ops += 2 * nrow * nsupc * nrhs;
+
+ if ( nsupc == 1 ) {
+ for (j = 0; j < nrhs; j++) {
+ rhs_work = &Bmat[j*ldb];
+ luptr = L_NZ_START(fsupc);
+ for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
+ irow = L_SUB(iptr);
+ ++luptr;
+ rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
+ }
+ }
+ } else {
+ luptr = L_NZ_START(fsupc);
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ ftcs1 = _cptofcd("L", strlen("L"));
+ ftcs2 = _cptofcd("N", strlen("N"));
+ ftcs3 = _cptofcd("U", strlen("U"));
+ STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
+ &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+
+ SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
+ &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
+ &beta, &work[0], &n );
+#else
+ strsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
+ &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+
+ sgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
+ &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
+ &beta, &work[0], &n );
+#endif
+ for (j = 0; j < nrhs; j++) {
+ rhs_work = &Bmat[j*ldb];
+ work_col = &work[j*n];
+ iptr = istart + nsupc;
+ for (i = 0; i < nrow; i++) {
+ irow = L_SUB(iptr);
+ rhs_work[irow] -= work_col[i]; /* Scatter */
+ work_col[i] = 0.0;
+ iptr++;
+ }
+ }
+#else
+ for (j = 0; j < nrhs; j++) {
+ rhs_work = &Bmat[j*ldb];
+ slsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
+ smatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
+ &rhs_work[fsupc], &work[0] );
+
+ iptr = istart + nsupc;
+ for (i = 0; i < nrow; i++) {
+ irow = L_SUB(iptr);
+ rhs_work[irow] -= work[i];
+ work[i] = 0.0;
+ iptr++;
+ }
+ }
+#endif
+ } /* else ... */
+ } /* for L-solve */
+
+#ifdef DEBUG
+ printf("After L-solve: y=\n");
+ sprint_soln(n, Bmat);
+#endif
+
+ /*
+ * Back solve Ux=y.
+ */
+ for (k = Lstore->nsuper; k >= 0; k--) {
+ fsupc = L_FST_SUPC(k);
+ istart = L_SUB_START(fsupc);
+ nsupr = L_SUB_START(fsupc+1) - istart;
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+
+ solve_ops += nsupc * (nsupc + 1) * nrhs;
+
+ if ( nsupc == 1 ) {
+ rhs_work = &Bmat[0];
+ for (j = 0; j < nrhs; j++) {
+ rhs_work[fsupc] /= Lval[luptr];
+ rhs_work += ldb;
+ }
+ } else {
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ ftcs1 = _cptofcd("L", strlen("L"));
+ ftcs2 = _cptofcd("U", strlen("U"));
+ ftcs3 = _cptofcd("N", strlen("N"));
+ STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
+ &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+#else
+ strsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
+ &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
+#endif
+#else
+ for (j = 0; j < nrhs; j++)
+ susolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
+#endif
+ }
+
+ for (j = 0; j < nrhs; ++j) {
+ rhs_work = &Bmat[j*ldb];
+ for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
+ solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
+ for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
+ irow = U_SUB(i);
+ rhs_work[irow] -= rhs_work[jcol] * Uval[i];
+ }
+ }
+ }
+
+ } /* for U-solve */
+
+#ifdef DEBUG
+ printf("After U-solve: x=\n");
+ sprint_soln(n, Bmat);
+#endif
+
+ /* Compute the final solution X := Pc*X. */
+ for (i = 0; i < nrhs; i++) {
+ rhs_work = &Bmat[i*ldb];
+ for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
+ for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+ }
+
+ stat->ops[SOLVE] = solve_ops;
+
+ } else { /* Solve A'*X=B or CONJ(A)*X=B */
+ /* Permute right hand sides to form Pc'*B. */
+ for (i = 0; i < nrhs; i++) {
+ rhs_work = &Bmat[i*ldb];
+ for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
+ for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+ }
+
+ stat->ops[SOLVE] = 0;
+ for (k = 0; k < nrhs; ++k) {
+
+ /* Multiply by inv(U'). */
+ sp_strsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
+
+ /* Multiply by inv(L'). */
+ sp_strsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
+
+ }
+ /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
+ for (i = 0; i < nrhs; i++) {
+ rhs_work = &Bmat[i*ldb];
+ for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
+ for (k = 0; k < n; k++) rhs_work[k] = soln[k];
+ }
+
+ }
+
+ SUPERLU_FREE(work);
+ SUPERLU_FREE(soln);
+}
+
+/*
+ * Diagnostic print of the solution vector
+ */
+void
+sprint_soln(int n, float *soln)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ printf("\t%d: %.4f\n", i, soln[i]);
+}
diff --git a/intern/opennl/superlu/smemory.c b/intern/opennl/superlu/smemory.c
new file mode 100644
index 00000000000..79da748671a
--- /dev/null
+++ b/intern/opennl/superlu/smemory.c
@@ -0,0 +1,676 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+#include "ssp_defs.h"
+
+/* Constants */
+#define NO_MEMTYPE 4 /* 0: lusup;
+ 1: ucol;
+ 2: lsub;
+ 3: usub */
+#define GluIntArray(n) (5 * (n) + 5)
+
+/* Internal prototypes */
+void *sexpand (int *, MemType,int, int, GlobalLU_t *);
+int sLUWorkInit (int, int, int, int **, float **, LU_space_t);
+void copy_mem_float (int, void *, void *);
+void sStackCompress (GlobalLU_t *);
+void sSetupSpace (void *, int, LU_space_t *);
+void *suser_malloc (int, int);
+void suser_free (int, int);
+
+/* External prototypes (in memory.c - prec-indep) */
+extern void copy_mem_int (int, void *, void *);
+extern void user_bcopy (char *, char *, int);
+
+/* Headers for 4 types of dynamatically managed memory */
+typedef struct e_node {
+ int size; /* length of the memory that has been used */
+ void *mem; /* pointer to the new malloc'd store */
+} ExpHeader;
+
+typedef struct {
+ int size;
+ int used;
+ int top1; /* grow upward, relative to &array[0] */
+ int top2; /* grow downward */
+ void *array;
+} LU_stack_t;
+
+/* Variables local to this file */
+static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
+static LU_stack_t stack;
+static int no_expand;
+
+/* Macros to manipulate stack */
+#define StackFull(x) ( x + stack.used >= stack.size )
+#define NotDoubleAlign(addr) ( (long int)addr & 7 )
+#define DoubleAlign(addr) ( ((long int)addr + 7) & ~7L )
+#define TempSpace(m, w) ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
+ (w + 1) * m * sizeof(float) )
+#define Reduce(alpha) ((alpha + 1) / 2) /* i.e. (alpha-1)/2 + 1 */
+
+
+
+
+/*
+ * Setup the memory model to be used for factorization.
+ * lwork = 0: use system malloc;
+ * lwork > 0: use user-supplied work[] space.
+ */
+void sSetupSpace(void *work, int lwork, LU_space_t *MemModel)
+{
+ if ( lwork == 0 ) {
+ *MemModel = SYSTEM; /* malloc/free */
+ } else if ( lwork > 0 ) {
+ *MemModel = USER; /* user provided space */
+ stack.used = 0;
+ stack.top1 = 0;
+ stack.top2 = (lwork/4)*4; /* must be word addressable */
+ stack.size = stack.top2;
+ stack.array = (void *) work;
+ }
+}
+
+
+
+void *suser_malloc(int bytes, int which_end)
+{
+ void *buf;
+
+ if ( StackFull(bytes) ) return (NULL);
+
+ if ( which_end == HEAD ) {
+ buf = (char*) stack.array + stack.top1;
+ stack.top1 += bytes;
+ } else {
+ stack.top2 -= bytes;
+ buf = (char*) stack.array + stack.top2;
+ }
+
+ stack.used += bytes;
+ return buf;
+}
+
+
+void suser_free(int bytes, int which_end)
+{
+ if ( which_end == HEAD ) {
+ stack.top1 -= bytes;
+ } else {
+ stack.top2 += bytes;
+ }
+ stack.used -= bytes;
+}
+
+
+
+/*
+ * mem_usage consists of the following fields:
+ * - for_lu (float)
+ * The amount of space used in bytes for the L\U data structures.
+ * - total_needed (float)
+ * The amount of space needed in bytes to perform factorization.
+ * - expansions (int)
+ * Number of memory expansions during the LU factorization.
+ */
+int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
+{
+ SCformat *Lstore;
+ NCformat *Ustore;
+ register int n, iword, dword, panel_size = sp_ienv(1);
+
+ Lstore = L->Store;
+ Ustore = U->Store;
+ n = L->ncol;
+ iword = sizeof(int);
+ dword = sizeof(float);
+
+ /* For LU factors */
+ mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
+ dword + Lstore->rowind_colptr[n] * iword );
+ mem_usage->for_lu += (float)( (n + 1) * iword +
+ Ustore->colptr[n] * (dword + iword) );
+
+ /* Working storage to support factorization */
+ mem_usage->total_needed = mem_usage->for_lu +
+ (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
+ (panel_size + 1) * n * dword );
+
+ mem_usage->expansions = --no_expand;
+
+ return 0;
+} /* sQuerySpace */
+
+/*
+ * Allocate storage for the data structures common to all factor routines.
+ * For those unpredictable size, make a guess as FILL * nnz(A).
+ * Return value:
+ * If lwork = -1, return the estimated amount of space required, plus n;
+ * otherwise, return the amount of space actually allocated when
+ * memory allocation failure occurred.
+ */
+int
+sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
+ int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
+ int **iwork, float **dwork)
+{
+ int info, iword, dword;
+ SCformat *Lstore;
+ NCformat *Ustore;
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ float *ucol;
+ int *usub, *xusub;
+ int nzlmax, nzumax, nzlumax;
+ int FILL = sp_ienv(6);
+
+ Glu->n = n;
+ no_expand = 0;
+ iword = sizeof(int);
+ dword = sizeof(float);
+
+ if ( !expanders )
+ expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
+ if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
+
+ if ( fact != SamePattern_SameRowPerm ) {
+ /* Guess for L\U factors */
+ nzumax = nzlumax = FILL * annz;
+ nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
+
+ if ( lwork == -1 ) {
+ return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
+ + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
+ } else {
+ sSetupSpace(work, lwork, &Glu->MemModel);
+ }
+
+#ifdef DEBUG
+ printf("sLUMemInit() called: annz %d, MemModel %d\n",
+ annz, Glu->MemModel);
+#endif
+
+ /* Integer pointers for L\U factors */
+ if ( Glu->MemModel == SYSTEM ) {
+ xsup = intMalloc(n+1);
+ supno = intMalloc(n+1);
+ xlsub = intMalloc(n+1);
+ xlusup = intMalloc(n+1);
+ xusub = intMalloc(n+1);
+ } else {
+ xsup = (int *)suser_malloc((n+1) * iword, HEAD);
+ supno = (int *)suser_malloc((n+1) * iword, HEAD);
+ xlsub = (int *)suser_malloc((n+1) * iword, HEAD);
+ xlusup = (int *)suser_malloc((n+1) * iword, HEAD);
+ xusub = (int *)suser_malloc((n+1) * iword, HEAD);
+ }
+
+ lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
+ ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
+ lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
+ usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
+
+ while ( !lusup || !ucol || !lsub || !usub ) {
+ if ( Glu->MemModel == SYSTEM ) {
+ SUPERLU_FREE(lusup);
+ SUPERLU_FREE(ucol);
+ SUPERLU_FREE(lsub);
+ SUPERLU_FREE(usub);
+ } else {
+ suser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
+ }
+ nzlumax /= 2;
+ nzumax /= 2;
+ nzlmax /= 2;
+ if ( nzlumax < annz ) {
+ printf("Not enough memory to perform factorization.\n");
+ return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
+ }
+ lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
+ ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
+ lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
+ usub = (int *) sexpand( &nzumax, USUB, 0, 1, Glu );
+ }
+
+ } else {
+ /* fact == SamePattern_SameRowPerm */
+ Lstore = L->Store;
+ Ustore = U->Store;
+ xsup = Lstore->sup_to_col;
+ supno = Lstore->col_to_sup;
+ xlsub = Lstore->rowind_colptr;
+ xlusup = Lstore->nzval_colptr;
+ xusub = Ustore->colptr;
+ nzlmax = Glu->nzlmax; /* max from previous factorization */
+ nzumax = Glu->nzumax;
+ nzlumax = Glu->nzlumax;
+
+ if ( lwork == -1 ) {
+ return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
+ + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
+ } else if ( lwork == 0 ) {
+ Glu->MemModel = SYSTEM;
+ } else {
+ Glu->MemModel = USER;
+ stack.top2 = (lwork/4)*4; /* must be word-addressable */
+ stack.size = stack.top2;
+ }
+
+ lsub = expanders[LSUB].mem = Lstore->rowind;
+ lusup = expanders[LUSUP].mem = Lstore->nzval;
+ usub = expanders[USUB].mem = Ustore->rowind;
+ ucol = expanders[UCOL].mem = Ustore->nzval;;
+ expanders[LSUB].size = nzlmax;
+ expanders[LUSUP].size = nzlumax;
+ expanders[USUB].size = nzumax;
+ expanders[UCOL].size = nzumax;
+ }
+
+ Glu->xsup = xsup;
+ Glu->supno = supno;
+ Glu->lsub = lsub;
+ Glu->xlsub = xlsub;
+ Glu->lusup = lusup;
+ Glu->xlusup = xlusup;
+ Glu->ucol = ucol;
+ Glu->usub = usub;
+ Glu->xusub = xusub;
+ Glu->nzlmax = nzlmax;
+ Glu->nzumax = nzumax;
+ Glu->nzlumax = nzlumax;
+
+ info = sLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
+ if ( info )
+ return ( info + smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
+
+ ++no_expand;
+ return 0;
+
+} /* sLUMemInit */
+
+/* Allocate known working storage. Returns 0 if success, otherwise
+ returns the number of bytes allocated so far when failure occurred. */
+int
+sLUWorkInit(int m, int n, int panel_size, int **iworkptr,
+ float **dworkptr, LU_space_t MemModel)
+{
+ int isize, dsize, extra;
+ float *old_ptr;
+ int maxsuper = sp_ienv(3),
+ rowblk = sp_ienv(4);
+
+ isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
+ dsize = (m * panel_size +
+ NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float);
+
+ if ( MemModel == SYSTEM )
+ *iworkptr = (int *) intCalloc(isize/sizeof(int));
+ else
+ *iworkptr = (int *) suser_malloc(isize, TAIL);
+ if ( ! *iworkptr ) {
+ fprintf(stderr, "sLUWorkInit: malloc fails for local iworkptr[]\n");
+ return (isize + n);
+ }
+
+ if ( MemModel == SYSTEM )
+ *dworkptr = (float *) SUPERLU_MALLOC(dsize);
+ else {
+ *dworkptr = (float *) suser_malloc(dsize, TAIL);
+ if ( NotDoubleAlign(*dworkptr) ) {
+ old_ptr = *dworkptr;
+ *dworkptr = (float*) DoubleAlign(*dworkptr);
+ *dworkptr = (float*) ((double*)*dworkptr - 1);
+ extra = (char*)old_ptr - (char*)*dworkptr;
+#ifdef DEBUG
+ printf("sLUWorkInit: not aligned, extra %d\n", extra);
+#endif
+ stack.top2 -= extra;
+ stack.used += extra;
+ }
+ }
+ if ( ! *dworkptr ) {
+ fprintf(stderr, "malloc fails for local dworkptr[].");
+ return (isize + dsize + n);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Set up pointers for real working arrays.
+ */
+void
+sSetRWork(int m, int panel_size, float *dworkptr,
+ float **dense, float **tempv)
+{
+ float zero = 0.0;
+
+ int maxsuper = sp_ienv(3),
+ rowblk = sp_ienv(4);
+ *dense = dworkptr;
+ *tempv = *dense + panel_size*m;
+ sfill (*dense, m * panel_size, zero);
+ sfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);
+}
+
+/*
+ * Free the working storage used by factor routines.
+ */
+void sLUWorkFree(int *iwork, float *dwork, GlobalLU_t *Glu)
+{
+ if ( Glu->MemModel == SYSTEM ) {
+ SUPERLU_FREE (iwork);
+ SUPERLU_FREE (dwork);
+ } else {
+ stack.used -= (stack.size - stack.top2);
+ stack.top2 = stack.size;
+/* sStackCompress(Glu); */
+ }
+
+ SUPERLU_FREE (expanders);
+ expanders = 0;
+}
+
+/* Expand the data structures for L and U during the factorization.
+ * Return value: 0 - successful return
+ * > 0 - number of bytes allocated when run out of space
+ */
+int
+sLUMemXpand(int jcol,
+ int next, /* number of elements currently in the factors */
+ MemType mem_type, /* which type of memory to expand */
+ int *maxlen, /* modified - maximum length of a data structure */
+ GlobalLU_t *Glu /* modified - global LU data structures */
+ )
+{
+ void *new_mem;
+
+#ifdef DEBUG
+ printf("sLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
+ jcol, next, *maxlen, mem_type);
+#endif
+
+ if (mem_type == USUB)
+ new_mem = sexpand(maxlen, mem_type, next, 1, Glu);
+ else
+ new_mem = sexpand(maxlen, mem_type, next, 0, Glu);
+
+ if ( !new_mem ) {
+ int nzlmax = Glu->nzlmax;
+ int nzumax = Glu->nzumax;
+ int nzlumax = Glu->nzlumax;
+ fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
+ return (smemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
+ }
+
+ switch ( mem_type ) {
+ case LUSUP:
+ Glu->lusup = (float *) new_mem;
+ Glu->nzlumax = *maxlen;
+ break;
+ case UCOL:
+ Glu->ucol = (float *) new_mem;
+ Glu->nzumax = *maxlen;
+ break;
+ case LSUB:
+ Glu->lsub = (int *) new_mem;
+ Glu->nzlmax = *maxlen;
+ break;
+ case USUB:
+ Glu->usub = (int *) new_mem;
+ Glu->nzumax = *maxlen;
+ break;
+ }
+
+ return 0;
+
+}
+
+
+
+void
+copy_mem_float(int howmany, void *old, void *new)
+{
+ register int i;
+ float *dold = old;
+ float *dnew = new;
+ for (i = 0; i < howmany; i++) dnew[i] = dold[i];
+}
+
+/*
+ * Expand the existing storage to accommodate more fill-ins.
+ */
+void
+*sexpand (
+ int *prev_len, /* length used from previous call */
+ MemType type, /* which part of the memory to expand */
+ int len_to_copy, /* size of the memory to be copied to new store */
+ int keep_prev, /* = 1: use prev_len;
+ = 0: compute new_len to expand */
+ GlobalLU_t *Glu /* modified - global LU data structures */
+ )
+{
+ float EXPAND = 1.5;
+ float alpha;
+ void *new_mem, *old_mem;
+ int new_len, tries, lword, extra, bytes_to_copy;
+
+ alpha = EXPAND;
+
+ if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
+ new_len = *prev_len;
+ else {
+ new_len = alpha * *prev_len;
+ }
+
+ if ( type == LSUB || type == USUB ) lword = sizeof(int);
+ else lword = sizeof(float);
+
+ if ( Glu->MemModel == SYSTEM ) {
+ new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
+/* new_mem = (void *) calloc(new_len, lword); */
+ if ( no_expand != 0 ) {
+ tries = 0;
+ if ( keep_prev ) {
+ if ( !new_mem ) return (NULL);
+ } else {
+ while ( !new_mem ) {
+ if ( ++tries > 10 ) return (NULL);
+ alpha = Reduce(alpha);
+ new_len = alpha * *prev_len;
+ new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
+/* new_mem = (void *) calloc(new_len, lword); */
+ }
+ }
+ if ( type == LSUB || type == USUB ) {
+ copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
+ } else {
+ copy_mem_float(len_to_copy, expanders[type].mem, new_mem);
+ }
+ SUPERLU_FREE (expanders[type].mem);
+ }
+ expanders[type].mem = (void *) new_mem;
+
+ } else { /* MemModel == USER */
+ if ( no_expand == 0 ) {
+ new_mem = suser_malloc(new_len * lword, HEAD);
+ if ( NotDoubleAlign(new_mem) &&
+ (type == LUSUP || type == UCOL) ) {
+ old_mem = new_mem;
+ new_mem = (void *)DoubleAlign(new_mem);
+ extra = (char*)new_mem - (char*)old_mem;
+#ifdef DEBUG
+ printf("expand(): not aligned, extra %d\n", extra);
+#endif
+ stack.top1 += extra;
+ stack.used += extra;
+ }
+ expanders[type].mem = (void *) new_mem;
+ }
+ else {
+ tries = 0;
+ extra = (new_len - *prev_len) * lword;
+ if ( keep_prev ) {
+ if ( StackFull(extra) ) return (NULL);
+ } else {
+ while ( StackFull(extra) ) {
+ if ( ++tries > 10 ) return (NULL);
+ alpha = Reduce(alpha);
+ new_len = alpha * *prev_len;
+ extra = (new_len - *prev_len) * lword;
+ }
+ }
+
+ if ( type != USUB ) {
+ new_mem = (void*)((char*)expanders[type + 1].mem + extra);
+ bytes_to_copy = (char*)stack.array + stack.top1
+ - (char*)expanders[type + 1].mem;
+ user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
+
+ if ( type < USUB ) {
+ Glu->usub = expanders[USUB].mem =
+ (void*)((char*)expanders[USUB].mem + extra);
+ }
+ if ( type < LSUB ) {
+ Glu->lsub = expanders[LSUB].mem =
+ (void*)((char*)expanders[LSUB].mem + extra);
+ }
+ if ( type < UCOL ) {
+ Glu->ucol = expanders[UCOL].mem =
+ (void*)((char*)expanders[UCOL].mem + extra);
+ }
+ stack.top1 += extra;
+ stack.used += extra;
+ if ( type == UCOL ) {
+ stack.top1 += extra; /* Add same amount for USUB */
+ stack.used += extra;
+ }
+
+ } /* if ... */
+
+ } /* else ... */
+ }
+
+ expanders[type].size = new_len;
+ *prev_len = new_len;
+ if ( no_expand ) ++no_expand;
+
+ return (void *) expanders[type].mem;
+
+} /* sexpand */
+
+
+/*
+ * Compress the work[] array to remove fragmentation.
+ */
+void
+sStackCompress(GlobalLU_t *Glu)
+{
+ register int iword, dword, ndim;
+ char *last, *fragment;
+ int *ifrom, *ito;
+ float *dfrom, *dto;
+ int *xlsub, *lsub, *xusub, *usub, *xlusup;
+ float *ucol, *lusup;
+
+ iword = sizeof(int);
+ dword = sizeof(float);
+ ndim = Glu->n;
+
+ xlsub = Glu->xlsub;
+ lsub = Glu->lsub;
+ xusub = Glu->xusub;
+ usub = Glu->usub;
+ xlusup = Glu->xlusup;
+ ucol = Glu->ucol;
+ lusup = Glu->lusup;
+
+ dfrom = ucol;
+ dto = (float *)((char*)lusup + xlusup[ndim] * dword);
+ copy_mem_float(xusub[ndim], dfrom, dto);
+ ucol = dto;
+
+ ifrom = lsub;
+ ito = (int *) ((char*)ucol + xusub[ndim] * iword);
+ copy_mem_int(xlsub[ndim], ifrom, ito);
+ lsub = ito;
+
+ ifrom = usub;
+ ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
+ copy_mem_int(xusub[ndim], ifrom, ito);
+ usub = ito;
+
+ last = (char*)usub + xusub[ndim] * iword;
+ fragment = (char*) (((char*)stack.array + stack.top1) - last);
+ stack.used -= (long int) fragment;
+ stack.top1 -= (long int) fragment;
+
+ Glu->ucol = ucol;
+ Glu->lsub = lsub;
+ Glu->usub = usub;
+
+#ifdef DEBUG
+ printf("sStackCompress: fragment %d\n", fragment);
+ /* for (last = 0; last < ndim; ++last)
+ print_lu_col("After compress:", last, 0);*/
+#endif
+
+}
+
+/*
+ * Allocate storage for original matrix A
+ */
+void
+sallocateA(int n, int nnz, float **a, int **asub, int **xa)
+{
+ *a = (float *) floatMalloc(nnz);
+ *asub = (int *) intMalloc(nnz);
+ *xa = (int *) intMalloc(n+1);
+}
+
+
+float *floatMalloc(int n)
+{
+ float *buf;
+ buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
+ if ( !buf ) {
+ ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
+ }
+ return (buf);
+}
+
+float *floatCalloc(int n)
+{
+ float *buf;
+ register int i;
+ float zero = 0.0;
+ buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
+ if ( !buf ) {
+ ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
+ }
+ for (i = 0; i < n; ++i) buf[i] = zero;
+ return (buf);
+}
+
+
+int smemory_usage(const int nzlmax, const int nzumax,
+ const int nzlumax, const int n)
+{
+ register int iword, dword;
+
+ iword = sizeof(int);
+ dword = sizeof(float);
+
+ return (10 * n * iword +
+ nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
+
+}
diff --git a/intern/opennl/superlu/smyblas2.c b/intern/opennl/superlu/smyblas2.c
new file mode 100644
index 00000000000..729e17f7674
--- /dev/null
+++ b/intern/opennl/superlu/smyblas2.c
@@ -0,0 +1,225 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ * File name: smyblas2.c
+ * Purpose:
+ * Level 2 BLAS operations: solves and matvec, written in C.
+ * Note:
+ * This is only used when the system lacks an efficient BLAS library.
+ */
+
+/*
+ * Solves a dense UNIT lower triangular system. The unit lower
+ * triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
+ * The solution will be returned in the rhs vector.
+ */
+void slsolve ( int ldm, int ncol, float *M, float *rhs )
+{
+ int k;
+ float x0, x1, x2, x3, x4, x5, x6, x7;
+ float *M0;
+ register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
+ register int firstcol = 0;
+
+ M0 = &M[0];
+
+ while ( firstcol < ncol - 7 ) { /* Do 8 columns */
+ Mki0 = M0 + 1;
+ Mki1 = Mki0 + ldm + 1;
+ Mki2 = Mki1 + ldm + 1;
+ Mki3 = Mki2 + ldm + 1;
+ Mki4 = Mki3 + ldm + 1;
+ Mki5 = Mki4 + ldm + 1;
+ Mki6 = Mki5 + ldm + 1;
+ Mki7 = Mki6 + ldm + 1;
+
+ x0 = rhs[firstcol];
+ x1 = rhs[firstcol+1] - x0 * *Mki0++;
+ x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
+ x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
+ x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++;
+ x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++ - x4 * *Mki4++;
+ x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
+ x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
+ - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
+ - x6 * *Mki6++;
+
+ rhs[++firstcol] = x1;
+ rhs[++firstcol] = x2;
+ rhs[++firstcol] = x3;
+ rhs[++firstcol] = x4;
+ rhs[++firstcol] = x5;
+ rhs[++firstcol] = x6;
+ rhs[++firstcol] = x7;
+ ++firstcol;
+
+ for (k = firstcol; k < ncol; k++)
+ rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
+ - x2 * *Mki2++ - x3 * *Mki3++
+ - x4 * *Mki4++ - x5 * *Mki5++
+ - x6 * *Mki6++ - x7 * *Mki7++;
+
+ M0 += 8 * ldm + 8;
+ }
+
+ while ( firstcol < ncol - 3 ) { /* Do 4 columns */
+ Mki0 = M0 + 1;
+ Mki1 = Mki0 + ldm + 1;
+ Mki2 = Mki1 + ldm + 1;
+ Mki3 = Mki2 + ldm + 1;
+
+ x0 = rhs[firstcol];
+ x1 = rhs[firstcol+1] - x0 * *Mki0++;
+ x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
+ x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
+
+ rhs[++firstcol] = x1;
+ rhs[++firstcol] = x2;
+ rhs[++firstcol] = x3;
+ ++firstcol;
+
+ for (k = firstcol; k < ncol; k++)
+ rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
+ - x2 * *Mki2++ - x3 * *Mki3++;
+
+ M0 += 4 * ldm + 4;
+ }
+
+ if ( firstcol < ncol - 1 ) { /* Do 2 columns */
+ Mki0 = M0 + 1;
+ Mki1 = Mki0 + ldm + 1;
+
+ x0 = rhs[firstcol];
+ x1 = rhs[firstcol+1] - x0 * *Mki0++;
+
+ rhs[++firstcol] = x1;
+ ++firstcol;
+
+ for (k = firstcol; k < ncol; k++)
+ rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
+
+ }
+
+}
+
+/*
+ * Solves a dense upper triangular system. The upper triangular matrix is
+ * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
+ * in the rhs vector.
+ */
+void
+susolve ( ldm, ncol, M, rhs )
+int ldm; /* in */
+int ncol; /* in */
+float *M; /* in */
+float *rhs; /* modified */
+{
+ float xj;
+ int jcol, j, irow;
+
+ jcol = ncol - 1;
+
+ for (j = 0; j < ncol; j++) {
+
+ xj = rhs[jcol] / M[jcol + jcol*ldm]; /* M(jcol, jcol) */
+ rhs[jcol] = xj;
+
+ for (irow = 0; irow < jcol; irow++)
+ rhs[irow] -= xj * M[irow + jcol*ldm]; /* M(irow, jcol) */
+
+ jcol--;
+
+ }
+}
+
+
+/*
+ * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
+ * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
+ */
+void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
+
+int ldm; /* in -- leading dimension of M */
+int nrow; /* in */
+int ncol; /* in */
+float *M; /* in */
+float *vec; /* in */
+float *Mxvec; /* in/out */
+
+{
+ float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
+ float *M0;
+ register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
+ register int firstcol = 0;
+ int k;
+
+ M0 = &M[0];
+ while ( firstcol < ncol - 7 ) { /* Do 8 columns */
+
+ Mki0 = M0;
+ Mki1 = Mki0 + ldm;
+ Mki2 = Mki1 + ldm;
+ Mki3 = Mki2 + ldm;
+ Mki4 = Mki3 + ldm;
+ Mki5 = Mki4 + ldm;
+ Mki6 = Mki5 + ldm;
+ Mki7 = Mki6 + ldm;
+
+ vi0 = vec[firstcol++];
+ vi1 = vec[firstcol++];
+ vi2 = vec[firstcol++];
+ vi3 = vec[firstcol++];
+ vi4 = vec[firstcol++];
+ vi5 = vec[firstcol++];
+ vi6 = vec[firstcol++];
+ vi7 = vec[firstcol++];
+
+ for (k = 0; k < nrow; k++)
+ Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
+ + vi2 * *Mki2++ + vi3 * *Mki3++
+ + vi4 * *Mki4++ + vi5 * *Mki5++
+ + vi6 * *Mki6++ + vi7 * *Mki7++;
+
+ M0 += 8 * ldm;
+ }
+
+ while ( firstcol < ncol - 3 ) { /* Do 4 columns */
+
+ Mki0 = M0;
+ Mki1 = Mki0 + ldm;
+ Mki2 = Mki1 + ldm;
+ Mki3 = Mki2 + ldm;
+
+ vi0 = vec[firstcol++];
+ vi1 = vec[firstcol++];
+ vi2 = vec[firstcol++];
+ vi3 = vec[firstcol++];
+ for (k = 0; k < nrow; k++)
+ Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
+ + vi2 * *Mki2++ + vi3 * *Mki3++ ;
+
+ M0 += 4 * ldm;
+ }
+
+ while ( firstcol < ncol ) { /* Do 1 column */
+
+ Mki0 = M0;
+ vi0 = vec[firstcol++];
+ for (k = 0; k < nrow; k++)
+ Mxvec[k] += vi0 * *Mki0++;
+
+ M0 += ldm;
+ }
+
+}
+
diff --git a/intern/opennl/superlu/sp_coletree.c b/intern/opennl/superlu/sp_coletree.c
new file mode 100644
index 00000000000..d49919167f5
--- /dev/null
+++ b/intern/opennl/superlu/sp_coletree.c
@@ -0,0 +1,332 @@
+
+/* Elimination tree computation and layout routines */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+/*
+ * Implementation of disjoint set union routines.
+ * Elements are integers in 0..n-1, and the
+ * names of the sets themselves are of type int.
+ *
+ * Calls are:
+ * initialize_disjoint_sets (n) initial call.
+ * s = make_set (i) returns a set containing only i.
+ * s = link (t, u) returns s = t union u, destroying t and u.
+ * s = find (i) return name of set containing i.
+ * finalize_disjoint_sets final call.
+ *
+ * This implementation uses path compression but not weighted union.
+ * See Tarjan's book for details.
+ * John Gilbert, CMI, 1987.
+ *
+ * Implemented path-halving by XSL 07/05/95.
+ */
+
+static int *pp; /* parent array for sets */
+
+static
+int *mxCallocInt(int n)
+{
+ register int i;
+ int *buf;
+
+ buf = (int *) SUPERLU_MALLOC( n * sizeof(int) );
+ if ( !buf ) {
+ ABORT("SUPERLU_MALLOC fails for buf in mxCallocInt()");
+ }
+ for (i = 0; i < n; i++) buf[i] = 0;
+ return (buf);
+}
+
+static
+void initialize_disjoint_sets (
+ int n
+ )
+{
+ pp = mxCallocInt(n);
+}
+
+
+static
+int make_set (
+ int i
+ )
+{
+ pp[i] = i;
+ return i;
+}
+
+
+static
+int link (
+ int s,
+ int t
+ )
+{
+ pp[s] = t;
+ return t;
+}
+
+
+/* PATH HALVING */
+static
+int find (int i)
+{
+ register int p, gp;
+
+ p = pp[i];
+ gp = pp[p];
+ while (gp != p) {
+ pp[i] = gp;
+ i = gp;
+ p = pp[i];
+ gp = pp[p];
+ }
+ return (p);
+}
+
+#if 0
+/* PATH COMPRESSION */
+static
+int find (
+ int i
+ )
+{
+ if (pp[i] != i)
+ pp[i] = find (pp[i]);
+ return pp[i];
+}
+#endif
+
+static
+void finalize_disjoint_sets (
+ void
+ )
+{
+ SUPERLU_FREE(pp);
+}
+
+
+/*
+ * Find the elimination tree for A'*A.
+ * This uses something similar to Liu's algorithm.
+ * It runs in time O(nz(A)*log n) and does not form A'*A.
+ *
+ * Input:
+ * Sparse matrix A. Numeric values are ignored, so any
+ * explicit zeros are treated as nonzero.
+ * Output:
+ * Integer array of parents representing the elimination
+ * tree of the symbolic product A'*A. Each vertex is a
+ * column of A, and nc means a root of the elimination forest.
+ *
+ * John R. Gilbert, Xerox, 10 Dec 1990
+ * Based on code by JRG dated 1987, 1988, and 1990.
+ */
+
+/*
+ * Nonsymmetric elimination tree
+ */
+int
+sp_coletree(
+ int *acolst, int *acolend, /* column start and end past 1 */
+ int *arow, /* row indices of A */
+ int nr, int nc, /* dimension of A */
+ int *parent /* parent in elim tree */
+ )
+{
+ int *root; /* root of subtee of etree */
+ int *firstcol; /* first nonzero col in each row*/
+ int rset, cset;
+ int row, col;
+ int rroot;
+ int p;
+
+ root = mxCallocInt (nc);
+ initialize_disjoint_sets (nc);
+
+ /* Compute firstcol[row] = first nonzero column in row */
+
+ firstcol = mxCallocInt (nr);
+ for (row = 0; row < nr; firstcol[row++] = nc);
+ for (col = 0; col < nc; col++)
+ for (p = acolst[col]; p < acolend[col]; p++) {
+ row = arow[p];
+ firstcol[row] = SUPERLU_MIN(firstcol[row], col);
+ }
+
+ /* Compute etree by Liu's algorithm for symmetric matrices,
+ except use (firstcol[r],c) in place of an edge (r,c) of A.
+ Thus each row clique in A'*A is replaced by a star
+ centered at its first vertex, which has the same fill. */
+
+ for (col = 0; col < nc; col++) {
+ cset = make_set (col);
+ root[cset] = col;
+ parent[col] = nc; /* Matlab */
+ for (p = acolst[col]; p < acolend[col]; p++) {
+ row = firstcol[arow[p]];
+ if (row >= col) continue;
+ rset = find (row);
+ rroot = root[rset];
+ if (rroot != col) {
+ parent[rroot] = col;
+ cset = link (cset, rset);
+ root[cset] = col;
+ }
+ }
+ }
+
+ SUPERLU_FREE (root);
+ SUPERLU_FREE (firstcol);
+ finalize_disjoint_sets ();
+ return 0;
+}
+
+/*
+ * q = TreePostorder (n, p);
+ *
+ * Postorder a tree.
+ * Input:
+ * p is a vector of parent pointers for a forest whose
+ * vertices are the integers 0 to n-1; p[root]==n.
+ * Output:
+ * q is a vector indexed by 0..n-1 such that q[i] is the
+ * i-th vertex in a postorder numbering of the tree.
+ *
+ * ( 2/7/95 modified by X.Li:
+ * q is a vector indexed by 0:n-1 such that vertex i is the
+ * q[i]-th vertex in a postorder numbering of the tree.
+ * That is, this is the inverse of the previous q. )
+ *
+ * In the child structure, lower-numbered children are represented
+ * first, so that a tree which is already numbered in postorder
+ * will not have its order changed.
+ *
+ * Written by John Gilbert, Xerox, 10 Dec 1990.
+ * Based on code written by John Gilbert at CMI in 1987.
+ */
+
+static int *first_kid, *next_kid; /* Linked list of children. */
+static int *post, postnum;
+
+static
+/*
+ * Depth-first search from vertex v.
+ */
+void etdfs (
+ int v
+ )
+{
+ int w;
+
+ for (w = first_kid[v]; w != -1; w = next_kid[w]) {
+ etdfs (w);
+ }
+ /* post[postnum++] = v; in Matlab */
+ post[v] = postnum++; /* Modified by X.Li on 2/14/95 */
+}
+
+
+/*
+ * Post order a tree
+ */
+int *TreePostorder(
+ int n,
+ int *parent
+)
+{
+ int v, dad;
+
+ /* Allocate storage for working arrays and results */
+ first_kid = mxCallocInt (n+1);
+ next_kid = mxCallocInt (n+1);
+ post = mxCallocInt (n+1);
+
+ /* Set up structure describing children */
+ for (v = 0; v <= n; first_kid[v++] = -1);
+ for (v = n-1; v >= 0; v--) {
+ dad = parent[v];
+ next_kid[v] = first_kid[dad];
+ first_kid[dad] = v;
+ }
+
+ /* Depth-first search from dummy root vertex #n */
+ postnum = 0;
+ etdfs (n);
+
+ SUPERLU_FREE (first_kid);
+ SUPERLU_FREE (next_kid);
+ return post;
+}
+
+
+/*
+ * p = spsymetree (A);
+ *
+ * Find the elimination tree for symmetric matrix A.
+ * This uses Liu's algorithm, and runs in time O(nz*log n).
+ *
+ * Input:
+ * Square sparse matrix A. No check is made for symmetry;
+ * elements below and on the diagonal are ignored.
+ * Numeric values are ignored, so any explicit zeros are
+ * treated as nonzero.
+ * Output:
+ * Integer array of parents representing the etree, with n
+ * meaning a root of the elimination forest.
+ * Note:
+ * This routine uses only the upper triangle, while sparse
+ * Cholesky (as in spchol.c) uses only the lower. Matlab's
+ * dense Cholesky uses only the upper. This routine could
+ * be modified to use the lower triangle either by transposing
+ * the matrix or by traversing it by rows with auxiliary
+ * pointer and link arrays.
+ *
+ * John R. Gilbert, Xerox, 10 Dec 1990
+ * Based on code by JRG dated 1987, 1988, and 1990.
+ * Modified by X.S. Li, November 1999.
+ */
+
+/*
+ * Symmetric elimination tree
+ */
+int
+sp_symetree(
+ int *acolst, int *acolend, /* column starts and ends past 1 */
+ int *arow, /* row indices of A */
+ int n, /* dimension of A */
+ int *parent /* parent in elim tree */
+ )
+{
+ int *root; /* root of subtree of etree */
+ int rset, cset;
+ int row, col;
+ int rroot;
+ int p;
+
+ root = mxCallocInt (n);
+ initialize_disjoint_sets (n);
+
+ for (col = 0; col < n; col++) {
+ cset = make_set (col);
+ root[cset] = col;
+ parent[col] = n; /* Matlab */
+ for (p = acolst[col]; p < acolend[col]; p++) {
+ row = arow[p];
+ if (row >= col) continue;
+ rset = find (row);
+ rroot = root[rset];
+ if (rroot != col) {
+ parent[rroot] = col;
+ cset = link (cset, rset);
+ root[cset] = col;
+ }
+ }
+ }
+ SUPERLU_FREE (root);
+ finalize_disjoint_sets ();
+ return 0;
+} /* SP_SYMETREE */
diff --git a/intern/opennl/superlu/sp_ienv.c b/intern/opennl/superlu/sp_ienv.c
new file mode 100644
index 00000000000..5b0ba7b2151
--- /dev/null
+++ b/intern/opennl/superlu/sp_ienv.c
@@ -0,0 +1,65 @@
+/*
+ * File name: sp_ienv.c
+ * History: Modified from lapack routine ILAENV
+ */
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+sp_ienv(int ispec)
+{
+/*
+ Purpose
+ =======
+
+ sp_ienv() is inquired to choose machine-dependent parameters for the
+ local environment. See ISPEC for a description of the parameters.
+
+ This version provides a set of parameters which should give good,
+ but not optimal, performance on many of the currently available
+ computers. Users are encouraged to modify this subroutine to set
+ the tuning parameters for their particular machine using the option
+ and problem size information in the arguments.
+
+ Arguments
+ =========
+
+ ISPEC (input) int
+ Specifies the parameter to be returned as the value of SP_IENV.
+ = 1: the panel size w; a panel consists of w consecutive
+ columns of matrix A in the process of Gaussian elimination.
+ The best value depends on machine's cache characters.
+ = 2: the relaxation parameter relax; if the number of
+ nodes (columns) in a subtree of the elimination tree is less
+ than relax, this subtree is considered as one supernode,
+ regardless of their row structures.
+ = 3: the maximum size for a supernode;
+ = 4: the minimum row dimension for 2-D blocking to be used;
+ = 5: the minimum column dimension for 2-D blocking to be used;
+ = 6: the estimated fills factor for L and U, compared with A;
+
+ (SP_IENV) (output) int
+ >= 0: the value of the parameter specified by ISPEC
+ < 0: if SP_IENV = -k, the k-th argument had an illegal value.
+
+ =====================================================================
+*/
+ int i;
+
+ switch (ispec) {
+ case 1: return (10);
+ case 2: return (5);
+ case 3: return (100);
+ case 4: return (200);
+ case 5: return (40);
+ case 6: return (20);
+ }
+
+ /* Invalid value for ISPEC */
+ i = 1;
+ xerbla_("sp_ienv", &i);
+ return 0;
+
+} /* sp_ienv_ */
+
diff --git a/intern/opennl/superlu/sp_preorder.c b/intern/opennl/superlu/sp_preorder.c
new file mode 100644
index 00000000000..f82da2de1aa
--- /dev/null
+++ b/intern/opennl/superlu/sp_preorder.c
@@ -0,0 +1,203 @@
+#include "ssp_defs.h"
+
+void
+sp_preorder(superlu_options_t *options, SuperMatrix *A, int *perm_c,
+ int *etree, SuperMatrix *AC)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * sp_preorder() permutes the columns of the original matrix. It performs
+ * the following steps:
+ *
+ * 1. Apply column permutation perm_c[] to A's column pointers to form AC;
+ *
+ * 2. If options->Fact = DOFACT, then
+ * (1) Compute column elimination tree etree[] of AC'AC;
+ * (2) Post order etree[] to get a postordered elimination tree etree[],
+ * and a postorder permutation post[];
+ * (3) Apply post[] permutation to columns of AC;
+ * (4) Overwrite perm_c[] with the product perm_c * post.
+ *
+ * Arguments
+ * =========
+ *
+ * options (input) superlu_options_t*
+ * Specifies whether or not the elimination tree will be re-used.
+ * If options->Fact == DOFACT, this means first time factor A,
+ * etree is computed, postered, and output.
+ * Otherwise, re-factor A, etree is input, unchanged on exit.
+ *
+ * A (input) SuperMatrix*
+ * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
+ * of the linear equations is A->nrow. Currently, the type of A can be:
+ * Stype = NC or SLU_NCP; Mtype = SLU_GE.
+ * In the future, more general A may be handled.
+ *
+ * perm_c (input/output) int*
+ * Column permutation vector of size A->ncol, which defines the
+ * permutation matrix Pc; perm_c[i] = j means column i of A is
+ * in position j in A*Pc.
+ * If options->Fact == DOFACT, perm_c is both input and output.
+ * On output, it is changed according to a postorder of etree.
+ * Otherwise, perm_c is input.
+ *
+ * etree (input/output) int*
+ * Elimination tree of Pc'*A'*A*Pc, dimension A->ncol.
+ * If options->Fact == DOFACT, etree is an output argument,
+ * otherwise it is an input argument.
+ * Note: etree is a vector of parent pointers for a forest whose
+ * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
+ *
+ * AC (output) SuperMatrix*
+ * The resulting matrix after applied the column permutation
+ * perm_c[] to matrix A. The type of AC can be:
+ * Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
+ *
+ */
+
+ NCformat *Astore;
+ NCPformat *ACstore;
+ int *iwork, *post;
+ register int n, i;
+
+ n = A->ncol;
+
+ /* Apply column permutation perm_c to A's column pointers so to
+ obtain NCP format in AC = A*Pc. */
+ AC->Stype = SLU_NCP;
+ AC->Dtype = A->Dtype;
+ AC->Mtype = A->Mtype;
+ AC->nrow = A->nrow;
+ AC->ncol = A->ncol;
+ Astore = A->Store;
+ ACstore = AC->Store = (void *) SUPERLU_MALLOC( sizeof(NCPformat) );
+ if ( !ACstore ) ABORT("SUPERLU_MALLOC fails for ACstore");
+ ACstore->nnz = Astore->nnz;
+ ACstore->nzval = Astore->nzval;
+ ACstore->rowind = Astore->rowind;
+ ACstore->colbeg = (int*) SUPERLU_MALLOC(n*sizeof(int));
+ if ( !(ACstore->colbeg) ) ABORT("SUPERLU_MALLOC fails for ACstore->colbeg");
+ ACstore->colend = (int*) SUPERLU_MALLOC(n*sizeof(int));
+ if ( !(ACstore->colend) ) ABORT("SUPERLU_MALLOC fails for ACstore->colend");
+
+#ifdef DEBUG
+ print_int_vec("pre_order:", n, perm_c);
+ check_perm("Initial perm_c", n, perm_c);
+#endif
+
+ for (i = 0; i < n; i++) {
+ ACstore->colbeg[perm_c[i]] = Astore->colptr[i];
+ ACstore->colend[perm_c[i]] = Astore->colptr[i+1];
+ }
+
+ if ( options->Fact == DOFACT ) {
+#undef ETREE_ATplusA
+#ifdef ETREE_ATplusA
+ /*--------------------------------------------
+ COMPUTE THE ETREE OF Pc*(A'+A)*Pc'.
+ --------------------------------------------*/
+ int *b_colptr, *b_rowind, bnz, j;
+ int *c_colbeg, *c_colend;
+
+ /*printf("Use etree(A'+A)\n");*/
+
+ /* Form B = A + A'. */
+ at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
+ &bnz, &b_colptr, &b_rowind);
+
+ /* Form C = Pc*B*Pc'. */
+ c_colbeg = (int*) SUPERLU_MALLOC(2*n*sizeof(int));
+ c_colend = c_colbeg + n;
+ if (!c_colbeg ) ABORT("SUPERLU_MALLOC fails for c_colbeg/c_colend");
+ for (i = 0; i < n; i++) {
+ c_colbeg[perm_c[i]] = b_colptr[i];
+ c_colend[perm_c[i]] = b_colptr[i+1];
+ }
+ for (j = 0; j < n; ++j) {
+ for (i = c_colbeg[j]; i < c_colend[j]; ++i) {
+ b_rowind[i] = perm_c[b_rowind[i]];
+ }
+ }
+
+ /* Compute etree of C. */
+ sp_symetree(c_colbeg, c_colend, b_rowind, n, etree);
+
+ SUPERLU_FREE(b_colptr);
+ if ( bnz ) SUPERLU_FREE(b_rowind);
+ SUPERLU_FREE(c_colbeg);
+
+#else
+ /*--------------------------------------------
+ COMPUTE THE COLUMN ELIMINATION TREE.
+ --------------------------------------------*/
+ sp_coletree(ACstore->colbeg, ACstore->colend, ACstore->rowind,
+ A->nrow, A->ncol, etree);
+#endif
+#ifdef DEBUG
+ print_int_vec("etree:", n, etree);
+#endif
+
+ /* In symmetric mode, do not do postorder here. */
+ if ( options->SymmetricMode == NO ) {
+ /* Post order etree */
+ post = (int *) TreePostorder(n, etree);
+ /* for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
+ iwork = post; */
+
+#ifdef DEBUG
+ print_int_vec("post:", n+1, post);
+ check_perm("post", n, post);
+#endif
+ iwork = (int*) SUPERLU_MALLOC((n+1)*sizeof(int));
+ if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
+
+ /* Renumber etree in postorder */
+ for (i = 0; i < n; ++i) iwork[post[i]] = post[etree[i]];
+ for (i = 0; i < n; ++i) etree[i] = iwork[i];
+
+#ifdef DEBUG
+ print_int_vec("postorder etree:", n, etree);
+#endif
+
+ /* Postmultiply A*Pc by post[] */
+ for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colbeg[i];
+ for (i = 0; i < n; ++i) ACstore->colbeg[i] = iwork[i];
+ for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colend[i];
+ for (i = 0; i < n; ++i) ACstore->colend[i] = iwork[i];
+
+ for (i = 0; i < n; ++i)
+ iwork[i] = post[perm_c[i]]; /* product of perm_c and post */
+ for (i = 0; i < n; ++i) perm_c[i] = iwork[i];
+
+#ifdef DEBUG
+ print_int_vec("Pc*post:", n, perm_c);
+ check_perm("final perm_c", n, perm_c);
+#endif
+ SUPERLU_FREE (post);
+ SUPERLU_FREE (iwork);
+ } /* end postordering */
+
+ } /* if options->Fact == DOFACT ... */
+
+}
+
+int check_perm(char *what, int n, int *perm)
+{
+ register int i;
+ int *marker;
+ marker = (int *) calloc(n, sizeof(int));
+
+ for (i = 0; i < n; ++i) {
+ if ( marker[perm[i]] == 1 || perm[i] >= n ) {
+ printf("%s: Not a valid PERM[%d] = %d\n", what, i, perm[i]);
+ ABORT("check_perm");
+ } else {
+ marker[perm[i]] = 1;
+ }
+ }
+
+ SUPERLU_FREE(marker);
+ return 0;
+}
diff --git a/intern/opennl/superlu/spanel_bmod.c b/intern/opennl/superlu/spanel_bmod.c
new file mode 100644
index 00000000000..a59a9086df1
--- /dev/null
+++ b/intern/opennl/superlu/spanel_bmod.c
@@ -0,0 +1,449 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+/*
+ * Function prototypes
+ */
+void slsolve(int, int, float *, float *);
+void smatvec(int, int, int, float *, float *, float *);
+extern void scheck_tempv();
+
+void
+spanel_bmod (
+ const int m, /* in - number of rows in the matrix */
+ const int w, /* in */
+ const int jcol, /* in */
+ const int nseg, /* in */
+ float *dense, /* out, of size n by w */
+ float *tempv, /* working array */
+ int *segrep, /* in */
+ int *repfnz, /* in, of size n by w */
+ GlobalLU_t *Glu, /* modified */
+ SuperLUStat_t *stat /* output */
+ )
+{
+/*
+ * Purpose
+ * =======
+ *
+ * Performs numeric block updates (sup-panel) in topological order.
+ * It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
+ * Special processing on the supernodal portion of L\U[*,j]
+ *
+ * Before entering this routine, the original nonzeros in the panel
+ * were already copied into the spa[m,w].
+ *
+ * Updated/Output parameters-
+ * dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned
+ * collectively in the m-by-w vector dense[*].
+ *
+ */
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ _fcd ftcs1 = _cptofcd("L", strlen("L")),
+ ftcs2 = _cptofcd("N", strlen("N")),
+ ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+ int incx = 1, incy = 1;
+ float alpha, beta;
+#endif
+
+ register int k, ksub;
+ int fsupc, nsupc, nsupr, nrow;
+ int krep, krep_ind;
+ float ukj, ukj1, ukj2;
+ int luptr, luptr1, luptr2;
+ int segsze;
+ int block_nrow; /* no of rows in a block row */
+ register int lptr; /* Points to the row subscripts of a supernode */
+ int kfnz, irow, no_zeros;
+ register int isub, isub1, i;
+ register int jj; /* Index through each column in the panel */
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ int *repfnz_col; /* repfnz[] for a column in the panel */
+ float *dense_col; /* dense[] for a column in the panel */
+ float *tempv1; /* Used in 1-D update */
+ float *TriTmp, *MatvecTmp; /* used in 2-D update */
+ float zero = 0.0;
+ register int ldaTmp;
+ register int r_ind, r_hi;
+ static int first = 1, maxsuper, rowblk, colblk;
+ flops_t *ops = stat->ops;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+
+ if ( first ) {
+ maxsuper = sp_ienv(3);
+ rowblk = sp_ienv(4);
+ colblk = sp_ienv(5);
+ first = 0;
+ }
+ ldaTmp = maxsuper + rowblk;
+
+ /*
+ * For each nonz supernode segment of U[*,j] in topological order
+ */
+ k = nseg - 1;
+ for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
+
+ /* krep = representative of current k-th supernode
+ * fsupc = first supernodal column
+ * nsupc = no of columns in a supernode
+ * nsupr = no of rows in a supernode
+ */
+ krep = segrep[k--];
+ fsupc = xsup[supno[krep]];
+ nsupc = krep - fsupc + 1;
+ nsupr = xlsub[fsupc+1] - xlsub[fsupc];
+ nrow = nsupr - nsupc;
+ lptr = xlsub[fsupc];
+ krep_ind = lptr + nsupc - 1;
+
+ repfnz_col = repfnz;
+ dense_col = dense;
+
+ if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
+
+ TriTmp = tempv;
+
+ /* Sequence through each column in panel -- triangular solves */
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
+
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ luptr = xlusup[fsupc];
+
+ ops[TRSV] += segsze * (segsze - 1);
+ ops[GEMV] += 2 * nrow * segsze;
+
+ /* Case 1: Update U-segment of size 1 -- col-col update */
+ if ( segsze == 1 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc;
+
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
+ irow = lsub[i];
+ dense_col[irow] -= ukj * lusup[luptr];
+ ++luptr;
+ }
+
+ } else if ( segsze <= 3 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ ukj1 = dense_col[lsub[krep_ind - 1]];
+ luptr += nsupr*(nsupc-1) + nsupc-1;
+ luptr1 = luptr - nsupr;
+
+ if ( segsze == 2 ) {
+ ukj -= ukj1 * lusup[luptr1];
+ dense_col[lsub[krep_ind]] = ukj;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++; luptr1++;
+ dense_col[irow] -= (ukj*lusup[luptr]
+ + ukj1*lusup[luptr1]);
+ }
+ } else {
+ ukj2 = dense_col[lsub[krep_ind - 2]];
+ luptr2 = luptr1 - nsupr;
+ ukj1 -= ukj2 * lusup[luptr2-1];
+ ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+ dense_col[lsub[krep_ind]] = ukj;
+ dense_col[lsub[krep_ind-1]] = ukj1;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ luptr++; luptr1++; luptr2++;
+ dense_col[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+ }
+ }
+
+ } else { /* segsze >= 4 */
+
+ /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
+ holds the result of triangular solves. */
+ no_zeros = kfnz - fsupc;
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; ++i) {
+ irow = lsub[isub];
+ TriTmp[i] = dense_col[irow]; /* Gather */
+ ++isub;
+ }
+
+ /* start effective triangle */
+ luptr += nsupr * no_zeros + no_zeros;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
+ &nsupr, TriTmp, &incx );
+#else
+ strsv_( "L", "N", "U", &segsze, &lusup[luptr],
+ &nsupr, TriTmp, &incx );
+#endif
+#else
+ slsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
+#endif
+
+
+ } /* else ... */
+
+ } /* for jj ... end tri-solves */
+
+ /* Block row updates; push all the way into dense[*] block */
+ for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
+
+ r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
+ block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
+ luptr = xlusup[fsupc] + nsupc + r_ind;
+ isub1 = lptr + nsupc + r_ind;
+
+ repfnz_col = repfnz;
+ TriTmp = tempv;
+ dense_col = dense;
+
+ /* Sequence through each column in panel -- matrix-vector */
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
+
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ if ( segsze <= 3 ) continue; /* skip unrolled cases */
+
+ /* Perform a block update, and scatter the result of
+ matrix-vector to dense[]. */
+ no_zeros = kfnz - fsupc;
+ luptr1 = luptr + nsupr * no_zeros;
+ MatvecTmp = &TriTmp[maxsuper];
+
+#ifdef USE_VENDOR_BLAS
+ alpha = one;
+ beta = zero;
+#ifdef _CRAY
+ SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1],
+ &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
+#else
+ sgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1],
+ &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
+#endif
+#else
+ smatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
+ TriTmp, MatvecTmp);
+#endif
+
+ /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
+ * such that MatvecTmp[*] can be re-used for the
+ * the next blok row update. dense[] will be copied into
+ * global store after the whole panel has been finished.
+ */
+ isub = isub1;
+ for (i = 0; i < block_nrow; i++) {
+ irow = lsub[isub];
+ dense_col[irow] -= MatvecTmp[i];
+ MatvecTmp[i] = zero;
+ ++isub;
+ }
+
+ } /* for jj ... */
+
+ } /* for each block row ... */
+
+ /* Scatter the triangular solves into SPA dense[*] */
+ repfnz_col = repfnz;
+ TriTmp = tempv;
+ dense_col = dense;
+
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ if ( segsze <= 3 ) continue; /* skip unrolled cases */
+
+ no_zeros = kfnz - fsupc;
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ dense_col[irow] = TriTmp[i];
+ TriTmp[i] = zero;
+ ++isub;
+ }
+
+ } /* for jj ... */
+
+ } else { /* 1-D block modification */
+
+
+ /* Sequence through each column in the panel */
+ for (jj = jcol; jj < jcol + w; jj++,
+ repfnz_col += m, dense_col += m) {
+
+ kfnz = repfnz_col[krep];
+ if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
+
+ segsze = krep - kfnz + 1;
+ luptr = xlusup[fsupc];
+
+ ops[TRSV] += segsze * (segsze - 1);
+ ops[GEMV] += 2 * nrow * segsze;
+
+ /* Case 1: Update U-segment of size 1 -- col-col update */
+ if ( segsze == 1 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc;
+
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
+ irow = lsub[i];
+ dense_col[irow] -= ukj * lusup[luptr];
+ ++luptr;
+ }
+
+ } else if ( segsze <= 3 ) {
+ ukj = dense_col[lsub[krep_ind]];
+ luptr += nsupr*(nsupc-1) + nsupc-1;
+ ukj1 = dense_col[lsub[krep_ind - 1]];
+ luptr1 = luptr - nsupr;
+
+ if ( segsze == 2 ) {
+ ukj -= ukj1 * lusup[luptr1];
+ dense_col[lsub[krep_ind]] = ukj;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ ++luptr; ++luptr1;
+ dense_col[irow] -= (ukj*lusup[luptr]
+ + ukj1*lusup[luptr1]);
+ }
+ } else {
+ ukj2 = dense_col[lsub[krep_ind - 2]];
+ luptr2 = luptr1 - nsupr;
+ ukj1 -= ukj2 * lusup[luptr2-1];
+ ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
+ dense_col[lsub[krep_ind]] = ukj;
+ dense_col[lsub[krep_ind-1]] = ukj1;
+ for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
+ irow = lsub[i];
+ ++luptr; ++luptr1; ++luptr2;
+ dense_col[irow] -= ( ukj*lusup[luptr]
+ + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
+ }
+ }
+
+ } else { /* segsze >= 4 */
+ /*
+ * Perform a triangular solve and block update,
+ * then scatter the result of sup-col update to dense[].
+ */
+ no_zeros = kfnz - fsupc;
+
+ /* Copy U[*,j] segment from dense[*] to tempv[*]:
+ * The result of triangular solve is in tempv[*];
+ * The result of matrix vector update is in dense_col[*]
+ */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; ++i) {
+ irow = lsub[isub];
+ tempv[i] = dense_col[irow]; /* Gather */
+ ++isub;
+ }
+
+ /* start effective triangle */
+ luptr += nsupr * no_zeros + no_zeros;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#else
+ strsv_( "L", "N", "U", &segsze, &lusup[luptr],
+ &nsupr, tempv, &incx );
+#endif
+
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ alpha = one;
+ beta = zero;
+#ifdef _CRAY
+ SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#else
+ sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
+ &nsupr, tempv, &incx, &beta, tempv1, &incy );
+#endif
+#else
+ slsolve ( nsupr, segsze, &lusup[luptr], tempv );
+
+ luptr += segsze; /* Dense matrix-vector */
+ tempv1 = &tempv[segsze];
+ smatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
+#endif
+
+ /* Scatter tempv[*] into SPA dense[*] temporarily, such
+ * that tempv[*] can be used for the triangular solve of
+ * the next column of the panel. They will be copied into
+ * ucol[*] after the whole panel has been finished.
+ */
+ isub = lptr + no_zeros;
+ for (i = 0; i < segsze; i++) {
+ irow = lsub[isub];
+ dense_col[irow] = tempv[i];
+ tempv[i] = zero;
+ isub++;
+ }
+
+ /* Scatter the update from tempv1[*] into SPA dense[*] */
+ /* Start dense rectangular L */
+ for (i = 0; i < nrow; i++) {
+ irow = lsub[isub];
+ dense_col[irow] -= tempv1[i];
+ tempv1[i] = zero;
+ ++isub;
+ }
+
+ } /* else segsze>=4 ... */
+
+ } /* for each column in the panel... */
+
+ } /* else 1-D update ... */
+
+ } /* for each updating supernode ... */
+
+}
+
+
+
diff --git a/intern/opennl/superlu/spanel_dfs.c b/intern/opennl/superlu/spanel_dfs.c
new file mode 100644
index 00000000000..7f5f3c7532a
--- /dev/null
+++ b/intern/opennl/superlu/spanel_dfs.c
@@ -0,0 +1,249 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+void
+spanel_dfs (
+ const int m, /* in - number of rows in the matrix */
+ const int w, /* in */
+ const int jcol, /* in */
+ SuperMatrix *A, /* in - original matrix */
+ int *perm_r, /* in */
+ int *nseg, /* out */
+ float *dense, /* out */
+ int *panel_lsub, /* out */
+ int *segrep, /* out */
+ int *repfnz, /* out */
+ int *xprune, /* out */
+ int *marker, /* out */
+ int *parent, /* working array */
+ int *xplore, /* working array */
+ GlobalLU_t *Glu /* modified */
+ )
+{
+/*
+ * Purpose
+ * =======
+ *
+ * Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
+ *
+ * A supernode representative is the last column of a supernode.
+ * The nonzeros in U[*,j] are segments that end at supernodal
+ * representatives.
+ *
+ * The routine returns one list of the supernodal representatives
+ * in topological order of the dfs that generates them. This list is
+ * a superset of the topological order of each individual column within
+ * the panel.
+ * The location of the first nonzero in each supernodal segment
+ * (supernodal entry location) is also returned. Each column has a
+ * separate list for this purpose.
+ *
+ * Two marker arrays are used for dfs:
+ * marker[i] == jj, if i was visited during dfs of current column jj;
+ * marker1[i] >= jcol, if i was visited by earlier columns in this panel;
+ *
+ * marker: A-row --> A-row/col (0/1)
+ * repfnz: SuperA-col --> PA-row
+ * parent: SuperA-col --> SuperA-col
+ * xplore: SuperA-col --> index to L-structure
+ *
+ */
+ NCPformat *Astore;
+ float *a;
+ int *asub;
+ int *xa_begin, *xa_end;
+ int krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
+ int k, krow, kmark, kperm;
+ int xdfs, maxdfs, kpar;
+ int jj; /* index through each column in the panel */
+ int *marker1; /* marker1[jj] >= jcol if vertex jj was visited
+ by a previous column within this panel. */
+ int *repfnz_col; /* start of each column in the panel */
+ float *dense_col; /* start of each column in the panel */
+ int nextl_col; /* next available position in panel_lsub[*,jj] */
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+
+ /* Initialize pointers */
+ Astore = A->Store;
+ a = Astore->nzval;
+ asub = Astore->rowind;
+ xa_begin = Astore->colbeg;
+ xa_end = Astore->colend;
+ marker1 = marker + m;
+ repfnz_col = repfnz;
+ dense_col = dense;
+ *nseg = 0;
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+
+ /* For each column in the panel */
+ for (jj = jcol; jj < jcol + w; jj++) {
+ nextl_col = (jj - jcol) * m;
+
+#ifdef CHK_DFS
+ printf("\npanel col %d: ", jj);
+#endif
+
+ /* For each nonz in A[*,jj] do dfs */
+ for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
+ krow = asub[k];
+ dense_col[krow] = a[k];
+ kmark = marker[krow];
+ if ( kmark == jj )
+ continue; /* krow visited before, go to the next nonzero */
+
+ /* For each unmarked nbr krow of jj
+ * krow is in L: place it in structure of L[*,jj]
+ */
+ marker[krow] = jj;
+ kperm = perm_r[krow];
+
+ if ( kperm == EMPTY ) {
+ panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
+ }
+ /*
+ * krow is in U: if its supernode-rep krep
+ * has been explored, update repfnz[*]
+ */
+ else {
+
+ krep = xsup[supno[kperm]+1] - 1;
+ myfnz = repfnz_col[krep];
+
+#ifdef CHK_DFS
+ printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm);
+#endif
+ if ( myfnz != EMPTY ) { /* Representative visited before */
+ if ( myfnz > kperm ) repfnz_col[krep] = kperm;
+ /* continue; */
+ }
+ else {
+ /* Otherwise, perform dfs starting at krep */
+ oldrep = EMPTY;
+ parent[krep] = oldrep;
+ repfnz_col[krep] = kperm;
+ xdfs = xlsub[krep];
+ maxdfs = xprune[krep];
+
+#ifdef CHK_DFS
+ printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs);
+ for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
+ printf("\n");
+#endif
+ do {
+ /*
+ * For each unmarked kchild of krep
+ */
+ while ( xdfs < maxdfs ) {
+
+ kchild = lsub[xdfs];
+ xdfs++;
+ chmark = marker[kchild];
+
+ if ( chmark != jj ) { /* Not reached yet */
+ marker[kchild] = jj;
+ chperm = perm_r[kchild];
+
+ /* Case kchild is in L: place it in L[*,j] */
+ if ( chperm == EMPTY ) {
+ panel_lsub[nextl_col++] = kchild;
+ }
+ /* Case kchild is in U:
+ * chrep = its supernode-rep. If its rep has
+ * been explored, update its repfnz[*]
+ */
+ else {
+
+ chrep = xsup[supno[chperm]+1] - 1;
+ myfnz = repfnz_col[chrep];
+#ifdef CHK_DFS
+ printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm);
+#endif
+ if ( myfnz != EMPTY ) { /* Visited before */
+ if ( myfnz > chperm )
+ repfnz_col[chrep] = chperm;
+ }
+ else {
+ /* Cont. dfs at snode-rep of kchild */
+ xplore[krep] = xdfs;
+ oldrep = krep;
+ krep = chrep; /* Go deeper down G(L) */
+ parent[krep] = oldrep;
+ repfnz_col[krep] = chperm;
+ xdfs = xlsub[krep];
+ maxdfs = xprune[krep];
+#ifdef CHK_DFS
+ printf(" xdfs %d, maxdfs %d: ", xdfs, maxdfs);
+ for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
+ printf("\n");
+#endif
+ } /* else */
+
+ } /* else */
+
+ } /* if... */
+
+ } /* while xdfs < maxdfs */
+
+ /* krow has no more unexplored nbrs:
+ * Place snode-rep krep in postorder DFS, if this
+ * segment is seen for the first time. (Note that
+ * "repfnz[krep]" may change later.)
+ * Backtrack dfs to its parent.
+ */
+ if ( marker1[krep] < jcol ) {
+ segrep[*nseg] = krep;
+ ++(*nseg);
+ marker1[krep] = jj;
+ }
+
+ kpar = parent[krep]; /* Pop stack, mimic recursion */
+ if ( kpar == EMPTY ) break; /* dfs done */
+ krep = kpar;
+ xdfs = xplore[krep];
+ maxdfs = xprune[krep];
+
+#ifdef CHK_DFS
+ printf(" pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs);
+ for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
+ printf("\n");
+#endif
+ } while ( kpar != EMPTY ); /* do-while - until empty stack */
+
+ } /* else */
+
+ } /* else */
+
+ } /* for each nonz in A[*,jj] */
+
+ repfnz_col += m; /* Move to next column */
+ dense_col += m;
+
+ } /* for jj ... */
+
+}
diff --git a/intern/opennl/superlu/spivotL.c b/intern/opennl/superlu/spivotL.c
new file mode 100644
index 00000000000..6243065bb5b
--- /dev/null
+++ b/intern/opennl/superlu/spivotL.c
@@ -0,0 +1,173 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include "ssp_defs.h"
+
+#undef DEBUG
+
+int
+spivotL(
+ const int jcol, /* in */
+ const float u, /* in - diagonal pivoting threshold */
+ int *usepr, /* re-use the pivot sequence given by perm_r/iperm_r */
+ int *perm_r, /* may be modified */
+ int *iperm_r, /* in - inverse of perm_r */
+ int *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
+ int *pivrow, /* out */
+ GlobalLU_t *Glu, /* modified - global LU data structures */
+ SuperLUStat_t *stat /* output */
+ )
+{
+/*
+ * Purpose
+ * =======
+ * Performs the numerical pivoting on the current column of L,
+ * and the CDIV operation.
+ *
+ * Pivot policy:
+ * (1) Compute thresh = u * max_(i>=j) abs(A_ij);
+ * (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
+ * pivot row = k;
+ * ELSE IF abs(A_jj) >= thresh THEN
+ * pivot row = j;
+ * ELSE
+ * pivot row = m;
+ *
+ * Note: If you absolutely want to use a given pivot order, then set u=0.0.
+ *
+ * Return value: 0 success;
+ * i > 0 U(i,i) is exactly zero.
+ *
+ */
+ int fsupc; /* first column in the supernode */
+ int nsupc; /* no of columns in the supernode */
+ int nsupr; /* no of rows in the supernode */
+ int lptr; /* points to the starting subscript of the supernode */
+ int pivptr, old_pivptr, diag, diagind;
+ float pivmax, rtemp, thresh;
+ float temp;
+ float *lu_sup_ptr;
+ float *lu_col_ptr;
+ int *lsub_ptr;
+ int isub, icol, k, itemp;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ flops_t *ops = stat->ops;
+
+ /* Initialize pointers */
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+ fsupc = (Glu->xsup)[(Glu->supno)[jcol]];
+ nsupc = jcol - fsupc; /* excluding jcol; nsupc >= 0 */
+ lptr = xlsub[fsupc];
+ nsupr = xlsub[fsupc+1] - lptr;
+ lu_sup_ptr = &lusup[xlusup[fsupc]]; /* start of the current supernode */
+ lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
+ lsub_ptr = &lsub[lptr]; /* start of row indices of the supernode */
+
+#ifdef DEBUG
+if ( jcol == MIN_COL ) {
+ printf("Before cdiv: col %d\n", jcol);
+ for (k = nsupc; k < nsupr; k++)
+ printf(" lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
+}
+#endif
+
+ /* Determine the largest abs numerical value for partial pivoting;
+ Also search for user-specified pivot, and diagonal element. */
+ if ( *usepr ) *pivrow = iperm_r[jcol];
+ diagind = iperm_c[jcol];
+ pivmax = 0.0;
+ pivptr = nsupc;
+ diag = EMPTY;
+ old_pivptr = nsupc;
+ for (isub = nsupc; isub < nsupr; ++isub) {
+ rtemp = fabs (lu_col_ptr[isub]);
+ if ( rtemp > pivmax ) {
+ pivmax = rtemp;
+ pivptr = isub;
+ }
+ if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
+ if ( lsub_ptr[isub] == diagind ) diag = isub;
+ }
+
+ /* Test for singularity */
+ if ( pivmax == 0.0 ) {
+ *pivrow = lsub_ptr[pivptr];
+ perm_r[*pivrow] = jcol;
+ *usepr = 0;
+ return (jcol+1);
+ }
+
+ thresh = u * pivmax;
+
+ /* Choose appropriate pivotal element by our policy. */
+ if ( *usepr ) {
+ rtemp = fabs (lu_col_ptr[old_pivptr]);
+ if ( rtemp != 0.0 && rtemp >= thresh )
+ pivptr = old_pivptr;
+ else
+ *usepr = 0;
+ }
+ if ( *usepr == 0 ) {
+ /* Use diagonal pivot? */
+ if ( diag >= 0 ) { /* diagonal exists */
+ rtemp = fabs (lu_col_ptr[diag]);
+ if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
+ }
+ *pivrow = lsub_ptr[pivptr];
+ }
+
+ /* Record pivot row */
+ perm_r[*pivrow] = jcol;
+
+ /* Interchange row subscripts */
+ if ( pivptr != nsupc ) {
+ itemp = lsub_ptr[pivptr];
+ lsub_ptr[pivptr] = lsub_ptr[nsupc];
+ lsub_ptr[nsupc] = itemp;
+
+ /* Interchange numerical values as well, for the whole snode, such
+ * that L is indexed the same way as A.
+ */
+ for (icol = 0; icol <= nsupc; icol++) {
+ itemp = pivptr + icol * nsupr;
+ temp = lu_sup_ptr[itemp];
+ lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
+ lu_sup_ptr[nsupc + icol*nsupr] = temp;
+ }
+ } /* if */
+
+ /* cdiv operation */
+ ops[FACT] += nsupr - nsupc;
+
+ temp = 1.0 / lu_col_ptr[nsupc];
+ for (k = nsupc+1; k < nsupr; k++)
+ lu_col_ptr[k] *= temp;
+
+ return 0;
+}
+
diff --git a/intern/opennl/superlu/spruneL.c b/intern/opennl/superlu/spruneL.c
new file mode 100644
index 00000000000..59702706375
--- /dev/null
+++ b/intern/opennl/superlu/spruneL.c
@@ -0,0 +1,149 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+void
+spruneL(
+ const int jcol, /* in */
+ const int *perm_r, /* in */
+ const int pivrow, /* in */
+ const int nseg, /* in */
+ const int *segrep, /* in */
+ const int *repfnz, /* in */
+ int *xprune, /* out */
+ GlobalLU_t *Glu /* modified - global LU data structures */
+ )
+{
+/*
+ * Purpose
+ * =======
+ * Prunes the L-structure of supernodes whose L-structure
+ * contains the current pivot row "pivrow"
+ *
+ */
+ float utemp;
+ int jsupno, irep, irep1, kmin, kmax, krow, movnum;
+ int i, ktemp, minloc, maxloc;
+ int do_prune; /* logical variable */
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+
+ /*
+ * For each supernode-rep irep in U[*,j]
+ */
+ jsupno = supno[jcol];
+ for (i = 0; i < nseg; i++) {
+
+ irep = segrep[i];
+ irep1 = irep + 1;
+ do_prune = FALSE;
+
+ /* Don't prune with a zero U-segment */
+ if ( repfnz[irep] == EMPTY )
+ continue;
+
+ /* If a snode overlaps with the next panel, then the U-segment
+ * is fragmented into two parts -- irep and irep1. We should let
+ * pruning occur at the rep-column in irep1's snode.
+ */
+ if ( supno[irep] == supno[irep1] ) /* Don't prune */
+ continue;
+
+ /*
+ * If it has not been pruned & it has a nonz in row L[pivrow,i]
+ */
+ if ( supno[irep] != jsupno ) {
+ if ( xprune[irep] >= xlsub[irep1] ) {
+ kmin = xlsub[irep];
+ kmax = xlsub[irep1] - 1;
+ for (krow = kmin; krow <= kmax; krow++)
+ if ( lsub[krow] == pivrow ) {
+ do_prune = TRUE;
+ break;
+ }
+ }
+
+ if ( do_prune ) {
+
+ /* Do a quicksort-type partition
+ * movnum=TRUE means that the num values have to be exchanged.
+ */
+ movnum = FALSE;
+ if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
+ movnum = TRUE;
+
+ while ( kmin <= kmax ) {
+
+ if ( perm_r[lsub[kmax]] == EMPTY )
+ kmax--;
+ else if ( perm_r[lsub[kmin]] != EMPTY )
+ kmin++;
+ else { /* kmin below pivrow, and kmax above pivrow:
+ * interchange the two subscripts
+ */
+ ktemp = lsub[kmin];
+ lsub[kmin] = lsub[kmax];
+ lsub[kmax] = ktemp;
+
+ /* If the supernode has only one column, then we
+ * only keep one set of subscripts. For any subscript
+ * interchange performed, similar interchange must be
+ * done on the numerical values.
+ */
+ if ( movnum ) {
+ minloc = xlusup[irep] + (kmin - xlsub[irep]);
+ maxloc = xlusup[irep] + (kmax - xlsub[irep]);
+ utemp = lusup[minloc];
+ lusup[minloc] = lusup[maxloc];
+ lusup[maxloc] = utemp;
+ }
+
+ kmin++;
+ kmax--;
+
+ }
+
+ } /* while */
+
+ xprune[irep] = kmin; /* Pruning */
+
+#ifdef CHK_PRUNE
+ printf(" After spruneL(),using col %d: xprune[%d] = %d\n",
+ jcol, irep, kmin);
+#endif
+ } /* if do_prune */
+
+ } /* if */
+
+ } /* for each U-segment... */
+}
diff --git a/intern/opennl/superlu/ssnode_bmod.c b/intern/opennl/superlu/ssnode_bmod.c
new file mode 100644
index 00000000000..fe97abd9ff6
--- /dev/null
+++ b/intern/opennl/superlu/ssnode_bmod.c
@@ -0,0 +1,117 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+
+/*
+ * Performs numeric block updates within the relaxed snode.
+ */
+int
+ssnode_bmod (
+ const int jcol, /* in */
+ const int fsupc, /* in */
+ float *dense, /* in */
+ float *tempv, /* working array */
+ GlobalLU_t *Glu, /* modified */
+ SuperLUStat_t *stat /* output */
+ )
+{
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ _fcd ftcs1 = _cptofcd("L", strlen("L")),
+ ftcs2 = _cptofcd("N", strlen("N")),
+ ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+ int incx = 1, incy = 1;
+ float alpha = -1.0, beta = 1.0;
+#endif
+
+ int luptr, nsupc, nsupr, nrow;
+ int isub, irow, i, iptr;
+ register int ufirst, nextlu;
+ int *lsub, *xlsub;
+ float *lusup;
+ int *xlusup;
+ flops_t *ops = stat->ops;
+
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+
+ nextlu = xlusup[jcol];
+
+ /*
+ * Process the supernodal portion of L\U[*,j]
+ */
+ for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
+ irow = lsub[isub];
+ lusup[nextlu] = dense[irow];
+ dense[irow] = 0;
+ ++nextlu;
+ }
+
+ xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
+
+ if ( fsupc < jcol ) {
+
+ luptr = xlusup[fsupc];
+ nsupr = xlsub[fsupc+1] - xlsub[fsupc];
+ nsupc = jcol - fsupc; /* Excluding jcol */
+ ufirst = xlusup[jcol]; /* Points to the beginning of column
+ jcol in supernode L\U(jsupno). */
+ nrow = nsupr - nsupc;
+
+ ops[TRSV] += nsupc * (nsupc - 1);
+ ops[GEMV] += 2 * nrow * nsupc;
+
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr,
+ &lusup[ufirst], &incx );
+ SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+ &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#else
+ strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr,
+ &lusup[ufirst], &incx );
+ sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
+ &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
+#endif
+#else
+ slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
+ smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
+ &lusup[ufirst], &tempv[0] );
+
+ /* Scatter tempv[*] into lusup[*] */
+ iptr = ufirst + nsupc;
+ for (i = 0; i < nrow; i++) {
+ lusup[iptr++] -= tempv[i];
+ tempv[i] = 0.0;
+ }
+#endif
+
+ }
+
+ return 0;
+}
diff --git a/intern/opennl/superlu/ssnode_dfs.c b/intern/opennl/superlu/ssnode_dfs.c
new file mode 100644
index 00000000000..c8974237a9a
--- /dev/null
+++ b/intern/opennl/superlu/ssnode_dfs.c
@@ -0,0 +1,106 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+ssnode_dfs (
+ const int jcol, /* in - start of the supernode */
+ const int kcol, /* in - end of the supernode */
+ const int *asub, /* in */
+ const int *xa_begin, /* in */
+ const int *xa_end, /* in */
+ int *xprune, /* out */
+ int *marker, /* modified */
+ GlobalLU_t *Glu /* modified */
+ )
+{
+/* Purpose
+ * =======
+ * ssnode_dfs() - Determine the union of the row structures of those
+ * columns within the relaxed snode.
+ * Note: The relaxed snodes are leaves of the supernodal etree, therefore,
+ * the portion outside the rectangular supernode must be zero.
+ *
+ * Return value
+ * ============
+ * 0 success;
+ * >0 number of bytes allocated when run out of memory.
+ *
+ */
+ register int i, k, ifrom, ito, nextl, new_next;
+ int nsuper, krow, kmark, mem_error;
+ int *xsup, *supno;
+ int *lsub, *xlsub;
+ int nzlmax;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ nzlmax = Glu->nzlmax;
+
+ nsuper = ++supno[jcol]; /* Next available supernode number */
+ nextl = xlsub[jcol];
+
+ for (i = jcol; i <= kcol; i++) {
+ /* For each nonzero in A[*,i] */
+ for (k = xa_begin[i]; k < xa_end[i]; k++) {
+ krow = asub[k];
+ kmark = marker[krow];
+ if ( kmark != kcol ) { /* First time visit krow */
+ marker[krow] = kcol;
+ lsub[nextl++] = krow;
+ if ( nextl >= nzlmax ) {
+ if ( (mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)) )
+ return (mem_error);
+ lsub = Glu->lsub;
+ }
+ }
+ }
+ supno[i] = nsuper;
+ }
+
+ /* Supernode > 1, then make a copy of the subscripts for pruning */
+ if ( jcol < kcol ) {
+ new_next = nextl + (nextl - xlsub[jcol]);
+ while ( new_next > nzlmax ) {
+ if ( (mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu)) )
+ return (mem_error);
+ lsub = Glu->lsub;
+ }
+ ito = nextl;
+ for (ifrom = xlsub[jcol]; ifrom < nextl; )
+ lsub[ito++] = lsub[ifrom++];
+ for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
+ nextl = ito;
+ }
+
+ xsup[nsuper+1] = kcol + 1;
+ supno[kcol+1] = nsuper;
+ xprune[kcol] = nextl;
+ xlsub[kcol+1] = nextl;
+
+ return 0;
+}
+
diff --git a/intern/opennl/superlu/ssp_blas2.c b/intern/opennl/superlu/ssp_blas2.c
new file mode 100644
index 00000000000..347f9ab5fd4
--- /dev/null
+++ b/intern/opennl/superlu/ssp_blas2.c
@@ -0,0 +1,469 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ * File name: ssp_blas2.c
+ * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
+ */
+
+#include "ssp_defs.h"
+
+/*
+ * Function prototypes
+ */
+void susolve(int, int, float*, float*);
+void slsolve(int, int, float*, float*);
+void smatvec(int, int, int, float*, float*, float*);
+int strsv_(char*, char*, char*, int*, float*, int*, float*, int*);
+
+int
+sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
+ SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
+{
+/*
+ * Purpose
+ * =======
+ *
+ * sp_strsv() solves one of the systems of equations
+ * A*x = b, or A'*x = b,
+ * where b and x are n element vectors and A is a sparse unit , or
+ * non-unit, upper or lower triangular matrix.
+ * No test for singularity or near-singularity is included in this
+ * routine. Such tests must be performed before calling this routine.
+ *
+ * Parameters
+ * ==========
+ *
+ * uplo - (input) char*
+ * On entry, uplo specifies whether the matrix is an upper or
+ * lower triangular matrix as follows:
+ * uplo = 'U' or 'u' A is an upper triangular matrix.
+ * uplo = 'L' or 'l' A is a lower triangular matrix.
+ *
+ * trans - (input) char*
+ * On entry, trans specifies the equations to be solved as
+ * follows:
+ * trans = 'N' or 'n' A*x = b.
+ * trans = 'T' or 't' A'*x = b.
+ * trans = 'C' or 'c' A'*x = b.
+ *
+ * diag - (input) char*
+ * On entry, diag specifies whether or not A is unit
+ * triangular as follows:
+ * diag = 'U' or 'u' A is assumed to be unit triangular.
+ * diag = 'N' or 'n' A is not assumed to be unit
+ * triangular.
+ *
+ * L - (input) SuperMatrix*
+ * The factor L from the factorization Pr*A*Pc=L*U. Use
+ * compressed row subscripts storage for supernodes,
+ * i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.
+ *
+ * U - (input) SuperMatrix*
+ * The factor U from the factorization Pr*A*Pc=L*U.
+ * U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
+ *
+ * x - (input/output) float*
+ * Before entry, the incremented array X must contain the n
+ * element right-hand side vector b. On exit, X is overwritten
+ * with the solution vector x.
+ *
+ * info - (output) int*
+ * If *info = -i, the i-th argument had an illegal value.
+ *
+ */
+#ifdef _CRAY
+ _fcd ftcs1 = _cptofcd("L", strlen("L")),
+ ftcs2 = _cptofcd("N", strlen("N")),
+ ftcs3 = _cptofcd("U", strlen("U"));
+#endif
+ SCformat *Lstore;
+ NCformat *Ustore;
+ float *Lval, *Uval;
+ int incx = 1;
+ int nrow;
+ int fsupc, nsupr, nsupc, luptr, istart, irow;
+ int i, k, iptr, jcol;
+ float *work;
+ flops_t solve_ops;
+
+ /* Test the input parameters */
+ *info = 0;
+ if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
+ else if ( !lsame_(trans, "N") && !lsame_(trans, "T") &&
+ !lsame_(trans, "C")) *info = -2;
+ else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
+ else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
+ else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
+ if ( *info ) {
+ i = -(*info);
+ xerbla_("sp_strsv", &i);
+ return 0;
+ }
+
+ Lstore = L->Store;
+ Lval = Lstore->nzval;
+ Ustore = U->Store;
+ Uval = Ustore->nzval;
+ solve_ops = 0;
+
+ if ( !(work = floatCalloc(L->nrow)) )
+ ABORT("Malloc fails for work in sp_strsv().");
+
+ if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
+
+ if ( lsame_(uplo, "L") ) {
+ /* Form x := inv(L)*x */
+ if ( L->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = 0; k <= Lstore->nsuper; k++) {
+ fsupc = L_FST_SUPC(k);
+ istart = L_SUB_START(fsupc);
+ nsupr = L_SUB_START(fsupc+1) - istart;
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+ nrow = nsupr - nsupc;
+
+ solve_ops += nsupc * (nsupc - 1);
+ solve_ops += 2 * nrow * nsupc;
+
+ if ( nsupc == 1 ) {
+ for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
+ irow = L_SUB(iptr);
+ ++luptr;
+ x[irow] -= x[fsupc] * Lval[luptr];
+ }
+ } else {
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+
+ SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
+ &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
+#else
+ strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+
+ sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
+ &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
+#endif
+#else
+ slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
+
+ smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
+ &x[fsupc], &work[0] );
+#endif
+
+ iptr = istart + nsupc;
+ for (i = 0; i < nrow; ++i, ++iptr) {
+ irow = L_SUB(iptr);
+ x[irow] -= work[i]; /* Scatter */
+ work[i] = 0.0;
+
+ }
+ }
+ } /* for k ... */
+
+ } else {
+ /* Form x := inv(U)*x */
+
+ if ( U->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = Lstore->nsuper; k >= 0; k--) {
+ fsupc = L_FST_SUPC(k);
+ nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+
+ solve_ops += nsupc * (nsupc + 1);
+
+ if ( nsupc == 1 ) {
+ x[fsupc] /= Lval[luptr];
+ for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
+ irow = U_SUB(i);
+ x[irow] -= x[fsupc] * Uval[i];
+ }
+ } else {
+#ifdef USE_VENDOR_BLAS
+#ifdef _CRAY
+ STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#else
+ strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#endif
+#else
+ susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
+#endif
+
+ for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+ solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
+ for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
+ i++) {
+ irow = U_SUB(i);
+ x[irow] -= x[jcol] * Uval[i];
+ }
+ }
+ }
+ } /* for k ... */
+
+ }
+ } else { /* Form x := inv(A')*x */
+
+ if ( lsame_(uplo, "L") ) {
+ /* Form x := inv(L')*x */
+ if ( L->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = Lstore->nsuper; k >= 0; --k) {
+ fsupc = L_FST_SUPC(k);
+ istart = L_SUB_START(fsupc);
+ nsupr = L_SUB_START(fsupc+1) - istart;
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+
+ solve_ops += 2 * (nsupr - nsupc) * nsupc;
+
+ for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+ iptr = istart + nsupc;
+ for (i = L_NZ_START(jcol) + nsupc;
+ i < L_NZ_START(jcol+1); i++) {
+ irow = L_SUB(iptr);
+ x[jcol] -= x[irow] * Lval[i];
+ iptr++;
+ }
+ }
+
+ if ( nsupc > 1 ) {
+ solve_ops += nsupc * (nsupc - 1);
+#ifdef _CRAY
+ ftcs1 = _cptofcd("L", strlen("L"));
+ ftcs2 = _cptofcd("T", strlen("T"));
+ ftcs3 = _cptofcd("U", strlen("U"));
+ STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#else
+ strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#endif
+ }
+ }
+ } else {
+ /* Form x := inv(U')*x */
+ if ( U->nrow == 0 ) return 0; /* Quick return */
+
+ for (k = 0; k <= Lstore->nsuper; k++) {
+ fsupc = L_FST_SUPC(k);
+ nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
+ nsupc = L_FST_SUPC(k+1) - fsupc;
+ luptr = L_NZ_START(fsupc);
+
+ for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
+ solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
+ for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
+ irow = U_SUB(i);
+ x[jcol] -= x[irow] * Uval[i];
+ }
+ }
+
+ solve_ops += nsupc * (nsupc + 1);
+
+ if ( nsupc == 1 ) {
+ x[fsupc] /= Lval[luptr];
+ } else {
+#ifdef _CRAY
+ ftcs1 = _cptofcd("U", strlen("U"));
+ ftcs2 = _cptofcd("T", strlen("T"));
+ ftcs3 = _cptofcd("N", strlen("N"));
+ STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#else
+ strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
+ &x[fsupc], &incx);
+#endif
+ }
+ } /* for k ... */
+ }
+ }
+
+ stat->ops[SOLVE] += solve_ops;
+ SUPERLU_FREE(work);
+ return 0;
+}
+
+
+
+
+int
+sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
+ int incx, float beta, float *y, int incy)
+{
+/* Purpose
+ =======
+
+ sp_sgemv() performs one of the matrix-vector operations
+ y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
+ where alpha and beta are scalars, x and y are vectors and A is a
+ sparse A->nrow by A->ncol matrix.
+
+ Parameters
+ ==========
+
+ TRANS - (input) char*
+ On entry, TRANS specifies the operation to be performed as
+ follows:
+ TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
+ TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
+ TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
+
+ ALPHA - (input) float
+ On entry, ALPHA specifies the scalar alpha.
+
+ A - (input) SuperMatrix*
+ Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+ Currently, the type of A can be:
+ Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
+ In the future, more general A can be handled.
+
+ X - (input) float*, array of DIMENSION at least
+ ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+ and at least
+ ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+ Before entry, the incremented array X must contain the
+ vector x.
+
+ INCX - (input) int
+ On entry, INCX specifies the increment for the elements of
+ X. INCX must not be zero.
+
+ BETA - (input) float
+ On entry, BETA specifies the scalar beta. When BETA is
+ supplied as zero then Y need not be set on input.
+
+ Y - (output) float*, array of DIMENSION at least
+ ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+ and at least
+ ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+ Before entry with BETA non-zero, the incremented array Y
+ must contain the vector y. On exit, Y is overwritten by the
+ updated vector y.
+
+ INCY - (input) int
+ On entry, INCY specifies the increment for the elements of
+ Y. INCY must not be zero.
+
+ ==== Sparse Level 2 Blas routine.
+*/
+
+ /* Local variables */
+ NCformat *Astore;
+ float *Aval;
+ int info;
+ float temp;
+ int lenx, leny, i, j, irow;
+ int iy, jx, jy, kx, ky;
+ int notran;
+
+ notran = lsame_(trans, "N");
+ Astore = A->Store;
+ Aval = Astore->nzval;
+
+ /* Test the input parameters */
+ info = 0;
+ if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
+ else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
+ else if (incx == 0) info = 5;
+ else if (incy == 0) info = 8;
+ if (info != 0) {
+ xerbla_("sp_sgemv ", &info);
+ return 0;
+ }
+
+ /* Quick return if possible. */
+ if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
+ return 0;
+
+ /* Set LENX and LENY, the lengths of the vectors x and y, and set
+ up the start points in X and Y. */
+ if (lsame_(trans, "N")) {
+ lenx = A->ncol;
+ leny = A->nrow;
+ } else {
+ lenx = A->nrow;
+ leny = A->ncol;
+ }
+ if (incx > 0) kx = 0;
+ else kx = - (lenx - 1) * incx;
+ if (incy > 0) ky = 0;
+ else ky = - (leny - 1) * incy;
+
+ /* Start the operations. In this version the elements of A are
+ accessed sequentially with one pass through A. */
+ /* First form y := beta*y. */
+ if (beta != 1.) {
+ if (incy == 1) {
+ if (beta == 0.)
+ for (i = 0; i < leny; ++i) y[i] = 0.;
+ else
+ for (i = 0; i < leny; ++i) y[i] = beta * y[i];
+ } else {
+ iy = ky;
+ if (beta == 0.)
+ for (i = 0; i < leny; ++i) {
+ y[iy] = 0.;
+ iy += incy;
+ }
+ else
+ for (i = 0; i < leny; ++i) {
+ y[iy] = beta * y[iy];
+ iy += incy;
+ }
+ }
+ }
+
+ if (alpha == 0.) return 0;
+
+ if ( notran ) {
+ /* Form y := alpha*A*x + y. */
+ jx = kx;
+ if (incy == 1) {
+ for (j = 0; j < A->ncol; ++j) {
+ if (x[jx] != 0.) {
+ temp = alpha * x[jx];
+ for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
+ irow = Astore->rowind[i];
+ y[irow] += temp * Aval[i];
+ }
+ }
+ jx += incx;
+ }
+ } else {
+ ABORT("Not implemented.");
+ }
+ } else {
+ /* Form y := alpha*A'*x + y. */
+ jy = ky;
+ if (incx == 1) {
+ for (j = 0; j < A->ncol; ++j) {
+ temp = 0.;
+ for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
+ irow = Astore->rowind[i];
+ temp += Aval[i] * x[irow];
+ }
+ y[jy] += alpha * temp;
+ jy += incy;
+ }
+ } else {
+ ABORT("Not implemented.");
+ }
+ }
+ return 0;
+} /* sp_sgemv */
+
+
+
diff --git a/intern/opennl/superlu/ssp_blas3.c b/intern/opennl/superlu/ssp_blas3.c
new file mode 100644
index 00000000000..19086077c4c
--- /dev/null
+++ b/intern/opennl/superlu/ssp_blas3.c
@@ -0,0 +1,121 @@
+
+
+/*
+ * -- SuperLU routine (version 2.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * November 15, 1997
+ *
+ */
+/*
+ * File name: sp_blas3.c
+ * Purpose: Sparse BLAS3, using some dense BLAS3 operations.
+ */
+
+#include "ssp_defs.h"
+#include "util.h"
+
+int
+sp_sgemm(char *transa, int n,
+ float alpha, SuperMatrix *A, float *b, int ldb,
+ float beta, float *c, int ldc)
+{
+/* Purpose
+ =======
+
+ sp_s performs one of the matrix-matrix operations
+
+ C := alpha*op( A )*op( B ) + beta*C,
+
+ where op( X ) is one of
+
+ op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ),
+
+ alpha and beta are scalars, and A, B and C are matrices, with op( A )
+ an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
+
+
+ Parameters
+ ==========
+
+ TRANSA - (input) char*
+ On entry, TRANSA specifies the form of op( A ) to be used in
+ the matrix multiplication as follows:
+ TRANSA = 'N' or 'n', op( A ) = A.
+ TRANSA = 'T' or 't', op( A ) = A'.
+ TRANSA = 'C' or 'c', op( A ) = conjg( A' ).
+ Unchanged on exit.
+
+ TRANSB - (input) char*
+ On entry, TRANSB specifies the form of op( B ) to be used in
+ the matrix multiplication as follows:
+ TRANSB = 'N' or 'n', op( B ) = B.
+ TRANSB = 'T' or 't', op( B ) = B'.
+ TRANSB = 'C' or 'c', op( B ) = conjg( B' ).
+ Unchanged on exit.
+
+ M - (input) int
+ On entry, M specifies the number of rows of the matrix
+ op( A ) and of the matrix C. M must be at least zero.
+ Unchanged on exit.
+
+ N - (input) int
+ On entry, N specifies the number of columns of the matrix
+ op( B ) and the number of columns of the matrix C. N must be
+ at least zero.
+ Unchanged on exit.
+
+ K - (input) int
+ On entry, K specifies the number of columns of the matrix
+ op( A ) and the number of rows of the matrix op( B ). K must
+ be at least zero.
+ Unchanged on exit.
+
+ ALPHA - (input) float
+ On entry, ALPHA specifies the scalar alpha.
+
+ A - (input) SuperMatrix*
+ Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
+ Currently, the type of A can be:
+ Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
+ In the future, more general A can be handled.
+
+ B - FLOAT PRECISION array of DIMENSION ( LDB, kb ), where kb is
+ n when TRANSB = 'N' or 'n', and is k otherwise.
+ Before entry with TRANSB = 'N' or 'n', the leading k by n
+ part of the array B must contain the matrix B, otherwise
+ the leading n by k part of the array B must contain the
+ matrix B.
+ Unchanged on exit.
+
+ LDB - (input) int
+ On entry, LDB specifies the first dimension of B as declared
+ in the calling (sub) program. LDB must be at least max( 1, n ).
+ Unchanged on exit.
+
+ BETA - (input) float
+ On entry, BETA specifies the scalar beta. When BETA is
+ supplied as zero then C need not be set on input.
+
+ C - FLOAT PRECISION array of DIMENSION ( LDC, n ).
+ Before entry, the leading m by n part of the array C must
+ contain the matrix C, except when beta is zero, in which
+ case C need not be set on entry.
+ On exit, the array C is overwritten by the m by n matrix
+ ( alpha*op( A )*B + beta*C ).
+
+ LDC - (input) int
+ On entry, LDC specifies the first dimension of C as declared
+ in the calling (sub)program. LDC must be at least max(1,m).
+ Unchanged on exit.
+
+ ==== Sparse Level 3 Blas routine.
+*/
+ int incx = 1, incy = 1;
+ int j;
+
+ for (j = 0; j < n; ++j) {
+ sp_sgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
+ }
+ return 0;
+}
diff --git a/intern/opennl/superlu/ssp_defs.h b/intern/opennl/superlu/ssp_defs.h
new file mode 100644
index 00000000000..5b4e86b175b
--- /dev/null
+++ b/intern/opennl/superlu/ssp_defs.h
@@ -0,0 +1,234 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+#ifndef __SUPERLU_sSP_DEFS /* allow multiple inclusions */
+#define __SUPERLU_sSP_DEFS
+
+/*
+ * File name: ssp_defs.h
+ * Purpose: Sparse matrix types and function prototypes
+ * History:
+ */
+
+#ifdef _CRAY
+#include <fortran.h>
+#include <string.h>
+#endif
+
+/* Define my integer type int_t */
+typedef int int_t; /* default */
+
+#include "Cnames.h"
+#include "supermatrix.h"
+#include "util.h"
+
+
+/*
+ * Global data structures used in LU factorization -
+ *
+ * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
+ * (xsup,supno): supno[i] is the supernode no to which i belongs;
+ * xsup(s) points to the beginning of the s-th supernode.
+ * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12)
+ * xsup 0 1 2 4 7 12
+ * Note: dfs will be performed on supernode rep. relative to the new
+ * row pivoting ordering
+ *
+ * (xlsub,lsub): lsub[*] contains the compressed subscript of
+ * rectangular supernodes; xlsub[j] points to the starting
+ * location of the j-th column in lsub[*]. Note that xlsub
+ * is indexed by column.
+ * Storage: original row subscripts
+ *
+ * During the course of sparse LU factorization, we also use
+ * (xlsub,lsub) for the purpose of symmetric pruning. For each
+ * supernode {s,s+1,...,t=s+r} with first column s and last
+ * column t, the subscript set
+ * lsub[j], j=xlsub[s], .., xlsub[s+1]-1
+ * is the structure of column s (i.e. structure of this supernode).
+ * It is used for the storage of numerical values.
+ * Furthermore,
+ * lsub[j], j=xlsub[t], .., xlsub[t+1]-1
+ * is the structure of the last column t of this supernode.
+ * It is for the purpose of symmetric pruning. Therefore, the
+ * structural subscripts can be rearranged without making physical
+ * interchanges among the numerical values.
+ *
+ * However, if the supernode has only one column, then we
+ * only keep one set of subscripts. For any subscript interchange
+ * performed, similar interchange must be done on the numerical
+ * values.
+ *
+ * The last column structures (for pruning) will be removed
+ * after the numercial LU factorization phase.
+ *
+ * (xlusup,lusup): lusup[*] contains the numerical values of the
+ * rectangular supernodes; xlusup[j] points to the starting
+ * location of the j-th column in storage vector lusup[*]
+ * Note: xlusup is indexed by column.
+ * Each rectangular supernode is stored by column-major
+ * scheme, consistent with Fortran 2-dim array storage.
+ *
+ * (xusub,ucol,usub): ucol[*] stores the numerical values of
+ * U-columns outside the rectangular supernodes. The row
+ * subscript of nonzero ucol[k] is stored in usub[k].
+ * xusub[i] points to the starting location of column i in ucol.
+ * Storage: new row subscripts; that is subscripts of PA.
+ */
+typedef struct {
+ int *xsup; /* supernode and column mapping */
+ int *supno;
+ int *lsub; /* compressed L subscripts */
+ int *xlsub;
+ float *lusup; /* L supernodes */
+ int *xlusup;
+ float *ucol; /* U columns */
+ int *usub;
+ int *xusub;
+ int nzlmax; /* current max size of lsub */
+ int nzumax; /* " " " ucol */
+ int nzlumax; /* " " " lusup */
+ int n; /* number of columns in the matrix */
+ LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
+} GlobalLU_t;
+
+typedef struct {
+ float for_lu;
+ float total_needed;
+ int expansions;
+} mem_usage_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Driver routines */
+extern void
+sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
+ SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
+extern void
+sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
+ char *, float *, float *, SuperMatrix *, SuperMatrix *,
+ void *, int, SuperMatrix *, SuperMatrix *,
+ float *, float *, float *, float *,
+ mem_usage_t *, SuperLUStat_t *, int *);
+
+/* Supernodal LU factor related */
+extern void
+sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, float *,
+ int *, int *, Stype_t, Dtype_t, Mtype_t);
+extern void
+sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, float *,
+ int *, int *, Stype_t, Dtype_t, Mtype_t);
+extern void
+sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
+extern void
+sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int,
+ Stype_t, Dtype_t, Mtype_t);
+extern void
+sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, float *,
+ int *, int *, int *, int *, int *,
+ Stype_t, Dtype_t, Mtype_t);
+extern void
+sCopy_Dense_Matrix(int, int, float *, int, float *, int);
+
+extern void countnz (const int, int *, int *, int *, GlobalLU_t *);
+extern void fixupL (const int, const int *, GlobalLU_t *);
+
+extern void sallocateA (int, int, float **, int **, int **);
+extern void sgstrf (superlu_options_t*, SuperMatrix*,
+ int, int, int*, void *, int, int *, int *,
+ SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
+extern int ssnode_dfs (const int, const int, const int *, const int *,
+ const int *, int *, int *, GlobalLU_t *);
+extern int ssnode_bmod (const int, const int, float *,
+ float *, GlobalLU_t *, SuperLUStat_t*);
+extern void spanel_dfs (const int, const int, const int, SuperMatrix *,
+ int *, int *, float *, int *, int *, int *,
+ int *, int *, int *, int *, GlobalLU_t *);
+extern void spanel_bmod (const int, const int, const int, const int,
+ float *, float *, int *, int *,
+ GlobalLU_t *, SuperLUStat_t*);
+extern int scolumn_dfs (const int, const int, int *, int *, int *, int *,
+ int *, int *, int *, int *, int *, GlobalLU_t *);
+extern int scolumn_bmod (const int, const int, float *,
+ float *, int *, int *, int,
+ GlobalLU_t *, SuperLUStat_t*);
+extern int scopy_to_ucol (int, int, int *, int *, int *,
+ float *, GlobalLU_t *);
+extern int spivotL (const int, const float, int *, int *,
+ int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
+extern void spruneL (const int, const int *, const int, const int,
+ const int *, const int *, int *, GlobalLU_t *);
+extern void sreadmt (int *, int *, int *, float **, int **, int **);
+extern void sGenXtrue (int, int, float *, int);
+extern void sFillRHS (trans_t, int, float *, int, SuperMatrix *,
+ SuperMatrix *);
+extern void sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
+ SuperMatrix *, SuperLUStat_t*, int *);
+
+
+/* Driver related */
+
+extern void sgsequ (SuperMatrix *, float *, float *, float *,
+ float *, float *, int *);
+extern void slaqgs (SuperMatrix *, float *, float *, float,
+ float, float, char *);
+extern void sgscon (char *, SuperMatrix *, SuperMatrix *,
+ float, float *, SuperLUStat_t*, int *);
+extern float sPivotGrowth(int, SuperMatrix *, int *,
+ SuperMatrix *, SuperMatrix *);
+extern void sgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
+ SuperMatrix *, int *, int *, char *, float *,
+ float *, SuperMatrix *, SuperMatrix *,
+ float *, float *, SuperLUStat_t*, int *);
+
+extern int sp_strsv (char *, char *, char *, SuperMatrix *,
+ SuperMatrix *, float *, SuperLUStat_t*, int *);
+extern int sp_sgemv (char *, float, SuperMatrix *, float *,
+ int, float, float *, int);
+
+extern int sp_sgemm (char *, int, float,
+ SuperMatrix *, float *, int, float,
+ float *, int);
+
+/* Memory-related */
+extern int sLUMemInit (fact_t, void *, int, int, int, int, int,
+ SuperMatrix *, SuperMatrix *,
+ GlobalLU_t *, int **, float **);
+extern void sSetRWork (int, int, float *, float **, float **);
+extern void sLUWorkFree (int *, float *, GlobalLU_t *);
+extern int sLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
+
+extern float *floatMalloc(int);
+extern float *floatCalloc(int);
+extern int smemory_usage(const int, const int, const int, const int);
+extern int sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
+
+/* Auxiliary routines */
+extern void sreadhb(int *, int *, int *, float **, int **, int **);
+extern void sCompRow_to_CompCol(int, int, int, float*, int*, int*,
+ float **, int **, int **);
+extern void sfill (float *, int, float);
+extern void sinf_norm_error (int, SuperMatrix *, float *);
+extern void PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
+ float, float, float *, float *, char *);
+
+/* Routines for debugging */
+extern void sPrint_CompCol_Matrix(char *, SuperMatrix *);
+extern void sPrint_SuperNode_Matrix(char *, SuperMatrix *);
+extern void sPrint_Dense_Matrix(char *, SuperMatrix *);
+extern void print_lu_col(char *, int, int, int *, GlobalLU_t *);
+extern void check_tempv(int, float *);
+
+#ifdef __cplusplus
+ }
+#endif
+
+#endif /* __SUPERLU_sSP_DEFS */
+
diff --git a/intern/opennl/superlu/strsv.c b/intern/opennl/superlu/strsv.c
new file mode 100644
index 00000000000..2f6a92c0d0d
--- /dev/null
+++ b/intern/opennl/superlu/strsv.c
@@ -0,0 +1,331 @@
+
+/* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, int *n,
+ float *a, int *lda, float *x, int *incx)
+{
+
+
+ /* System generated locals */
+ int i__1, i__2;
+
+ /* Local variables */
+ static int info;
+ static float temp;
+ static int i, j;
+ extern int lsame_(char *, char *);
+ static int ix, jx, kx;
+ extern /* Subroutine */ int xerbla_(char *, int *);
+ static int nounit;
+
+
+/* Purpose
+ =======
+
+ STRSV solves one of the systems of equations
+
+ A*x = b, or A'*x = b,
+
+ where b and x are n element vectors and A is an n by n unit, or
+ non-unit, upper or lower triangular matrix.
+
+ No test for singularity or near-singularity is included in this
+ routine. Such tests must be performed before calling this routine.
+
+ Parameters
+ ==========
+
+ UPLO - CHARACTER*1.
+ On entry, UPLO specifies whether the matrix is an upper or
+ lower triangular matrix as follows:
+
+ UPLO = 'U' or 'u' A is an upper triangular matrix.
+
+ UPLO = 'L' or 'l' A is a lower triangular matrix.
+
+ Unchanged on exit.
+
+ TRANS - CHARACTER*1.
+ On entry, TRANS specifies the equations to be solved as
+ follows:
+
+ TRANS = 'N' or 'n' A*x = b.
+
+ TRANS = 'T' or 't' A'*x = b.
+
+ TRANS = 'C' or 'c' A'*x = b.
+
+ Unchanged on exit.
+
+ DIAG - CHARACTER*1.
+ On entry, DIAG specifies whether or not A is unit
+ triangular as follows:
+
+ DIAG = 'U' or 'u' A is assumed to be unit triangular.
+
+ DIAG = 'N' or 'n' A is not assumed to be unit
+ triangular.
+
+ Unchanged on exit.
+
+ N - INTEGER.
+ On entry, N specifies the order of the matrix A.
+ N must be at least zero.
+ Unchanged on exit.
+
+ A - REAL array of DIMENSION ( LDA, n ).
+ Before entry with UPLO = 'U' or 'u', the leading n by n
+ upper triangular part of the array A must contain the upper
+
+ triangular matrix and the strictly lower triangular part of
+
+ A is not referenced.
+ Before entry with UPLO = 'L' or 'l', the leading n by n
+ lower triangular part of the array A must contain the lower
+
+ triangular matrix and the strictly upper triangular part of
+
+ A is not referenced.
+ Note that when DIAG = 'U' or 'u', the diagonal elements of
+
+ A are not referenced either, but are assumed to be unity.
+ Unchanged on exit.
+
+ LDA - INTEGER.
+ On entry, LDA specifies the first dimension of A as declared
+
+ in the calling (sub) program. LDA must be at least
+ max( 1, n ).
+ Unchanged on exit.
+
+ X - REAL array of dimension at least
+ ( 1 + ( n - 1 )*abs( INCX ) ).
+ Before entry, the incremented array X must contain the n
+ element right-hand side vector b. On exit, X is overwritten
+
+ with the solution vector x.
+
+ INCX - INTEGER.
+ On entry, INCX specifies the increment for the elements of
+ X. INCX must not be zero.
+ Unchanged on exit.
+
+
+ Level 2 Blas routine.
+
+ -- Written on 22-October-1986.
+ Jack Dongarra, Argonne National Lab.
+ Jeremy Du Croz, Nag Central Office.
+ Sven Hammarling, Nag Central Office.
+ Richard Hanson, Sandia National Labs.
+
+
+
+ Test the input parameters.
+
+
+ Parameter adjustments
+ Function Body */
+#define X(I) x[(I)-1]
+
+#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
+
+ info = 0;
+ if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+ info = 1;
+ } else if (! lsame_(trans, "N") && ! lsame_(trans, "T") &&
+ ! lsame_(trans, "C")) {
+ info = 2;
+ } else if (! lsame_(diag, "U") && ! lsame_(diag, "N")) {
+ info = 3;
+ } else if (*n < 0) {
+ info = 4;
+ } else if (*lda < ((1 > *n)? 1: *n)) {
+ info = 6;
+ } else if (*incx == 0) {
+ info = 8;
+ }
+ if (info != 0) {
+ xerbla_("STRSV ", &info);
+ return 0;
+ }
+
+/* Quick return if possible. */
+
+ if (*n == 0) {
+ return 0;
+ }
+
+ nounit = lsame_(diag, "N");
+
+/* Set up the start point in X if the increment is not unity. This
+ will be ( N - 1 )*INCX too small for descending loops. */
+
+ if (*incx <= 0) {
+ kx = 1 - (*n - 1) * *incx;
+ } else if (*incx != 1) {
+ kx = 1;
+ }
+
+/* Start the operations. In this version the elements of A are
+ accessed sequentially with one pass through A. */
+
+ if (lsame_(trans, "N")) {
+
+/* Form x := inv( A )*x. */
+
+ if (lsame_(uplo, "U")) {
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ if (X(j) != 0.f) {
+ if (nounit) {
+ X(j) /= A(j,j);
+ }
+ temp = X(j);
+ for (i = j - 1; i >= 1; --i) {
+ X(i) -= temp * A(i,j);
+/* L10: */
+ }
+ }
+/* L20: */
+ }
+ } else {
+ jx = kx + (*n - 1) * *incx;
+ for (j = *n; j >= 1; --j) {
+ if (X(jx) != 0.f) {
+ if (nounit) {
+ X(jx) /= A(j,j);
+ }
+ temp = X(jx);
+ ix = jx;
+ for (i = j - 1; i >= 1; --i) {
+ ix -= *incx;
+ X(ix) -= temp * A(i,j);
+/* L30: */
+ }
+ }
+ jx -= *incx;
+/* L40: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ i__1 = *n;
+ for (j = 1; j <= *n; ++j) {
+ if (X(j) != 0.f) {
+ if (nounit) {
+ X(j) /= A(j,j);
+ }
+ temp = X(j);
+ i__2 = *n;
+ for (i = j + 1; i <= *n; ++i) {
+ X(i) -= temp * A(i,j);
+/* L50: */
+ }
+ }
+/* L60: */
+ }
+ } else {
+ jx = kx;
+ i__1 = *n;
+ for (j = 1; j <= *n; ++j) {
+ if (X(jx) != 0.f) {
+ if (nounit) {
+ X(jx) /= A(j,j);
+ }
+ temp = X(jx);
+ ix = jx;
+ i__2 = *n;
+ for (i = j + 1; i <= *n; ++i) {
+ ix += *incx;
+ X(ix) -= temp * A(i,j);
+/* L70: */
+ }
+ }
+ jx += *incx;
+/* L80: */
+ }
+ }
+ }
+ } else {
+
+/* Form x := inv( A' )*x. */
+
+ if (lsame_(uplo, "U")) {
+ if (*incx == 1) {
+ i__1 = *n;
+ for (j = 1; j <= *n; ++j) {
+ temp = X(j);
+ i__2 = j - 1;
+ for (i = 1; i <= j-1; ++i) {
+ temp -= A(i,j) * X(i);
+/* L90: */
+ }
+ if (nounit) {
+ temp /= A(j,j);
+ }
+ X(j) = temp;
+/* L100: */
+ }
+ } else {
+ jx = kx;
+ i__1 = *n;
+ for (j = 1; j <= *n; ++j) {
+ temp = X(jx);
+ ix = kx;
+ i__2 = j - 1;
+ for (i = 1; i <= j-1; ++i) {
+ temp -= A(i,j) * X(ix);
+ ix += *incx;
+/* L110: */
+ }
+ if (nounit) {
+ temp /= A(j,j);
+ }
+ X(jx) = temp;
+ jx += *incx;
+/* L120: */
+ }
+ }
+ } else {
+ if (*incx == 1) {
+ for (j = *n; j >= 1; --j) {
+ temp = X(j);
+ i__1 = j + 1;
+ for (i = *n; i >= j+1; --i) {
+ temp -= A(i,j) * X(i);
+/* L130: */
+ }
+ if (nounit) {
+ temp /= A(j,j);
+ }
+ X(j) = temp;
+/* L140: */
+ }
+ } else {
+ kx += (*n - 1) * *incx;
+ jx = kx;
+ for (j = *n; j >= 1; --j) {
+ temp = X(jx);
+ ix = kx;
+ i__1 = j + 1;
+ for (i = *n; i >= j+1; --i) {
+ temp -= A(i,j) * X(ix);
+ ix -= *incx;
+/* L150: */
+ }
+ if (nounit) {
+ temp /= A(j,j);
+ }
+ X(jx) = temp;
+ jx -= *incx;
+/* L160: */
+ }
+ }
+ }
+ }
+
+ return 0;
+
+/* End of STRSV . */
+
+} /* strsv_ */
+
diff --git a/intern/opennl/superlu/superlu_timer.c b/intern/opennl/superlu/superlu_timer.c
new file mode 100644
index 00000000000..798fd59d4ea
--- /dev/null
+++ b/intern/opennl/superlu/superlu_timer.c
@@ -0,0 +1,55 @@
+/*
+ * Purpose
+ * =======
+ * Returns the time in seconds used by the process.
+ *
+ * Note: the timer function call is machine dependent. Use conditional
+ * compilation to choose the appropriate function.
+ *
+ */
+
+/* We want this flag, safer than putting in build system */
+#define NO_TIMER
+
+#ifdef SUN
+/*
+ * It uses the system call gethrtime(3C), which is accurate to
+ * nanoseconds.
+*/
+#include <sys/time.h>
+
+double SuperLU_timer_() {
+ return ( (double)gethrtime() / 1e9 );
+}
+
+#else
+
+#ifndef NO_TIMER
+#include <sys/types.h>
+#include <sys/times.h>
+#include <time.h>
+#include <sys/time.h>
+#endif
+
+#ifndef CLK_TCK
+#define CLK_TCK 60
+#endif
+
+double SuperLU_timer_()
+{
+#ifdef NO_TIMER
+ /* no sys/times.h on WIN32 */
+ double tmp;
+ tmp = 0.0;
+#else
+ struct tms use;
+ double tmp;
+ times(&use);
+ tmp = use.tms_utime;
+ tmp += use.tms_stime;
+#endif
+ return (double)(tmp) / CLK_TCK;
+}
+
+#endif
+
diff --git a/intern/opennl/superlu/supermatrix.h b/intern/opennl/superlu/supermatrix.h
new file mode 100644
index 00000000000..665e22dc91f
--- /dev/null
+++ b/intern/opennl/superlu/supermatrix.h
@@ -0,0 +1,140 @@
+#ifndef __SUPERLU_SUPERMATRIX /* allow multiple inclusions */
+#define __SUPERLU_SUPERMATRIX
+
+/********************************************
+ * The matrix types are defined as follows. *
+ ********************************************/
+typedef enum {
+ SLU_NC, /* column-wise, no supernode */
+ SLU_NR, /* row-wize, no supernode */
+ SLU_SC, /* column-wise, supernode */
+ SLU_SR, /* row-wise, supernode */
+ SLU_NCP, /* column-wise, column-permuted, no supernode
+ (The consecutive columns of nonzeros, after permutation,
+ may not be stored contiguously.) */
+ SLU_DN /* Fortran style column-wise storage for dense matrix */
+} Stype_t;
+
+typedef enum {
+ SLU_S, /* single */
+ SLU_D, /* double */
+ SLU_C, /* single complex */
+ SLU_Z /* double complex */
+} Dtype_t;
+
+typedef enum {
+ SLU_GE, /* general */
+ SLU_TRLU, /* lower triangular, unit diagonal */
+ SLU_TRUU, /* upper triangular, unit diagonal */
+ SLU_TRL, /* lower triangular */
+ SLU_TRU, /* upper triangular */
+ SLU_SYL, /* symmetric, store lower half */
+ SLU_SYU, /* symmetric, store upper half */
+ SLU_HEL, /* Hermitian, store lower half */
+ SLU_HEU /* Hermitian, store upper half */
+} Mtype_t;
+
+typedef struct {
+ Stype_t Stype; /* Storage type: interprets the storage structure
+ pointed to by *Store. */
+ Dtype_t Dtype; /* Data type. */
+ Mtype_t Mtype; /* Matrix type: describes the mathematical property of
+ the matrix. */
+ int_t nrow; /* number of rows */
+ int_t ncol; /* number of columns */
+ void *Store; /* pointer to the actual storage of the matrix */
+} SuperMatrix;
+
+/***********************************************
+ * The storage schemes are defined as follows. *
+ ***********************************************/
+
+/* Stype == NC (Also known as Harwell-Boeing sparse matrix format) */
+typedef struct {
+ int_t nnz; /* number of nonzeros in the matrix */
+ void *nzval; /* pointer to array of nonzero values, packed by column */
+ int_t *rowind; /* pointer to array of row indices of the nonzeros */
+ int_t *colptr; /* pointer to array of beginning of columns in nzval[]
+ and rowind[] */
+ /* Note:
+ Zero-based indexing is used;
+ colptr[] has ncol+1 entries, the last one pointing
+ beyond the last column, so that colptr[ncol] = nnz. */
+} NCformat;
+
+/* Stype == NR (Also known as row compressed storage (RCS). */
+typedef struct {
+ int_t nnz; /* number of nonzeros in the matrix */
+ void *nzval; /* pointer to array of nonzero values, packed by row */
+ int_t *colind; /* pointer to array of column indices of the nonzeros */
+ int_t *rowptr; /* pointer to array of beginning of rows in nzval[]
+ and colind[] */
+ /* Note:
+ Zero-based indexing is used;
+ nzval[] and colind[] are of the same length, nnz;
+ rowptr[] has nrow+1 entries, the last one pointing
+ beyond the last column, so that rowptr[nrow] = nnz. */
+} NRformat;
+
+/* Stype == SC */
+typedef struct {
+ int_t nnz; /* number of nonzeros in the matrix */
+ int_t nsuper; /* number of supernodes, minus 1 */
+ void *nzval; /* pointer to array of nonzero values, packed by column */
+ int_t *nzval_colptr;/* pointer to array of beginning of columns in nzval[] */
+ int_t *rowind; /* pointer to array of compressed row indices of
+ rectangular supernodes */
+ int_t *rowind_colptr;/* pointer to array of beginning of columns in rowind[] */
+ int_t *col_to_sup; /* col_to_sup[j] is the supernode number to which column
+ j belongs; mapping from column to supernode number. */
+ int_t *sup_to_col; /* sup_to_col[s] points to the start of the s-th
+ supernode; mapping from supernode number to column.
+ e.g.: col_to_sup: 0 1 2 2 3 3 3 4 4 4 4 4 4 (ncol=12)
+ sup_to_col: 0 1 2 4 7 12 (nsuper=4) */
+ /* Note:
+ Zero-based indexing is used;
+ nzval_colptr[], rowind_colptr[], col_to_sup and
+ sup_to_col[] have ncol+1 entries, the last one
+ pointing beyond the last column.
+ For col_to_sup[], only the first ncol entries are
+ defined. For sup_to_col[], only the first nsuper+2
+ entries are defined. */
+} SCformat;
+
+/* Stype == NCP */
+typedef struct {
+ int_t nnz; /* number of nonzeros in the matrix */
+ void *nzval; /* pointer to array of nonzero values, packed by column */
+ int_t *rowind;/* pointer to array of row indices of the nonzeros */
+ /* Note: nzval[]/rowind[] always have the same length */
+ int_t *colbeg;/* colbeg[j] points to the beginning of column j in nzval[]
+ and rowind[] */
+ int_t *colend;/* colend[j] points to one past the last element of column
+ j in nzval[] and rowind[] */
+ /* Note:
+ Zero-based indexing is used;
+ The consecutive columns of the nonzeros may not be
+ contiguous in storage, because the matrix has been
+ postmultiplied by a column permutation matrix. */
+} NCPformat;
+
+/* Stype == DN */
+typedef struct {
+ int_t lda; /* leading dimension */
+ void *nzval; /* array of size lda*ncol to represent a dense matrix */
+} DNformat;
+
+
+
+/*********************************************************
+ * Macros used for easy access of sparse matrix entries. *
+ *********************************************************/
+#define L_SUB_START(col) ( Lstore->rowind_colptr[col] )
+#define L_SUB(ptr) ( Lstore->rowind[ptr] )
+#define L_NZ_START(col) ( Lstore->nzval_colptr[col] )
+#define L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] )
+#define U_NZ_START(col) ( Ustore->colptr[col] )
+#define U_SUB(ptr) ( Ustore->rowind[ptr] )
+
+
+#endif /* __SUPERLU_SUPERMATRIX */
diff --git a/intern/opennl/superlu/sutil.c b/intern/opennl/superlu/sutil.c
new file mode 100644
index 00000000000..4689f34968a
--- /dev/null
+++ b/intern/opennl/superlu/sutil.c
@@ -0,0 +1,478 @@
+
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <math.h>
+#include "ssp_defs.h"
+
+void
+sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
+ float *nzval, int *rowind, int *colptr,
+ Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+ NCformat *Astore;
+
+ A->Stype = stype;
+ A->Dtype = dtype;
+ A->Mtype = mtype;
+ A->nrow = m;
+ A->ncol = n;
+ A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
+ if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
+ Astore = A->Store;
+ Astore->nnz = nnz;
+ Astore->nzval = nzval;
+ Astore->rowind = rowind;
+ Astore->colptr = colptr;
+}
+
+void
+sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz,
+ float *nzval, int *colind, int *rowptr,
+ Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+ NRformat *Astore;
+
+ A->Stype = stype;
+ A->Dtype = dtype;
+ A->Mtype = mtype;
+ A->nrow = m;
+ A->ncol = n;
+ A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
+ if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
+ Astore = A->Store;
+ Astore->nnz = nnz;
+ Astore->nzval = nzval;
+ Astore->colind = colind;
+ Astore->rowptr = rowptr;
+}
+
+/* Copy matrix A into matrix B. */
+void
+sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
+{
+ NCformat *Astore, *Bstore;
+ int ncol, nnz, i;
+
+ B->Stype = A->Stype;
+ B->Dtype = A->Dtype;
+ B->Mtype = A->Mtype;
+ B->nrow = A->nrow;;
+ B->ncol = ncol = A->ncol;
+ Astore = (NCformat *) A->Store;
+ Bstore = (NCformat *) B->Store;
+ Bstore->nnz = nnz = Astore->nnz;
+ for (i = 0; i < nnz; ++i)
+ ((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i];
+ for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
+ for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
+}
+
+
+void
+sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
+ Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+ DNformat *Xstore;
+
+ X->Stype = stype;
+ X->Dtype = dtype;
+ X->Mtype = mtype;
+ X->nrow = m;
+ X->ncol = n;
+ X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
+ if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
+ Xstore = (DNformat *) X->Store;
+ Xstore->lda = ldx;
+ Xstore->nzval = (float *) x;
+}
+
+void
+sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
+ float *Y, int ldy)
+{
+/*
+ *
+ * Purpose
+ * =======
+ *
+ * Copies a two-dimensional matrix X to another matrix Y.
+ */
+ int i, j;
+
+ for (j = 0; j < N; ++j)
+ for (i = 0; i < M; ++i)
+ Y[i + j*ldy] = X[i + j*ldx];
+}
+
+void
+sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
+ float *nzval, int *nzval_colptr, int *rowind,
+ int *rowind_colptr, int *col_to_sup, int *sup_to_col,
+ Stype_t stype, Dtype_t dtype, Mtype_t mtype)
+{
+ SCformat *Lstore;
+
+ L->Stype = stype;
+ L->Dtype = dtype;
+ L->Mtype = mtype;
+ L->nrow = m;
+ L->ncol = n;
+ L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
+ if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
+ Lstore = L->Store;
+ Lstore->nnz = nnz;
+ Lstore->nsuper = col_to_sup[n];
+ Lstore->nzval = nzval;
+ Lstore->nzval_colptr = nzval_colptr;
+ Lstore->rowind = rowind;
+ Lstore->rowind_colptr = rowind_colptr;
+ Lstore->col_to_sup = col_to_sup;
+ Lstore->sup_to_col = sup_to_col;
+
+}
+
+
+/*
+ * Convert a row compressed storage into a column compressed storage.
+ */
+void
+sCompRow_to_CompCol(int m, int n, int nnz,
+ float *a, int *colind, int *rowptr,
+ float **at, int **rowind, int **colptr)
+{
+ register int i, j, col, relpos;
+ int *marker;
+
+ /* Allocate storage for another copy of the matrix. */
+ *at = (float *) floatMalloc(nnz);
+ *rowind = (int *) intMalloc(nnz);
+ *colptr = (int *) intMalloc(n+1);
+ marker = (int *) intCalloc(n);
+
+ /* Get counts of each column of A, and set up column pointers */
+ for (i = 0; i < m; ++i)
+ for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
+ (*colptr)[0] = 0;
+ for (j = 0; j < n; ++j) {
+ (*colptr)[j+1] = (*colptr)[j] + marker[j];
+ marker[j] = (*colptr)[j];
+ }
+
+ /* Transfer the matrix into the compressed column storage. */
+ for (i = 0; i < m; ++i) {
+ for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
+ col = colind[j];
+ relpos = marker[col];
+ (*rowind)[relpos] = i;
+ (*at)[relpos] = a[j];
+ ++marker[col];
+ }
+ }
+
+ SUPERLU_FREE(marker);
+}
+
+
+void
+sPrint_CompCol_Matrix(char *what, SuperMatrix *A)
+{
+ NCformat *Astore;
+ register int i,n;
+ float *dp;
+
+ printf("\nCompCol matrix %s:\n", what);
+ printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
+ n = A->ncol;
+ Astore = (NCformat *) A->Store;
+ dp = (float *) Astore->nzval;
+ printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
+ printf("nzval: ");
+ for (i = 0; i < Astore->colptr[n]; ++i) printf("%f ", dp[i]);
+ printf("\nrowind: ");
+ for (i = 0; i < Astore->colptr[n]; ++i) printf("%d ", Astore->rowind[i]);
+ printf("\ncolptr: ");
+ for (i = 0; i <= n; ++i) printf("%d ", Astore->colptr[i]);
+ printf("\n");
+ fflush(stdout);
+}
+
+void
+sPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
+{
+ SCformat *Astore;
+ register int i, j, k, c, d, n, nsup;
+ float *dp;
+ int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
+
+ printf("\nSuperNode matrix %s:\n", what);
+ printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
+ n = A->ncol;
+ Astore = (SCformat *) A->Store;
+ dp = (float *) Astore->nzval;
+ col_to_sup = Astore->col_to_sup;
+ sup_to_col = Astore->sup_to_col;
+ rowind_colptr = Astore->rowind_colptr;
+ rowind = Astore->rowind;
+ printf("nrow %d, ncol %d, nnz %d, nsuper %d\n",
+ A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
+ printf("nzval:\n");
+ for (k = 0; k <= Astore->nsuper; ++k) {
+ c = sup_to_col[k];
+ nsup = sup_to_col[k+1] - c;
+ for (j = c; j < c + nsup; ++j) {
+ d = Astore->nzval_colptr[j];
+ for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
+ printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
+ }
+ }
+ }
+#if 0
+ for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f ", dp[i]);
+#endif
+ printf("\nnzval_colptr: ");
+ for (i = 0; i <= n; ++i) printf("%d ", Astore->nzval_colptr[i]);
+ printf("\nrowind: ");
+ for (i = 0; i < Astore->rowind_colptr[n]; ++i)
+ printf("%d ", Astore->rowind[i]);
+ printf("\nrowind_colptr: ");
+ for (i = 0; i <= n; ++i) printf("%d ", Astore->rowind_colptr[i]);
+ printf("\ncol_to_sup: ");
+ for (i = 0; i < n; ++i) printf("%d ", col_to_sup[i]);
+ printf("\nsup_to_col: ");
+ for (i = 0; i <= Astore->nsuper+1; ++i)
+ printf("%d ", sup_to_col[i]);
+ printf("\n");
+ fflush(stdout);
+}
+
+void
+sPrint_Dense_Matrix(char *what, SuperMatrix *A)
+{
+ DNformat *Astore;
+ register int i;
+ float *dp;
+
+ printf("\nDense matrix %s:\n", what);
+ printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
+ Astore = (DNformat *) A->Store;
+ dp = (float *) Astore->nzval;
+ printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
+ printf("\nnzval: ");
+ for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]);
+ printf("\n");
+ fflush(stdout);
+}
+
+/*
+ * Diagnostic print of column "jcol" in the U/L factor.
+ */
+void
+sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
+{
+ int i, k, fsupc;
+ int *xsup, *supno;
+ int *xlsub, *lsub;
+ float *lusup;
+ int *xlusup;
+ float *ucol;
+ int *usub, *xusub;
+
+ xsup = Glu->xsup;
+ supno = Glu->supno;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ lusup = Glu->lusup;
+ xlusup = Glu->xlusup;
+ ucol = Glu->ucol;
+ usub = Glu->usub;
+ xusub = Glu->xusub;
+
+ printf("%s", msg);
+ printf("col %d: pivrow %d, supno %d, xprune %d\n",
+ jcol, pivrow, supno[jcol], xprune[jcol]);
+
+ printf("\tU-col:\n");
+ for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
+ printf("\t%d%10.4f\n", usub[i], ucol[i]);
+ printf("\tL-col in rectangular snode:\n");
+ fsupc = xsup[supno[jcol]]; /* first col of the snode */
+ i = xlsub[fsupc];
+ k = xlusup[jcol];
+ while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
+ printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
+ i++; k++;
+ }
+ fflush(stdout);
+}
+
+
+/*
+ * Check whether tempv[] == 0. This should be true before and after
+ * calling any numeric routines, i.e., "panel_bmod" and "column_bmod".
+ */
+void scheck_tempv(int n, float *tempv)
+{
+ int i;
+
+ for (i = 0; i < n; i++) {
+ if (tempv[i] != 0.0)
+ {
+ fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
+ ABORT("scheck_tempv");
+ }
+ }
+}
+
+
+void
+sGenXtrue(int n, int nrhs, float *x, int ldx)
+{
+ int i, j;
+ for (j = 0; j < nrhs; ++j)
+ for (i = 0; i < n; ++i) {
+ x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/
+ }
+}
+
+/*
+ * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
+ */
+void
+sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
+ SuperMatrix *A, SuperMatrix *B)
+{
+ NCformat *Astore;
+ float *Aval;
+ DNformat *Bstore;
+ float *rhs;
+ float one = 1.0;
+ float zero = 0.0;
+ int ldc;
+ char transc[1];
+
+ Astore = A->Store;
+ Aval = (float *) Astore->nzval;
+ Bstore = B->Store;
+ rhs = Bstore->nzval;
+ ldc = Bstore->lda;
+
+ if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
+ else *(unsigned char *)transc = 'T';
+
+ sp_sgemm(transc, nrhs, one, A,
+ x, ldx, zero, rhs, ldc);
+
+}
+
+/*
+ * Fills a float precision array with a given value.
+ */
+void
+sfill(float *a, int alen, float dval)
+{
+ register int i;
+ for (i = 0; i < alen; i++) a[i] = dval;
+}
+
+
+
+/*
+ * Check the inf-norm of the error vector
+ */
+void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
+{
+ DNformat *Xstore;
+ float err, xnorm;
+ float *Xmat, *soln_work;
+ int i, j;
+
+ Xstore = X->Store;
+ Xmat = Xstore->nzval;
+
+ for (j = 0; j < nrhs; j++) {
+ soln_work = &Xmat[j*Xstore->lda];
+ err = xnorm = 0.0;
+ for (i = 0; i < X->nrow; i++) {
+ err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
+ xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
+ }
+ err = err / xnorm;
+ printf("||X - Xtrue||/||X|| = %e\n", err);
+ }
+}
+
+
+
+/* Print performance of the code. */
+void
+sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
+ float rpg, float rcond, float *ferr,
+ float *berr, char *equed, SuperLUStat_t *stat)
+{
+ SCformat *Lstore;
+ NCformat *Ustore;
+ double *utime;
+ flops_t *ops;
+
+ utime = stat->utime;
+ ops = stat->ops;
+
+ if ( utime[FACT] != 0. )
+ printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
+ ops[FACT]*1e-6/utime[FACT]);
+ printf("Identify relaxed snodes = %8.2f\n", utime[RELAX]);
+ if ( utime[SOLVE] != 0. )
+ printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
+ ops[SOLVE]*1e-6/utime[SOLVE]);
+
+ Lstore = (SCformat *) L->Store;
+ Ustore = (NCformat *) U->Store;
+ printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
+ printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
+ printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
+
+ printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
+ mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
+ mem_usage->expansions);
+
+ printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
+ printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
+ utime[FACT], ops[FACT]*1e-6/utime[FACT],
+ utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
+ utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
+
+ printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
+ printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
+ rpg, rcond, ferr[0], berr[0], equed);
+
+}
+
+
+
+
+int print_float_vec(char *what, int n, float *vec)
+{
+ int i;
+ printf("%s: n %d\n", what, n);
+ for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);
+ return 0;
+}
+
diff --git a/intern/opennl/superlu/util.c b/intern/opennl/superlu/util.c
new file mode 100644
index 00000000000..3c49d714d1c
--- /dev/null
+++ b/intern/opennl/superlu/util.c
@@ -0,0 +1,391 @@
+/*
+ * -- SuperLU routine (version 3.0) --
+ * Univ. of California Berkeley, Xerox Palo Alto Research Center,
+ * and Lawrence Berkeley National Lab.
+ * October 15, 2003
+ *
+ */
+/*
+ Copyright (c) 1994 by Xerox Corporation. All rights reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use or copy this program for any
+ purpose, provided the above notices are retained on all copies.
+ Permission to modify the code and to distribute modified code is
+ granted, provided the above notices are retained, and a notice that
+ the code was modified is included with the above copyright notice.
+*/
+
+#include <math.h>
+#include "ssp_defs.h"
+#include "util.h"
+
+/*
+ * Global statistics variale
+ */
+
+void superlu_abort_and_exit(char* msg)
+{
+ fprintf(stderr, msg);
+ exit (-1);
+}
+
+/*
+ * Set the default values for the options argument.
+ */
+void set_default_options(superlu_options_t *options)
+{
+ options->Fact = DOFACT;
+ options->Equil = YES;
+ options->ColPerm = COLAMD;
+ options->DiagPivotThresh = 1.0;
+ options->Trans = NOTRANS;
+ options->IterRefine = NOREFINE;
+ options->SymmetricMode = NO;
+ options->PivotGrowth = NO;
+ options->ConditionNumber = NO;
+ options->PrintStat = YES;
+}
+
+/* Deallocate the structure pointing to the actual storage of the matrix. */
+void
+Destroy_SuperMatrix_Store(SuperMatrix *A)
+{
+ SUPERLU_FREE ( A->Store );
+}
+
+void
+Destroy_CompCol_Matrix(SuperMatrix *A)
+{
+ SUPERLU_FREE( ((NCformat *)A->Store)->rowind );
+ SUPERLU_FREE( ((NCformat *)A->Store)->colptr );
+ SUPERLU_FREE( ((NCformat *)A->Store)->nzval );
+ SUPERLU_FREE( A->Store );
+}
+
+void
+Destroy_CompRow_Matrix(SuperMatrix *A)
+{
+ SUPERLU_FREE( ((NRformat *)A->Store)->colind );
+ SUPERLU_FREE( ((NRformat *)A->Store)->rowptr );
+ SUPERLU_FREE( ((NRformat *)A->Store)->nzval );
+ SUPERLU_FREE( A->Store );
+}
+
+void
+Destroy_SuperNode_Matrix(SuperMatrix *A)
+{
+ SUPERLU_FREE ( ((SCformat *)A->Store)->rowind );
+ SUPERLU_FREE ( ((SCformat *)A->Store)->rowind_colptr );
+ SUPERLU_FREE ( ((SCformat *)A->Store)->nzval );
+ SUPERLU_FREE ( ((SCformat *)A->Store)->nzval_colptr );
+ SUPERLU_FREE ( ((SCformat *)A->Store)->col_to_sup );
+ SUPERLU_FREE ( ((SCformat *)A->Store)->sup_to_col );
+ SUPERLU_FREE ( A->Store );
+}
+
+/* A is of type Stype==NCP */
+void
+Destroy_CompCol_Permuted(SuperMatrix *A)
+{
+ SUPERLU_FREE ( ((NCPformat *)A->Store)->colbeg );
+ SUPERLU_FREE ( ((NCPformat *)A->Store)->colend );
+ SUPERLU_FREE ( A->Store );
+}
+
+/* A is of type Stype==DN */
+void
+Destroy_Dense_Matrix(SuperMatrix *A)
+{
+ DNformat* Astore = A->Store;
+ SUPERLU_FREE (Astore->nzval);
+ SUPERLU_FREE ( A->Store );
+}
+
+/*
+ * Reset repfnz[] for the current column
+ */
+void
+resetrep_col (const int nseg, const int *segrep, int *repfnz)
+{
+ int i, irep;
+
+ for (i = 0; i < nseg; i++) {
+ irep = segrep[i];
+ repfnz[irep] = EMPTY;
+ }
+}
+
+
+/*
+ * Count the total number of nonzeros in factors L and U, and in the
+ * symmetrically reduced L.
+ */
+void
+countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu)
+{
+ int nsuper, fsupc, i, j;
+ int nnzL0, jlen, irep;
+ int *xsup, *xlsub;
+
+ xsup = Glu->xsup;
+ xlsub = Glu->xlsub;
+ *nnzL = 0;
+ *nnzU = (Glu->xusub)[n];
+ nnzL0 = 0;
+ nsuper = (Glu->supno)[n];
+
+ if ( n <= 0 ) return;
+
+ /*
+ * For each supernode
+ */
+ for (i = 0; i <= nsuper; i++) {
+ fsupc = xsup[i];
+ jlen = xlsub[fsupc+1] - xlsub[fsupc];
+
+ for (j = fsupc; j < xsup[i+1]; j++) {
+ *nnzL += jlen;
+ *nnzU += j - fsupc + 1;
+ jlen--;
+ }
+ irep = xsup[i+1] - 1;
+ nnzL0 += xprune[irep] - xlsub[irep];
+ }
+
+ /* printf("\tNo of nonzeros in symm-reduced L = %d\n", nnzL0);*/
+}
+
+
+
+/*
+ * Fix up the data storage lsub for L-subscripts. It removes the subscript
+ * sets for structural pruning, and applies permuation to the remaining
+ * subscripts.
+ */
+void
+fixupL(const int n, const int *perm_r, GlobalLU_t *Glu)
+{
+ register int nsuper, fsupc, nextl, i, j, k, jstrt;
+ int *xsup, *lsub, *xlsub;
+
+ if ( n <= 1 ) return;
+
+ xsup = Glu->xsup;
+ lsub = Glu->lsub;
+ xlsub = Glu->xlsub;
+ nextl = 0;
+ nsuper = (Glu->supno)[n];
+
+ /*
+ * For each supernode ...
+ */
+ for (i = 0; i <= nsuper; i++) {
+ fsupc = xsup[i];
+ jstrt = xlsub[fsupc];
+ xlsub[fsupc] = nextl;
+ for (j = jstrt; j < xlsub[fsupc+1]; j++) {
+ lsub[nextl] = perm_r[lsub[j]]; /* Now indexed into P*A */
+ nextl++;
+ }
+ for (k = fsupc+1; k < xsup[i+1]; k++)
+ xlsub[k] = nextl; /* Other columns in supernode i */
+
+ }
+
+ xlsub[n] = nextl;
+}
+
+
+/*
+ * Diagnostic print of segment info after panel_dfs().
+ */
+void print_panel_seg(int n, int w, int jcol, int nseg,
+ int *segrep, int *repfnz)
+{
+ int j, k;
+
+ for (j = jcol; j < jcol+w; j++) {
+ printf("\tcol %d:\n", j);
+ for (k = 0; k < nseg; k++)
+ printf("\t\tseg %d, segrep %d, repfnz %d\n", k,
+ segrep[k], repfnz[(j-jcol)*n + segrep[k]]);
+ }
+
+}
+
+
+void
+StatInit(SuperLUStat_t *stat)
+{
+ register int i, w, panel_size, relax;
+
+ panel_size = sp_ienv(1);
+ relax = sp_ienv(2);
+ w = SUPERLU_MAX(panel_size, relax);
+ stat->panel_histo = intCalloc(w+1);
+ stat->utime = (double *) SUPERLU_MALLOC(NPHASES * sizeof(double));
+ if (!stat->utime) ABORT("SUPERLU_MALLOC fails for stat->utime");
+ stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t));
+ if (!stat->ops) ABORT("SUPERLU_MALLOC fails for stat->ops");
+ for (i = 0; i < NPHASES; ++i) {
+ stat->utime[i] = 0.;
+ stat->ops[i] = 0.;
+ }
+}
+
+
+void
+StatPrint(SuperLUStat_t *stat)
+{
+ double *utime;
+ flops_t *ops;
+
+ utime = stat->utime;
+ ops = stat->ops;
+ printf("Factor time = %8.2f\n", utime[FACT]);
+ if ( utime[FACT] != 0.0 )
+ printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
+ ops[FACT]*1e-6/utime[FACT]);
+
+ printf("Solve time = %8.2f\n", utime[SOLVE]);
+ if ( utime[SOLVE] != 0.0 )
+ printf("Solve flops = %e\tMflops = %8.2f\n", ops[SOLVE],
+ ops[SOLVE]*1e-6/utime[SOLVE]);
+
+}
+
+
+void
+StatFree(SuperLUStat_t *stat)
+{
+ SUPERLU_FREE(stat->panel_histo);
+ SUPERLU_FREE(stat->utime);
+ SUPERLU_FREE(stat->ops);
+}
+
+
+flops_t
+LUFactFlops(SuperLUStat_t *stat)
+{
+ return (stat->ops[FACT]);
+}
+
+flops_t
+LUSolveFlops(SuperLUStat_t *stat)
+{
+ return (stat->ops[SOLVE]);
+}
+
+
+
+
+
+/*
+ * Fills an integer array with a given value.
+ */
+void ifill(int *a, int alen, int ival)
+{
+ register int i;
+ for (i = 0; i < alen; i++) a[i] = ival;
+}
+
+
+
+/*
+ * Get the statistics of the supernodes
+ */
+#define NBUCKS 10
+static int max_sup_size;
+
+void super_stats(int nsuper, int *xsup)
+{
+ register int nsup1 = 0;
+ int i, isize, whichb, bl, bh;
+ int bucket[NBUCKS];
+
+ max_sup_size = 0;
+
+ for (i = 0; i <= nsuper; i++) {
+ isize = xsup[i+1] - xsup[i];
+ if ( isize == 1 ) nsup1++;
+ if ( max_sup_size < isize ) max_sup_size = isize;
+ }
+
+ printf(" Supernode statistics:\n\tno of super = %d\n", nsuper+1);
+ printf("\tmax supernode size = %d\n", max_sup_size);
+ printf("\tno of size 1 supernodes = %d\n", nsup1);
+
+ /* Histogram of the supernode sizes */
+ ifill (bucket, NBUCKS, 0);
+
+ for (i = 0; i <= nsuper; i++) {
+ isize = xsup[i+1] - xsup[i];
+ whichb = (float) isize / max_sup_size * NBUCKS;
+ if (whichb >= NBUCKS) whichb = NBUCKS - 1;
+ bucket[whichb]++;
+ }
+
+ printf("\tHistogram of supernode sizes:\n");
+ for (i = 0; i < NBUCKS; i++) {
+ bl = (float) i * max_sup_size / NBUCKS;
+ bh = (float) (i+1) * max_sup_size / NBUCKS;
+ printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);
+ }
+
+}
+
+
+float SpaSize(int n, int np, float sum_npw)
+{
+ return (sum_npw*8 + np*8 + n*4)/1024.;
+}
+
+float DenseSize(int n, float sum_nw)
+{
+ return (sum_nw*8 + n*8)/1024.;;
+}
+
+
+
+/*
+ * Check whether repfnz[] == EMPTY after reset.
+ */
+void check_repfnz(int n, int w, int jcol, int *repfnz)
+{
+ int jj, k;
+
+ for (jj = jcol; jj < jcol+w; jj++)
+ for (k = 0; k < n; k++)
+ if ( repfnz[(jj-jcol)*n + k] != EMPTY ) {
+ fprintf(stderr, "col %d, repfnz_col[%d] = %d\n", jj,
+ k, repfnz[(jj-jcol)*n + k]);
+ ABORT("check_repfnz");
+ }
+}
+
+
+/* Print a summary of the testing results. */
+void
+PrintSumm(char *type, int nfail, int nrun, int nerrs)
+{
+ if ( nfail > 0 )
+ printf("%3s driver: %d out of %d tests failed to pass the threshold\n",
+ type, nfail, nrun);
+ else
+ printf("All tests for %3s driver passed the threshold (%6d tests run)\n", type, nrun);
+
+ if ( nerrs > 0 )
+ printf("%6d error messages recorded\n", nerrs);
+}
+
+
+int print_int_vec(char *what, int n, int *vec)
+{
+ int i;
+ printf("%s\n", what);
+ for (i = 0; i < n; ++i) printf("%d\t%d\n", i, vec[i]);
+ return 0;
+}
diff --git a/intern/opennl/superlu/util.h b/intern/opennl/superlu/util.h
new file mode 100644
index 00000000000..1a3526d4e7e
--- /dev/null
+++ b/intern/opennl/superlu/util.h
@@ -0,0 +1,267 @@
+#ifndef __SUPERLU_UTIL /* allow multiple inclusions */
+#define __SUPERLU_UTIL
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/*
+#ifndef __STDC__
+#include <malloc.h>
+#endif
+*/
+#include <assert.h>
+
+/***********************************************************************
+ * Macros
+ ***********************************************************************/
+#define FIRSTCOL_OF_SNODE(i) (xsup[i])
+/* No of marker arrays used in the symbolic factorization,
+ each of size n */
+#define NO_MARKER 3
+#define NUM_TEMPV(m,w,t,b) ( SUPERLU_MAX(m, (t + b)*w) )
+
+#ifndef USER_ABORT
+#define USER_ABORT(msg) superlu_abort_and_exit(msg)
+#endif
+
+#define ABORT(err_msg) \
+ { char msg[256];\
+ sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\
+ USER_ABORT(msg); }
+
+
+#ifndef USER_MALLOC
+#if 1
+#define USER_MALLOC(size) superlu_malloc(size)
+#else
+/* The following may check out some uninitialized data */
+#define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size)
+#endif
+#endif
+
+#define SUPERLU_MALLOC(size) USER_MALLOC(size)
+
+#ifndef USER_FREE
+#define USER_FREE(addr) superlu_free(addr)
+#endif
+
+#define SUPERLU_FREE(addr) USER_FREE(addr)
+
+#define CHECK_MALLOC(where) { \
+ extern int superlu_malloc_total; \
+ printf("%s: malloc_total %d Bytes\n", \
+ where, superlu_malloc_total); \
+}
+
+#define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) )
+#define SUPERLU_MIN(x, y) ( (x) < (y) ? (x) : (y) )
+
+/***********************************************************************
+ * Constants
+ ***********************************************************************/
+#define EMPTY (-1)
+/*#define NO (-1)*/
+#define FALSE 0
+#define TRUE 1
+
+/***********************************************************************
+ * Enumerate types
+ ***********************************************************************/
+typedef enum {NO, YES} yes_no_t;
+typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t;
+typedef enum {NOROWPERM, LargeDiag, MY_PERMR} rowperm_t;
+typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC}colperm_t;
+typedef enum {NOTRANS, TRANS, CONJ} trans_t;
+typedef enum {NOEQUIL, ROW, COL, BOTH} DiagScale_t;
+typedef enum {NOREFINE, SINGLE=1, SLU_DOUBLE, EXTRA} IterRefine_t;
+typedef enum {LUSUP, UCOL, LSUB, USUB} MemType;
+typedef enum {HEAD, TAIL} stack_end_t;
+typedef enum {SYSTEM, USER} LU_space_t;
+
+/*
+ * The following enumerate type is used by the statistics variable
+ * to keep track of flop count and time spent at various stages.
+ *
+ * Note that not all of the fields are disjoint.
+ */
+typedef enum {
+ COLPERM, /* find a column ordering that minimizes fills */
+ RELAX, /* find artificial supernodes */
+ ETREE, /* compute column etree */
+ EQUIL, /* equilibrate the original matrix */
+ FACT, /* perform LU factorization */
+ RCOND, /* estimate reciprocal condition number */
+ SOLVE, /* forward and back solves */
+ REFINE, /* perform iterative refinement */
+ SLU_FLOAT, /* time spent in floating-point operations */
+ TRSV, /* fraction of FACT spent in xTRSV */
+ GEMV, /* fraction of FACT spent in xGEMV */
+ FERR, /* estimate error bounds after iterative refinement */
+ NPHASES /* total number of phases */
+} PhaseType;
+
+
+/***********************************************************************
+ * Type definitions
+ ***********************************************************************/
+typedef float flops_t;
+typedef unsigned char Logical;
+
+/*
+ *-- This contains the options used to control the solve process.
+ *
+ * Fact (fact_t)
+ * Specifies whether or not the factored form of the matrix
+ * A is supplied on entry, and if not, how the matrix A should
+ * be factorizaed.
+ * = DOFACT: The matrix A will be factorized from scratch, and the
+ * factors will be stored in L and U.
+ * = SamePattern: The matrix A will be factorized assuming
+ * that a factorization of a matrix with the same sparsity
+ * pattern was performed prior to this one. Therefore, this
+ * factorization will reuse column permutation vector
+ * ScalePermstruct->perm_c and the column elimination tree
+ * LUstruct->etree.
+ * = SamePattern_SameRowPerm: The matrix A will be factorized
+ * assuming that a factorization of a matrix with the same
+ * sparsity pattern and similar numerical values was performed
+ * prior to this one. Therefore, this factorization will reuse
+ * both row and column scaling factors R and C, and the
+ * both row and column permutation vectors perm_r and perm_c,
+ * distributed data structure set up from the previous symbolic
+ * factorization.
+ * = FACTORED: On entry, L, U, perm_r and perm_c contain the
+ * factored form of A. If DiagScale is not NOEQUIL, the matrix
+ * A has been equilibrated with scaling factors R and C.
+ *
+ * Equil (yes_no_t)
+ * Specifies whether to equilibrate the system (scale A's row and
+ * columns to have unit norm).
+ *
+ * ColPerm (colperm_t)
+ * Specifies what type of column permutation to use to reduce fill.
+ * = NATURAL: use the natural ordering
+ * = MMD_ATA: use minimum degree ordering on structure of A'*A
+ * = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
+ * = COLAMD: use approximate minimum degree column ordering
+ * = MY_PERMC: use the ordering specified in ScalePermstruct->perm_c[]
+ *
+ * Trans (trans_t)
+ * Specifies the form of the system of equations:
+ * = NOTRANS: A * X = B (No transpose)
+ * = TRANS: A**T * X = B (Transpose)
+ * = CONJ: A**H * X = B (Transpose)
+ *
+ * IterRefine (IterRefine_t)
+ * Specifies whether to perform iterative refinement.
+ * = NO: no iterative refinement
+ * = WorkingPrec: perform iterative refinement in working precision
+ * = ExtraPrec: perform iterative refinement in extra precision
+ *
+ * PrintStat (yes_no_t)
+ * Specifies whether to print the solver's statistics.
+ *
+ * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU)
+ * Specifies the threshold used for a diagonal entry to be an
+ * acceptable pivot.
+ *
+ * PivotGrowth (yes_no_t)
+ * Specifies whether to compute the reciprocal pivot growth.
+ *
+ * ConditionNumber (ues_no_t)
+ * Specifies whether to compute the reciprocal condition number.
+ *
+ * RowPerm (rowperm_t) (only for SuperLU_DIST)
+ * Specifies whether to permute rows of the original matrix.
+ * = NO: not to permute the rows
+ * = LargeDiag: make the diagonal large relative to the off-diagonal
+ * = MY_PERMR: use the permutation given in ScalePermstruct->perm_r[]
+ *
+ * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
+ * Specifies whether to replace the tiny diagonals by
+ * sqrt(epsilon)*||A|| during LU factorization.
+ *
+ * SolveInitialized (yes_no_t) (only for SuperLU_DIST)
+ * Specifies whether the initialization has been performed to the
+ * triangular solve.
+ *
+ * RefineInitialized (yes_no_t) (only for SuperLU_DIST)
+ * Specifies whether the initialization has been performed to the
+ * sparse matrix-vector multiplication routine needed in iterative
+ * refinement.
+ */
+typedef struct {
+ fact_t Fact;
+ yes_no_t Equil;
+ colperm_t ColPerm;
+ trans_t Trans;
+ IterRefine_t IterRefine;
+ yes_no_t PrintStat;
+ yes_no_t SymmetricMode;
+ double DiagPivotThresh;
+ yes_no_t PivotGrowth;
+ yes_no_t ConditionNumber;
+ rowperm_t RowPerm;
+ yes_no_t ReplaceTinyPivot;
+ yes_no_t SolveInitialized;
+ yes_no_t RefineInitialized;
+} superlu_options_t;
+
+typedef struct {
+ int *panel_histo; /* histogram of panel size distribution */
+ double *utime; /* running time at various phases */
+ flops_t *ops; /* operation count at various phases */
+ int TinyPivots; /* number of tiny pivots */
+ int RefineSteps; /* number of iterative refinement steps */
+} SuperLUStat_t;
+
+
+/***********************************************************************
+ * Prototypes
+ ***********************************************************************/
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void Destroy_SuperMatrix_Store(SuperMatrix *);
+extern void Destroy_CompCol_Matrix(SuperMatrix *);
+extern void Destroy_CompRow_Matrix(SuperMatrix *);
+extern void Destroy_SuperNode_Matrix(SuperMatrix *);
+extern void Destroy_CompCol_Permuted(SuperMatrix *);
+extern void Destroy_Dense_Matrix(SuperMatrix *);
+extern void get_perm_c(int, SuperMatrix *, int *);
+extern void set_default_options(superlu_options_t *options);
+extern void sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*,
+ SuperMatrix*);
+extern void superlu_abort_and_exit(char*);
+extern void *superlu_malloc (size_t);
+extern int *intMalloc (int);
+extern int *intCalloc (int);
+extern void superlu_free (void*);
+extern void SetIWork (int, int, int, int *, int **, int **, int **,
+ int **, int **, int **, int **);
+extern int sp_coletree (int *, int *, int *, int, int, int *);
+extern void relax_snode (const int, int *, const int, int *, int *);
+extern void heap_relax_snode (const int, int *, const int, int *, int *);
+extern void resetrep_col (const int, const int *, int *);
+extern int spcoletree (int *, int *, int *, int, int, int *);
+extern int *TreePostorder (int, int *);
+extern double SuperLU_timer_ ();
+extern int sp_ienv (int);
+extern int lsame_ (char *, char *);
+extern int xerbla_ (char *, int *);
+extern void ifill (int *, int, int);
+extern void snode_profile (int, int *);
+extern void super_stats (int, int *);
+extern void PrintSumm (char *, int, int, int);
+extern void StatInit(SuperLUStat_t *);
+extern void StatPrint (SuperLUStat_t *);
+extern void StatFree(SuperLUStat_t *);
+extern void print_panel_seg(int, int, int, int, int *, int *);
+extern void check_repfnz(int, int, int, int *);
+
+#ifdef __cplusplus
+ }
+#endif
+
+#endif /* __SUPERLU_UTIL */
diff --git a/intern/opennl/superlu/xerbla.c b/intern/opennl/superlu/xerbla.c
new file mode 100644
index 00000000000..cb94fa71d95
--- /dev/null
+++ b/intern/opennl/superlu/xerbla.c
@@ -0,0 +1,43 @@
+
+#include <stdio.h>
+
+/* Subroutine */ int xerbla_(char *srname, int *info)
+{
+/* -- LAPACK auxiliary routine (version 2.0) --
+ Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+ Courant Institute, Argonne National Lab, and Rice University
+ September 30, 1994
+
+
+ Purpose
+ =======
+
+ XERBLA is an error handler for the LAPACK routines.
+ It is called by an LAPACK routine if an input parameter has an
+ invalid value. A message is printed and execution stops.
+
+ Installers may consider modifying the STOP statement in order to
+ call system-specific exception-handling facilities.
+
+ Arguments
+ =========
+
+ SRNAME (input) CHARACTER*6
+ The name of the routine which called XERBLA.
+
+ INFO (input) INT
+ The position of the invalid parameter in the parameter list
+
+ of the calling routine.
+
+ =====================================================================
+*/
+
+ printf("** On entry to %6s, parameter number %2d had an illegal value\n",
+ srname, *info);
+
+/* End of XERBLA */
+
+ return 0;
+} /* xerbla_ */
+
diff --git a/source/Makefile b/source/Makefile
index a952fd35274..ccf4bb53043 100644
--- a/source/Makefile
+++ b/source/Makefile
@@ -87,6 +87,8 @@ PYPLAYERLIB ?= $(PYLIB)
GRPLIB += $(OCGDIR)/blender/renderconverter/$(DEBUG_DIR)librenderconverter.a
GRPLIB += $(OCGDIR)/blender/render/$(DEBUG_DIR)librender.a
GRPLIB += $(OCGDIR)/blender/radiosity/$(DEBUG_DIR)libradiosity.a
+ GRPLIB += $(NAN_OPENNL)/lib/$(DEBUG_DIR)libopennl.a
+ GRPLIB += $(NAN_SUPERLU)/lib/$(DEBUG_DIR)libsuperlu.a
GRPLIB += $(OCGDIR)/blender/python/$(DEBUG_DIR)libpython.a
diff --git a/source/nan_definitions.mk b/source/nan_definitions.mk
index 6156203d87b..697c9093d58 100644
--- a/source/nan_definitions.mk
+++ b/source/nan_definitions.mk
@@ -81,6 +81,8 @@ endif
export NAN_GHOST ?= $(LCGDIR)/ghost
export NAN_TEST_VERBOSITY ?= 1
export NAN_BMFONT ?= $(LCGDIR)/bmfont
+ export NAN_OPENNL ?= $(LCGDIR)/opennl
+ export NAN_SUPERLU ?= $(LCGDIR)/superlu
ifeq ($(FREE_WINDOWS), true)
export NAN_FTGL ?= $(LCGDIR)/gcc/ftgl
else