diff options
Diffstat (limited to 'irstlm')
-rw-r--r-- | irstlm/.cdtbuild | 47 | ||||
-rw-r--r-- | irstlm/.cdtproject | 15 | ||||
-rw-r--r-- | irstlm/.cvsignore | 2 | ||||
-rw-r--r-- | irstlm/.project | 19 | ||||
-rw-r--r-- | irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs | 13 | ||||
-rw-r--r-- | irstlm/Makefile.am | 5 | ||||
-rw-r--r-- | irstlm/README | 14 | ||||
-rw-r--r-- | irstlm/config.h.in | 22 | ||||
-rw-r--r-- | irstlm/configure.in | 11 | ||||
-rwxr-xr-x | irstlm/depcomp | 522 | ||||
-rwxr-xr-x | irstlm/install-sh | 322 | ||||
-rw-r--r-- | irstlm/irstlm.vcproj | 347 | ||||
-rwxr-xr-x | irstlm/missing | 353 | ||||
-rw-r--r-- | irstlm/src/Makefile.am | 19 | ||||
-rw-r--r-- | irstlm/src/cmd.c | 661 | ||||
-rw-r--r-- | irstlm/src/cmd.h | 68 | ||||
-rw-r--r-- | irstlm/src/compile-lm.cpp | 124 | ||||
-rw-r--r-- | irstlm/src/dictionary.cpp | 418 | ||||
-rw-r--r-- | irstlm/src/dictionary.h | 209 | ||||
-rw-r--r-- | irstlm/src/htable.cpp | 261 | ||||
-rw-r--r-- | irstlm/src/htable.h | 125 | ||||
-rw-r--r-- | irstlm/src/index.h | 19 | ||||
-rw-r--r-- | irstlm/src/lmtable.cpp | 728 | ||||
-rw-r--r-- | irstlm/src/lmtable.h | 245 | ||||
-rw-r--r-- | irstlm/src/mempool.cpp | 516 | ||||
-rw-r--r-- | irstlm/src/mempool.h | 181 | ||||
-rw-r--r-- | irstlm/src/ngram.cpp | 214 | ||||
-rw-r--r-- | irstlm/src/ngram.h | 117 |
28 files changed, 0 insertions, 5597 deletions
diff --git a/irstlm/.cdtbuild b/irstlm/.cdtbuild deleted file mode 100644 index 5c8f99dd4..000000000 --- a/irstlm/.cdtbuild +++ /dev/null @@ -1,47 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<?fileVersion 3.0.0?> - -<ManagedProjectBuildInfo> -<project id="irstlm.cdt.managedbuild.target.gnu.lib.1070956508" name="Static Library (Gnu)" projectType="cdt.managedbuild.target.gnu.lib"> -<configuration artifactExtension="a" artifactName="irstlm" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.debug.8750958" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug"> -<toolChain id="cdt.managedbuild.toolchain.gnu.lib.debug.1732402088" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.debug"> -<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.208381076" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug"/> -<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.1534243185" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug"> -<option id="gnu.cpp.compiler.option.debugging.gprof.1713594612" superClass="gnu.cpp.compiler.option.debugging.gprof" value="true" valueType="boolean"/> -</tool> -<tool id="cdt.managedbuild.tool.gnu.archiver.lib.debug.1727542516" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.debug"/> -<tool id="cdt.managedbuild.tool.gnu.assembler.lib.debug.1884793796" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.debug"/> -<macros/> -</toolChain> -</configuration> -<configuration artifactExtension="a" artifactName="irstlm" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.release.1538178030" name="Release" parent="cdt.managedbuild.config.gnu.lib.release"> -<toolChain id="cdt.managedbuild.toolchain.gnu.lib.release.508823597" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.release"> -<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.release.723647841" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.release"/> -<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release.1586280207" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release"/> -<tool id="cdt.managedbuild.tool.gnu.archiver.lib.release.1518934657" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.release"/> -<tool id="cdt.managedbuild.tool.gnu.assembler.lib.release.1672118671" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.release"/> -<macros/> -</toolChain> -</configuration> -<configuration artifactExtension="a" artifactName="irstlm" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.debug.1333974501" name="DebugNBest" parent="cdt.managedbuild.config.gnu.lib.debug"> -<toolChain id="cdt.managedbuild.toolchain.gnu.lib.debug.365917155" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.debug"> -<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.100325283" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug"/> -<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.955425850" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug"> -<option id="gnu.cpp.compiler.option.debugging.gprof.2034209861" superClass="gnu.cpp.compiler.option.debugging.gprof" value="true" valueType="boolean"/> -</tool> -<tool id="cdt.managedbuild.tool.gnu.archiver.lib.debug.370966026" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.debug"/> -<tool id="cdt.managedbuild.tool.gnu.assembler.lib.debug.720742733" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.debug"/> -<macros/> -</toolChain> -</configuration> -<configuration artifactExtension="a" artifactName="irstlm" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.release.32089835" name="ReleaseNBest" parent="cdt.managedbuild.config.gnu.lib.release"> -<toolChain id="cdt.managedbuild.toolchain.gnu.lib.release.158080822" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.release"> -<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.release.1803995257" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.release"/> -<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release.152646939" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release"/> -<tool id="cdt.managedbuild.tool.gnu.archiver.lib.release.58978613" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.release"/> -<tool id="cdt.managedbuild.tool.gnu.assembler.lib.release.2018125558" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.release"/> -<macros/> -</toolChain> -</configuration> -</project> -</ManagedProjectBuildInfo> diff --git a/irstlm/.cdtproject b/irstlm/.cdtproject deleted file mode 100644 index 41c23c46b..000000000 --- a/irstlm/.cdtproject +++ /dev/null @@ -1,15 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<?eclipse-cdt version="2.0"?> - -<cdtproject id="org.eclipse.cdt.managedbuilder.core.managedMake"> -<extension id="org.eclipse.cdt.managedbuilder.core.ManagedBuildManager" point="org.eclipse.cdt.core.ScannerInfoProvider"/> -<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/> -<extension id="org.eclipse.cdt.core.domsourceindexer" point="org.eclipse.cdt.core.CIndexer"/> -<data> -<item id="org.eclipse.cdt.core.pathentry"> -<pathentry kind="src" path=""/> -<pathentry kind="out" path=""/> -<pathentry kind="con" path="org.eclipse.cdt.managedbuilder.MANAGED_CONTAINER"/> -</item> -</data> -</cdtproject> diff --git a/irstlm/.cvsignore b/irstlm/.cvsignore deleted file mode 100644 index 9816a999f..000000000 --- a/irstlm/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Debug* -Release* diff --git a/irstlm/.project b/irstlm/.project deleted file mode 100644 index bb14e64fb..000000000 --- a/irstlm/.project +++ /dev/null @@ -1,19 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<projectDescription> - <name>irstlm</name> - <comment></comment> - <projects> - </projects> - <buildSpec> - <buildCommand> - <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name> - <arguments> - </arguments> - </buildCommand> - </buildSpec> - <natures> - <nature>org.eclipse.cdt.core.cnature</nature> - <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature> - <nature>org.eclipse.cdt.core.ccnature</nature> - </natures> -</projectDescription> diff --git a/irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs b/irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs deleted file mode 100644 index d0951526a..000000000 --- a/irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs +++ /dev/null @@ -1,13 +0,0 @@ -#Thu Jul 27 11:13:57 EDT 2006 -=\=\=\=\=\=\= -<<<<<<<=org.eclipse.cdt.managedbuilder.core.prefs ->>>>>>>=1.2 -eclipse.preferences.version=1 -environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.debug.1333974501=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n -environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.debug.8750958=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n -environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.release.1538178030=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n -environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.release.32089835=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n -environment/project=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n -environment/project/cdt.managedbuild.config.gnu.lib.debug.1333974501=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n -environment/project/cdt.managedbuild.config.gnu.lib.debug.8750958=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n -environment/project/cdt.managedbuild.config.gnu.lib.release.1538178030=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n diff --git a/irstlm/Makefile.am b/irstlm/Makefile.am deleted file mode 100644 index 4566b2680..000000000 --- a/irstlm/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -# not a GNU package. You can remove this line, if -# have all needed files, that a GNU package needs -AUTOMAKE_OPTIONS = foreign -SUBDIRS = src - diff --git a/irstlm/README b/irstlm/README deleted file mode 100644 index 17a9b920f..000000000 --- a/irstlm/README +++ /dev/null @@ -1,14 +0,0 @@ -To build: - - aclocal - autoconf - automake - - ./configure --with-prefix=PATH TO INSTALL (probably `pwd`) - make - make install - -*Make install is important since it creates the include/ and lib/ directories -that client software will depend on. - - diff --git a/irstlm/config.h.in b/irstlm/config.h.in deleted file mode 100644 index b292ea963..000000000 --- a/irstlm/config.h.in +++ /dev/null @@ -1,22 +0,0 @@ -/* config.h.in. Generated from configure.in by autoheader. */ - -/* Name of package */ -#undef PACKAGE - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* Version number of package */ -#undef VERSION diff --git a/irstlm/configure.in b/irstlm/configure.in deleted file mode 100644 index c2ad8dda5..000000000 --- a/irstlm/configure.in +++ /dev/null @@ -1,11 +0,0 @@ -AC_INIT(src) - -AM_CONFIG_HEADER(config.h) -AM_INIT_AUTOMAKE(irstlm, 1.0) - -AC_PROG_CXX -AC_LANG_CPLUSPLUS -AC_PROG_RANLIB -#AM_PROG_LIBTOOL - -AC_OUTPUT(Makefile src/Makefile) diff --git a/irstlm/depcomp b/irstlm/depcomp deleted file mode 100755 index 11e2d3bfe..000000000 --- a/irstlm/depcomp +++ /dev/null @@ -1,522 +0,0 @@ -#! /bin/sh -# depcomp - compile a program generating dependencies as side-effects - -scriptversion=2004-05-31.23 - -# Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -# 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>. - -case $1 in - '') - echo "$0: No command. Try \`$0 --help' for more information." 1>&2 - exit 1; - ;; - -h | --h*) - cat <<\EOF -Usage: depcomp [--help] [--version] PROGRAM [ARGS] - -Run PROGRAMS ARGS to compile a file, generating dependencies -as side-effects. - -Environment variables: - depmode Dependency tracking mode. - source Source file read by `PROGRAMS ARGS'. - object Object file output by `PROGRAMS ARGS'. - DEPDIR directory where to store dependencies. - depfile Dependency file to output. - tmpdepfile Temporary file to use when outputing dependencies. - libtool Whether libtool is used (yes/no). - -Report bugs to <bug-automake@gnu.org>. -EOF - exit 0 - ;; - -v | --v*) - echo "depcomp $scriptversion" - exit 0 - ;; -esac - -if test -z "$depmode" || test -z "$source" || test -z "$object"; then - echo "depcomp: Variables source, object and depmode must be set" 1>&2 - exit 1 -fi - -# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. -depfile=${depfile-`echo "$object" | - sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} -tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} - -rm -f "$tmpdepfile" - -# Some modes work just like other modes, but use different flags. We -# parameterize here, but still list the modes in the big case below, -# to make depend.m4 easier to write. Note that we *cannot* use a case -# here, because this file can only contain one case statement. -if test "$depmode" = hp; then - # HP compiler uses -M and no extra arg. - gccflag=-M - depmode=gcc -fi - -if test "$depmode" = dashXmstdout; then - # This is just like dashmstdout with a different argument. - dashmflag=-xM - depmode=dashmstdout -fi - -case "$depmode" in -gcc3) -## gcc 3 implements dependency tracking that does exactly what -## we want. Yay! Note: for some reason libtool 1.4 doesn't like -## it if -MD -MP comes after the -MF stuff. Hmm. - "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - mv "$tmpdepfile" "$depfile" - ;; - -gcc) -## There are various ways to get dependency output from gcc. Here's -## why we pick this rather obscure method: -## - Don't want to use -MD because we'd like the dependencies to end -## up in a subdir. Having to rename by hand is ugly. -## (We might end up doing this anyway to support other compilers.) -## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like -## -MM, not -M (despite what the docs say). -## - Using -M directly means running the compiler twice (even worse -## than renaming). - if test -z "$gccflag"; then - gccflag=-MD, - fi - "$@" -Wp,"$gccflag$tmpdepfile" - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - echo "$object : \\" > "$depfile" - alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz -## The second -e expression handles DOS-style file names with drive letters. - sed -e 's/^[^:]*: / /' \ - -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" -## This next piece of magic avoids the `deleted header file' problem. -## The problem is that when a header file which appears in a .P file -## is deleted, the dependency causes make to die (because there is -## typically no way to rebuild the header). We avoid this by adding -## dummy dependencies for each header file. Too bad gcc doesn't do -## this for us directly. - tr ' ' ' -' < "$tmpdepfile" | -## Some versions of gcc put a space before the `:'. On the theory -## that the space means something, we add a space to the output as -## well. -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -hp) - # This case exists only to let depend.m4 do its work. It works by - # looking at the text of this script. This case will never be run, - # since it is checked for above. - exit 1 - ;; - -sgi) - if test "$libtool" = yes; then - "$@" "-Wp,-MDupdate,$tmpdepfile" - else - "$@" -MDupdate "$tmpdepfile" - fi - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - - if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files - echo "$object : \\" > "$depfile" - - # Clip off the initial element (the dependent). Don't try to be - # clever and replace this with sed code, as IRIX sed won't handle - # lines with more than a fixed number of characters (4096 in - # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; - # the IRIX cc adds comments like `#:fec' to the end of the - # dependency line. - tr ' ' ' -' < "$tmpdepfile" \ - | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ - tr ' -' ' ' >> $depfile - echo >> $depfile - - # The second pass generates a dummy entry for each header file. - tr ' ' ' -' < "$tmpdepfile" \ - | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ - >> $depfile - else - # The sourcefile does not contain any dependencies, so just - # store a dummy comment line, to avoid errors with the Makefile - # "include basename.Plo" scheme. - echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -aix) - # The C for AIX Compiler uses -M and outputs the dependencies - # in a .u file. In older versions, this file always lives in the - # current directory. Also, the AIX compiler puts `$object:' at the - # start of each line; $object doesn't have directory information. - # Version 6 uses the directory in both cases. - stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'` - tmpdepfile="$stripped.u" - if test "$libtool" = yes; then - "$@" -Wc,-M - else - "$@" -M - fi - stat=$? - - if test -f "$tmpdepfile"; then : - else - stripped=`echo "$stripped" | sed 's,^.*/,,'` - tmpdepfile="$stripped.u" - fi - - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - - if test -f "$tmpdepfile"; then - outname="$stripped.o" - # Each line is of the form `foo.o: dependent.h'. - # Do two passes, one to just change these to - # `$object: dependent.h' and one to simply `dependent.h:'. - sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile" - sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile" - else - # The sourcefile does not contain any dependencies, so just - # store a dummy comment line, to avoid errors with the Makefile - # "include basename.Plo" scheme. - echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -icc) - # Intel's C compiler understands `-MD -MF file'. However on - # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c - # ICC 7.0 will fill foo.d with something like - # foo.o: sub/foo.c - # foo.o: sub/foo.h - # which is wrong. We want: - # sub/foo.o: sub/foo.c - # sub/foo.o: sub/foo.h - # sub/foo.c: - # sub/foo.h: - # ICC 7.1 will output - # foo.o: sub/foo.c sub/foo.h - # and will wrap long lines using \ : - # foo.o: sub/foo.c ... \ - # sub/foo.h ... \ - # ... - - "$@" -MD -MF "$tmpdepfile" - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - # Each line is of the form `foo.o: dependent.h', - # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. - # Do two passes, one to just change these to - # `$object: dependent.h' and one to simply `dependent.h:'. - sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" - # Some versions of the HPUX 10.20 sed can't process this invocation - # correctly. Breaking it into two sed invocations is a workaround. - sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | - sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -tru64) - # The Tru64 compiler uses -MD to generate dependencies as a side - # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. - # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put - # dependencies in `foo.d' instead, so we check for that too. - # Subdirectories are respected. - dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` - test "x$dir" = "x$object" && dir= - base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` - - if test "$libtool" = yes; then - # Dependencies are output in .lo.d with libtool 1.4. - # With libtool 1.5 they are output both in $dir.libs/$base.o.d - # and in $dir.libs/$base.o.d and $dir$base.o.d. We process the - # latter, because the former will be cleaned when $dir.libs is - # erased. - tmpdepfile1="$dir.libs/$base.lo.d" - tmpdepfile2="$dir$base.o.d" - tmpdepfile3="$dir.libs/$base.d" - "$@" -Wc,-MD - else - tmpdepfile1="$dir$base.o.d" - tmpdepfile2="$dir$base.d" - tmpdepfile3="$dir$base.d" - "$@" -MD - fi - - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" - exit $stat - fi - - if test -f "$tmpdepfile1"; then - tmpdepfile="$tmpdepfile1" - elif test -f "$tmpdepfile2"; then - tmpdepfile="$tmpdepfile2" - else - tmpdepfile="$tmpdepfile3" - fi - if test -f "$tmpdepfile"; then - sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" - # That's a tab and a space in the []. - sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" - else - echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -#nosideeffect) - # This comment above is used by automake to tell side-effect - # dependency tracking mechanisms from slower ones. - -dashmstdout) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout, regardless of -o. - "$@" || exit $? - - # Remove the call to Libtool. - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - - # Remove `-o $object'. - IFS=" " - for arg - do - case $arg in - -o) - shift - ;; - $object) - shift - ;; - *) - set fnord "$@" "$arg" - shift # fnord - shift # $arg - ;; - esac - done - - test -z "$dashmflag" && dashmflag=-M - # Require at least two characters before searching for `:' - # in the target name. This is to cope with DOS-style filenames: - # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. - "$@" $dashmflag | - sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" - rm -f "$depfile" - cat < "$tmpdepfile" > "$depfile" - tr ' ' ' -' < "$tmpdepfile" | \ -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -dashXmstdout) - # This case only exists to satisfy depend.m4. It is never actually - # run, as this mode is specially recognized in the preamble. - exit 1 - ;; - -makedepend) - "$@" || exit $? - # Remove any Libtool call - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - # X makedepend - shift - cleared=no - for arg in "$@"; do - case $cleared in - no) - set ""; shift - cleared=yes ;; - esac - case "$arg" in - -D*|-I*) - set fnord "$@" "$arg"; shift ;; - # Strip any option that makedepend may not understand. Remove - # the object too, otherwise makedepend will parse it as a source file. - -*|$object) - ;; - *) - set fnord "$@" "$arg"; shift ;; - esac - done - obj_suffix="`echo $object | sed 's/^.*\././'`" - touch "$tmpdepfile" - ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" - rm -f "$depfile" - cat < "$tmpdepfile" > "$depfile" - sed '1,2d' "$tmpdepfile" | tr ' ' ' -' | \ -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" "$tmpdepfile".bak - ;; - -cpp) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout. - "$@" || exit $? - - # Remove the call to Libtool. - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - - # Remove `-o $object'. - IFS=" " - for arg - do - case $arg in - -o) - shift - ;; - $object) - shift - ;; - *) - set fnord "$@" "$arg" - shift # fnord - shift # $arg - ;; - esac - done - - "$@" -E | - sed -n '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | - sed '$ s: \\$::' > "$tmpdepfile" - rm -f "$depfile" - echo "$object : \\" > "$depfile" - cat < "$tmpdepfile" >> "$depfile" - sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -msvisualcpp) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout, regardless of -o, - # because we must use -o when running libtool. - "$@" || exit $? - IFS=" " - for arg - do - case "$arg" in - "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") - set fnord "$@" - shift - shift - ;; - *) - set fnord "$@" "$arg" - shift - shift - ;; - esac - done - "$@" -E | - sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile" - rm -f "$depfile" - echo "$object : \\" > "$depfile" - . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" - echo " " >> "$depfile" - . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -none) - exec "$@" - ;; - -*) - echo "Unknown depmode $depmode" 1>&2 - exit 1 - ;; -esac - -exit 0 - -# Local Variables: -# mode: shell-script -# sh-indentation: 2 -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-end: "$" -# End: diff --git a/irstlm/install-sh b/irstlm/install-sh deleted file mode 100755 index dd97db7aa..000000000 --- a/irstlm/install-sh +++ /dev/null @@ -1,322 +0,0 @@ -#!/bin/sh -# install - install a program, script, or datafile - -scriptversion=2004-09-10.20 - -# This originates from X11R5 (mit/util/scripts/install.sh), which was -# later released in X11R6 (xc/config/util/install.sh) with the -# following copyright and license. -# -# Copyright (C) 1994 X Consortium -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- -# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# Except as contained in this notice, the name of the X Consortium shall not -# be used in advertising or otherwise to promote the sale, use or other deal- -# ings in this Software without prior written authorization from the X Consor- -# tium. -# -# -# FSF changes to this file are in the public domain. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. It can only install one file at a time, a restriction -# shared with many OS's install programs. - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" - -# put in absolute paths if you don't have them in your path; or use env. vars. - -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -chmodcmd="$chmodprog 0755" -chowncmd= -chgrpcmd= -stripcmd= -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src= -dst= -dir_arg= -dstarg= -no_target_directory= - -usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE - or: $0 [OPTION]... SRCFILES... DIRECTORY - or: $0 [OPTION]... -t DIRECTORY SRCFILES... - or: $0 [OPTION]... -d DIRECTORIES... - -In the 1st form, copy SRCFILE to DSTFILE. -In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. -In the 4th, create DIRECTORIES. - -Options: --c (ignored) --d create directories instead of installing files. --g GROUP $chgrpprog installed files to GROUP. --m MODE $chmodprog installed files to MODE. --o USER $chownprog installed files to USER. --s $stripprog installed files. --t DIRECTORY install into DIRECTORY. --T report an error if DSTFILE is a directory. ---help display this help and exit. ---version display version info and exit. - -Environment variables override the default commands: - CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG -" - -while test -n "$1"; do - case $1 in - -c) shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - --help) echo "$usage"; exit 0;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -s) stripcmd=$stripprog - shift - continue;; - - -t) dstarg=$2 - shift - shift - continue;; - - -T) no_target_directory=true - shift - continue;; - - --version) echo "$0 $scriptversion"; exit 0;; - - *) # When -d is used, all remaining arguments are directories to create. - # When -t is used, the destination is already specified. - test -n "$dir_arg$dstarg" && break - # Otherwise, the last argument is the destination. Remove it from $@. - for arg - do - if test -n "$dstarg"; then - # $@ is not empty: it contains at least $arg. - set fnord "$@" "$dstarg" - shift # fnord - fi - shift # arg - dstarg=$arg - done - break;; - esac -done - -if test -z "$1"; then - if test -z "$dir_arg"; then - echo "$0: no input file specified." >&2 - exit 1 - fi - # It's OK to call `install-sh -d' without argument. - # This can happen when creating conditional directories. - exit 0 -fi - -for src -do - # Protect names starting with `-'. - case $src in - -*) src=./$src ;; - esac - - if test -n "$dir_arg"; then - dst=$src - src= - - if test -d "$dst"; then - mkdircmd=: - chmodcmd= - else - mkdircmd=$mkdirprog - fi - else - # Waiting for this to be detected by the "$cpprog $src $dsttmp" command - # might cause directories to be created, which would be especially bad - # if $src (and thus $dsttmp) contains '*'. - if test ! -f "$src" && test ! -d "$src"; then - echo "$0: $src does not exist." >&2 - exit 1 - fi - - if test -z "$dstarg"; then - echo "$0: no destination specified." >&2 - exit 1 - fi - - dst=$dstarg - # Protect names starting with `-'. - case $dst in - -*) dst=./$dst ;; - esac - - # If destination is a directory, append the input filename; won't work - # if double slashes aren't ignored. - if test -d "$dst"; then - if test -n "$no_target_directory"; then - echo "$0: $dstarg: Is a directory" >&2 - exit 1 - fi - dst=$dst/`basename "$src"` - fi - fi - - # This sed command emulates the dirname command. - dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - - # Make sure that the destination directory exists. - - # Skip lots of stat calls in the usual case. - if test ! -d "$dstdir"; then - defaultIFS=' - ' - IFS="${IFS-$defaultIFS}" - - oIFS=$IFS - # Some sh's can't handle IFS=/ for some reason. - IFS='%' - set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'` - IFS=$oIFS - - pathcomp= - - while test $# -ne 0 ; do - pathcomp=$pathcomp$1 - shift - if test ! -d "$pathcomp"; then - $mkdirprog "$pathcomp" - # mkdir can fail with a `File exist' error in case several - # install-sh are creating the directory concurrently. This - # is OK. - test -d "$pathcomp" || exit - fi - pathcomp=$pathcomp/ - done - fi - - if test -n "$dir_arg"; then - $doit $mkdircmd "$dst" \ - && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \ - && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \ - && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \ - && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; } - - else - dstfile=`basename "$dst"` - - # Make a couple of temp file names in the proper directory. - dsttmp=$dstdir/_inst.$$_ - rmtmp=$dstdir/_rm.$$_ - - # Trap to clean up those temp files at exit. - trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 - trap '(exit $?); exit' 1 2 13 15 - - # Copy the file name to the temp name. - $doit $cpprog "$src" "$dsttmp" && - - # and set any options; do chmod last to preserve setuid bits. - # - # If any of these fail, we abort the whole thing. If we want to - # ignore errors from any of these, just make sure not to ignore - # errors from the above "$doit $cpprog $src $dsttmp" command. - # - { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \ - && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \ - && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \ - && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } && - - # Now rename the file to the real destination. - { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \ - || { - # The rename failed, perhaps because mv can't rename something else - # to itself, or perhaps because mv is so ancient that it does not - # support -f. - - # Now remove or move aside any old file at destination location. - # We try this two ways since rm can't unlink itself on some - # systems and the destination file might be busy for other - # reasons. In this case, the final cleanup might fail but the new - # file should still install successfully. - { - if test -f "$dstdir/$dstfile"; then - $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \ - || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \ - || { - echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2 - (exit 1); exit - } - else - : - fi - } && - - # Now rename the file to the real destination. - $doit $mvcmd "$dsttmp" "$dstdir/$dstfile" - } - } - fi || { (exit 1); exit; } -done - -# The final little trick to "correctly" pass the exit status to the exit trap. -{ - (exit 0); exit -} - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-end: "$" -# End: diff --git a/irstlm/irstlm.vcproj b/irstlm/irstlm.vcproj deleted file mode 100644 index 0ceb12191..000000000 --- a/irstlm/irstlm.vcproj +++ /dev/null @@ -1,347 +0,0 @@ -<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
- ProjectType="Visual C++"
- Version="8.00"
- Name="irstlm"
- ProjectGUID="{19C023D8-67DE-4609-9C89-3152EF95995D}"
- RootNamespace="irstlm"
- Keyword="ManagedCProj"
- >
- <Platforms>
- <Platform
- Name="Win32"
- />
- </Platforms>
- <ToolFiles>
- </ToolFiles>
- <Configurations>
- <Configuration
- Name="Debug|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="4"
- CharacterSet="1"
- ManagedExtensions="0"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- RuntimeLibrary="3"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- DebugInformationFormat="3"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLibrarianTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Release|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="4"
- CharacterSet="1"
- ManagedExtensions="0"
- WholeProgramOptimization="1"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- RuntimeLibrary="2"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- DebugInformationFormat="3"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLibrarianTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="DebugNBest|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="4"
- CharacterSet="1"
- ManagedExtensions="0"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- RuntimeLibrary="3"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- DebugInformationFormat="3"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLibrarianTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="ReleaseNBest|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="4"
- CharacterSet="1"
- ManagedExtensions="0"
- WholeProgramOptimization="1"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- RuntimeLibrary="2"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- DebugInformationFormat="3"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLibrarianTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- </Configurations>
- <References>
- <AssemblyReference
- RelativePath="System.dll"
- AssemblyName="System, Version=2.0.0.0, PublicKeyToken=b77a5c561934e089, processorArchitecture=MSIL"
- />
- <AssemblyReference
- RelativePath="System.Data.dll"
- AssemblyName="System.Data, Version=2.0.0.0, PublicKeyToken=b77a5c561934e089, processorArchitecture=x86"
- />
- <AssemblyReference
- RelativePath="System.XML.dll"
- AssemblyName="System.Xml, Version=2.0.0.0, PublicKeyToken=b77a5c561934e089, processorArchitecture=MSIL"
- />
- </References>
- <Files>
- <Filter
- Name="Source Files"
- Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
- UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
- >
- <File
- RelativePath=".\src\cmd.c"
- >
- </File>
- <File
- RelativePath=".\src\dictionary.cpp"
- >
- </File>
- <File
- RelativePath=".\src\htable.cpp"
- >
- </File>
- <File
- RelativePath=".\src\lmtable.cpp"
- >
- </File>
- <File
- RelativePath=".\src\mempool.cpp"
- >
- </File>
- <File
- RelativePath=".\src\ngram.cpp"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h;hpp;hxx;hm;inl;inc;xsd"
- UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
- >
- <File
- RelativePath=".\src\cmd.h"
- >
- </File>
- <File
- RelativePath=".\src\dictionary.h"
- >
- </File>
- <File
- RelativePath=".\src\htable.h"
- >
- </File>
- <File
- RelativePath=".\src\index.h"
- >
- </File>
- <File
- RelativePath=".\src\lmtable.h"
- >
- </File>
- <File
- RelativePath=".\src\mempool.h"
- >
- </File>
- <File
- RelativePath=".\src\ngram.h"
- >
- </File>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
- UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
- >
- </Filter>
- </Files>
- <Globals>
- </Globals>
-</VisualStudioProject>
diff --git a/irstlm/missing b/irstlm/missing deleted file mode 100755 index 64b5f901d..000000000 --- a/irstlm/missing +++ /dev/null @@ -1,353 +0,0 @@ -#! /bin/sh -# Common stub for a few missing GNU programs while installing. - -scriptversion=2004-09-07.08 - -# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004 -# Free Software Foundation, Inc. -# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -# 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -if test $# -eq 0; then - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 -fi - -run=: - -# In the cases where this matters, `missing' is being run in the -# srcdir already. -if test -f configure.ac; then - configure_ac=configure.ac -else - configure_ac=configure.in -fi - -msg="missing on your system" - -case "$1" in ---run) - # Try to run requested program, and just exit if it succeeds. - run= - shift - "$@" && exit 0 - # Exit code 63 means version mismatch. This often happens - # when the user try to use an ancient version of a tool on - # a file that requires a minimum version. In this case we - # we should proceed has if the program had been absent, or - # if --run hadn't been passed. - if test $? = 63; then - run=: - msg="probably too old" - fi - ;; - - -h|--h|--he|--hel|--help) - echo "\ -$0 [OPTION]... PROGRAM [ARGUMENT]... - -Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an -error status if there is no known handling for PROGRAM. - -Options: - -h, --help display this help and exit - -v, --version output version information and exit - --run try to run the given command, and emulate it if it fails - -Supported PROGRAM values: - aclocal touch file \`aclocal.m4' - autoconf touch file \`configure' - autoheader touch file \`config.h.in' - automake touch all \`Makefile.in' files - bison create \`y.tab.[ch]', if possible, from existing .[ch] - flex create \`lex.yy.c', if possible, from existing .c - help2man touch the output file - lex create \`lex.yy.c', if possible, from existing .c - makeinfo touch the output file - tar try tar, gnutar, gtar, then tar without non-portable flags - yacc create \`y.tab.[ch]', if possible, from existing .[ch] - -Send bug reports to <bug-automake@gnu.org>." - exit 0 - ;; - - -v|--v|--ve|--ver|--vers|--versi|--versio|--version) - echo "missing $scriptversion (GNU Automake)" - exit 0 - ;; - - -*) - echo 1>&2 "$0: Unknown \`$1' option" - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 - ;; - -esac - -# Now exit if we have it, but it failed. Also exit now if we -# don't have it and --version was passed (most likely to detect -# the program). -case "$1" in - lex|yacc) - # Not GNU programs, they don't have --version. - ;; - - tar) - if test -n "$run"; then - echo 1>&2 "ERROR: \`tar' requires --run" - exit 1 - elif test "x$2" = "x--version" || test "x$2" = "x--help"; then - exit 1 - fi - ;; - - *) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - elif test "x$2" = "x--version" || test "x$2" = "x--help"; then - # Could not run --version or --help. This is probably someone - # running `$TOOL --version' or `$TOOL --help' to check whether - # $TOOL exists and not knowing $TOOL uses missing. - exit 1 - fi - ;; -esac - -# If it does not exist, or fails to run (possibly an outdated version), -# try to emulate it. -case "$1" in - aclocal*) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`acinclude.m4' or \`${configure_ac}'. You might want - to install the \`Automake' and \`Perl' packages. Grab them from - any GNU archive site." - touch aclocal.m4 - ;; - - autoconf) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`${configure_ac}'. You might want to install the - \`Autoconf' and \`GNU m4' packages. Grab them from any GNU - archive site." - touch configure - ;; - - autoheader) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`acconfig.h' or \`${configure_ac}'. You might want - to install the \`Autoconf' and \`GNU m4' packages. Grab them - from any GNU archive site." - files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` - test -z "$files" && files="config.h" - touch_files= - for f in $files; do - case "$f" in - *:*) touch_files="$touch_files "`echo "$f" | - sed -e 's/^[^:]*://' -e 's/:.*//'`;; - *) touch_files="$touch_files $f.in";; - esac - done - touch $touch_files - ;; - - automake*) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. - You might want to install the \`Automake' and \`Perl' packages. - Grab them from any GNU archive site." - find . -type f -name Makefile.am -print | - sed 's/\.am$/.in/' | - while read f; do touch "$f"; done - ;; - - autom4te) - echo 1>&2 "\ -WARNING: \`$1' is needed, but is $msg. - You might have modified some files without having the - proper tools for further handling them. - You can get \`$1' as part of \`Autoconf' from any GNU - archive site." - - file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'` - test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'` - if test -f "$file"; then - touch $file - else - test -z "$file" || exec >$file - echo "#! /bin/sh" - echo "# Created by GNU Automake missing as a replacement of" - echo "# $ $@" - echo "exit 0" - chmod +x $file - exit 1 - fi - ;; - - bison|yacc) - echo 1>&2 "\ -WARNING: \`$1' $msg. You should only need it if - you modified a \`.y' file. You may need the \`Bison' package - in order for those modifications to take effect. You can get - \`Bison' from any GNU archive site." - rm -f y.tab.c y.tab.h - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.y) - SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.c - fi - SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.h - fi - ;; - esac - fi - if [ ! -f y.tab.h ]; then - echo >y.tab.h - fi - if [ ! -f y.tab.c ]; then - echo 'main() { return 0; }' >y.tab.c - fi - ;; - - lex|flex) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a \`.l' file. You may need the \`Flex' package - in order for those modifications to take effect. You can get - \`Flex' from any GNU archive site." - rm -f lex.yy.c - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.l) - SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" lex.yy.c - fi - ;; - esac - fi - if [ ! -f lex.yy.c ]; then - echo 'main() { return 0; }' >lex.yy.c - fi - ;; - - help2man) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a dependency of a manual page. You may need the - \`Help2man' package in order for those modifications to take - effect. You can get \`Help2man' from any GNU archive site." - - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'` - fi - if [ -f "$file" ]; then - touch $file - else - test -z "$file" || exec >$file - echo ".ab help2man is required to generate this page" - exit 1 - fi - ;; - - makeinfo) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a \`.texi' or \`.texinfo' file, or any other file - indirectly affecting the aspect of the manual. The spurious - call might also be the consequence of using a buggy \`make' (AIX, - DU, IRIX). You might want to install the \`Texinfo' package or - the \`GNU make' package. Grab either from any GNU archive site." - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` - file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file` - fi - touch $file - ;; - - tar) - shift - - # We have already tried tar in the generic part. - # Look for gnutar/gtar before invocation to avoid ugly error - # messages. - if (gnutar --version > /dev/null 2>&1); then - gnutar "$@" && exit 0 - fi - if (gtar --version > /dev/null 2>&1); then - gtar "$@" && exit 0 - fi - firstarg="$1" - if shift; then - case "$firstarg" in - *o*) - firstarg=`echo "$firstarg" | sed s/o//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - case "$firstarg" in - *h*) - firstarg=`echo "$firstarg" | sed s/h//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - fi - - echo 1>&2 "\ -WARNING: I can't seem to be able to run \`tar' with the given arguments. - You may want to install GNU tar or Free paxutils, or check the - command line arguments." - exit 1 - ;; - - *) - echo 1>&2 "\ -WARNING: \`$1' is needed, and is $msg. - You might have modified some files without having the - proper tools for further handling them. Check the \`README' file, - it often tells you about the needed prerequisites for installing - this package. You may also peek at any GNU archive site, in case - some other package would contain this missing \`$1' program." - exit 1 - ;; -esac - -exit 0 - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-end: "$" -# End: diff --git a/irstlm/src/Makefile.am b/irstlm/src/Makefile.am deleted file mode 100644 index ea26d6945..000000000 --- a/irstlm/src/Makefile.am +++ /dev/null @@ -1,19 +0,0 @@ -lib_LIBRARIES = libirstlm.a - -libirstlm_a_SOURCES = \ - dictionary.cpp \ - htable.cpp \ - lmtable.cpp \ - mempool.cpp \ - ngram.cpp - -library_includedir=$(includedir) -library_include_HEADERS = dictionary.h lmtable.h ngram.h - -bin_PROGRAMS = compile-lm - -AM_LDFLAGS=-L . -LIBS=-lirstlm - -compile_lm_SOURCES = compile-lm.cpp - diff --git a/irstlm/src/cmd.c b/irstlm/src/cmd.c deleted file mode 100644 index aeb36d7b9..000000000 --- a/irstlm/src/cmd.c +++ /dev/null @@ -1,661 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - -#include <stdio.h> -#include <stdlib.h> -#include <ctype.h> -#include <string.h> - -#include "cmd.h" - -static Enum_T BoolEnum[] = { - { "FALSE", 0 }, - { "TRUE", 1 }, - { 0, 0 } -}; - -#ifdef NEEDSTRDUP -char *strdup(); -#endif - -#define FALSE 0 -#define TRUE 1 - -#define LINSIZ 10240 -#define MAXPARAM 256 - -static char *GetLine(), - **str2array(); -static int Scan(), - SetParam(), - SetEnum(), - SetSubrange(), - SetStrArray(), - SetGte(), - SetLte(), - CmdError(), - EnumError(), - SubrangeError(), - GteError(), - LteError(), - PrintParam(), - PrintEnum(), - PrintStrArray(); - -static Cmd_T cmds[MAXPARAM+1]; -static char *SepString = " \t\n"; - -#if defined(__STDC__) -#include <stdarg.h> -int DeclareParams(char *ParName, ...) -#else -#include <varargs.h> -int DeclareParams(ParName, va_alist) -char *ParName; -va_dcl -#endif -{ - va_list args; - static int ParamN = 0; - int j, - c; - char *s; - -#if defined(__STDC__) - va_start(args, ParName); -#else - va_start(args); -#endif - for(;ParName;) { - if(ParamN==MAXPARAM) { - fprintf(stderr, "Too many parameters !!\n"); - break; - } - for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++) - ; - if(!c) { - fprintf(stderr, - "Warning: parameter \"%s\" declared twice.\n", - ParName); - } - for(c=ParamN; c>j; c--) { - cmds[c] = cmds[c-1]; - } - cmds[j].Name = ParName; - cmds[j].Type = va_arg(args, int); - cmds[j].Val = va_arg(args, void *); - switch(cmds[j].Type) { - case CMDENUMTYPE: /* get the pointer to Enum_T struct */ - cmds[j].p = va_arg(args, void *); - break; - case CMDSUBRANGETYPE: /* get the two extremes */ - cmds[j].p = (void*) calloc(2, sizeof(int)); - ((int*)cmds[j].p)[0] = va_arg(args, int); - ((int*)cmds[j].p)[1] = va_arg(args, int); - break; - case CMDGTETYPE: /* get lower or upper bound */ - case CMDLTETYPE: - cmds[j].p = (void*) calloc(1, sizeof(int)); - ((int*)cmds[j].p)[0] = va_arg(args, int); - break; - case CMDSTRARRAYTYPE: /* get the separators string */ - cmds[j].p = (s=va_arg(args, char*)) - ? (void*)strdup(s) : 0; - break; - case CMDBOOLTYPE: - cmds[j].Type = CMDENUMTYPE; - cmds[j].p = BoolEnum; - break; - case CMDDOUBLETYPE: /* nothing else is needed */ - case CMDINTTYPE: - case CMDSTRINGTYPE: - break; - default: - fprintf(stderr, "%s: %s %d %s \"%s\"\n", - "DeclareParam()", "Unknown Type", - cmds[j].Type, "for parameter", cmds[j].Name); - exit(1); - } - ParamN++; - ParName = va_arg(args, char *); - } - cmds[ParamN].Name = NULL; - va_end(args); - return 0; -} - -int GetParams(n, a, CmdFileName) -int *n; -char ***a; -char *CmdFileName; -{ - char *Line, - *ProgName; - int argc = *n; - char **argv = *a, - *s; - FILE *fp; - int IsPipe; - -#ifdef MSDOS -#define PATHSEP '\\' - char *dot = NULL; -#else -#define PATHSEP '/' -#endif - - if(!(Line=malloc(LINSIZ))) { - fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n", - LINSIZ); - exit(1); - } - if((ProgName=strrchr(*argv, PATHSEP))) { - ++ProgName; - } else { - ProgName = *argv; - } -#ifdef MSDOS - if(dot=strchr(ProgName, '.')) *dot = 0; -#endif - --argc; - ++argv; - for(;;) { - if(argc && argv[0][0]=='-' && argv[0][1]=='=') { - CmdFileName = argv[0]+2; - ++argv; - --argc; - } - if(!CmdFileName) { - break; - } - IsPipe = !strncmp(CmdFileName, "@@", 2); - fp = IsPipe - ? popen(CmdFileName+2, "r") - : strcmp(CmdFileName, "-") - ? fopen(CmdFileName, "r") - : stdin; - if(!fp) { - fprintf(stderr, "Unable to open command file %s\n", - CmdFileName); - exit(1); - } - while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) { - if(Scan(ProgName, cmds, Line)) { - CmdError(Line); - } - } - if(fp!=stdin) { - if(IsPipe) pclose(fp); else fclose(fp); - } - CmdFileName = NULL; - } - while(argc && **argv=='-' && (s=strchr(*argv, '='))) { - *s = ' '; - sprintf(Line, "%s/%s", ProgName, *argv+1); - *s = '='; - if(Scan(ProgName, cmds, Line)) CmdError(*argv); - --argc; - ++argv; - } - *n = argc; - *a = argv; -#ifdef MSDOS - if(dot) *dot = '.'; -#endif - free(Line); - return 0; -} - -int PrintParams(ValFlag, fp) -int ValFlag; -FILE *fp; -{ - int i; - - fflush(fp); - if(ValFlag) { - fprintf(fp, "Parameters Values:\n"); - } else { - fprintf(fp, "Parameters:\n"); - } - for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp); - fprintf(fp, "\n"); - fflush(fp); - return 0; -} - -int SPrintParams(a, pfx) -char ***a, - *pfx; -{ - int l, - n; - Cmd_T *cmd; - - if(!pfx) pfx=""; - l = strlen(pfx); - for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr; - a[0] = calloc(n, sizeof(char*)); - for(n=0, cmd=cmds; cmd->Name; cmd++) { - if(!cmd->ArgStr) continue; - a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2); - sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr); - ++n; - } - return n; -} - -static int CmdError(opt) -char *opt; -{ - fprintf(stderr, "Invalid option \"%s\"\n", opt); - fprintf(stderr, "This program expectes the following parameters:\n"); - PrintParams(FALSE, stderr); - exit(0); -} - -static int PrintParam(cmd, ValFlag, fp) -Cmd_T *cmd; -int ValFlag; -FILE *fp; -{ - fprintf(fp, "%4s", ""); - switch(cmd->Type) { - case CMDDOUBLETYPE: - fprintf(fp, "%s", cmd->Name); - if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val); - fprintf(fp, "\n"); - break; - case CMDENUMTYPE: - PrintEnum(cmd, ValFlag, fp); - break; - case CMDINTTYPE: - case CMDSUBRANGETYPE: - case CMDGTETYPE: - case CMDLTETYPE: - fprintf(fp, "%s", cmd->Name); - if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val); - fprintf(fp, "\n"); - break; - case CMDSTRINGTYPE: - fprintf(fp, "%s", cmd->Name); - if(ValFlag) { - if(*(char **)cmd->Val) { - fprintf(fp, ": \"%s\"", *(char **)cmd->Val); - } else { - fprintf(fp, ": %s", "NULL"); - } - } - fprintf(fp, "\n"); - break; - case CMDSTRARRAYTYPE: - PrintStrArray(cmd, ValFlag, fp); - break; - default: - fprintf(stderr, "%s: %s %d %s \"%s\"\n", - "PrintParam", - "Unknown Type", - cmd->Type, - "for parameter", - cmd->Name); - exit(1); - } - return 0; -} - -static char *GetLine(fp, n, Line) -FILE *fp; -int n; -char *Line; -{ - int j, - l, - offs=0; - - for(;;) { - if(!fgets(Line+offs, n-offs, fp)) { - return NULL; - } - if(Line[offs]=='#') continue; - l = strlen(Line+offs)-1; - Line[offs+l] = 0; - for(j=offs; Line[j] && isspace(Line[j]); j++, l--) - ; - if(l<1) continue; - if(j > offs) { - char *s = Line+offs, - *q = Line+j; - - while((*s++=*q++)) - ; - } - if(Line[offs+l-1]=='\\') { - offs += l; - Line[offs-1] = ' '; - } else { - break; - } - } - return Line; -} - -static int Scan(ProgName, cmds, Line) -char *ProgName, - *Line; -Cmd_T *cmds; -{ - char *q, - *p; - int i, - hl, - HasToMatch = FALSE, - c0, - c; - - p = Line+strspn(Line, SepString); - if(!(hl=strcspn(p, SepString))) { - return 0; - } - if((q=strchr(p, '/')) && q-p<hl) { - *q = 0; - if(strcmp(p, ProgName)) { - *q = '/'; - return 0; - } - *q = '/'; - HasToMatch=TRUE; - p = q+1; - } - if(!(hl = strcspn(p, SepString))) { - return 0; - } - c0 = p[hl]; - p[hl] = 0; - for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++) - ; - p[hl] = c0; - if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString)); - return HasToMatch && c; -} - -static int SetParam(cmd, s) -Cmd_T *cmd; -char *s; -{ - if(!*s && cmd->Type != CMDSTRINGTYPE) { - fprintf(stderr, - "WARNING: No value specified for parameter \"%s\"\n", - cmd->Name); - return 0; - } - switch(cmd->Type) { - case CMDDOUBLETYPE: - if(sscanf(s, "%lf", (double*)cmd->Val)!=1) { - fprintf(stderr, - "Float value required for parameter \"%s\"\n", - cmd->Name); - exit(1); - } - break; - case CMDENUMTYPE: - SetEnum(cmd, s); - break; - case CMDINTTYPE: - if(sscanf(s, "%d", (int*)cmd->Val)!=1) { - fprintf(stderr, - "Integer value required for parameter \"%s\"\n", - cmd->Name); - exit(1); - } - break; - case CMDSTRINGTYPE: - *(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL")) - ? strdup(s) - : 0; - break; - case CMDSTRARRAYTYPE: - SetStrArray(cmd, s); - break; - case CMDGTETYPE: - SetGte(cmd, s); - break; - case CMDLTETYPE: - SetLte(cmd, s); - break; - case CMDSUBRANGETYPE: - SetSubrange(cmd, s); - break; - default: - fprintf(stderr, "%s: %s %d %s \"%s\"\n", - "SetParam", - "Unknown Type", - cmd->Type, - "for parameter", - cmd->Name); - exit(1); - } - cmd->ArgStr = strdup(s); - return 0; -} - -static int SetEnum(cmd, s) -Cmd_T *cmd; -char *s; -{ - Enum_T *en; - - for(en=(Enum_T *)cmd->p; en->Name; en++) { - if(*en->Name && !strcmp(s, en->Name)) { - *(int *) cmd->Val = en->Idx; - return 0; - } - } - return EnumError(cmd, s); -} - -static int SetSubrange(cmd, s) -Cmd_T *cmd; -char *s; -{ - int n; - - if(sscanf(s, "%d", &n)!=1) { - fprintf(stderr, - "Integer value required for parameter \"%s\"\n", - cmd->Name); - exit(1); - } - if(n < *(int *)cmd->p || n > *((int *)cmd->p+1)) { - return SubrangeError(cmd, n); - } - *(int *)cmd->Val = n; - return 0; -} - -static int SetGte(cmd, s) -Cmd_T *cmd; -char *s; -{ - int n; - - if(sscanf(s, "%d", &n)!=1) { - fprintf(stderr, - "Integer value required for parameter \"%s\"\n", - cmd->Name); - exit(1); - } - if(n<*(int *)cmd->p) { - return GteError(cmd, n); - } - *(int *)cmd->Val = n; - return 0; -} - -static int SetStrArray(cmd, s) -Cmd_T *cmd; -char *s; -{ - *(char***)cmd->Val = str2array(s, (char*)cmd->p); - return 0; -} - -static int SetLte(cmd, s) -Cmd_T *cmd; -char *s; -{ - int n; - - if(sscanf(s, "%d", &n)!=1) { - fprintf(stderr, - "Integer value required for parameter \"%s\"\n", - cmd->Name); - exit(1); - } - if(n > *(int *)cmd->p) { - return LteError(cmd, n); - } - *(int *)cmd->Val = n; - return 0; -} - -static int EnumError(cmd, s) -Cmd_T *cmd; -char *s; -{ - Enum_T *en; - - fprintf(stderr, - "Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name); - fprintf(stderr, "Valid values are:\n"); - for(en=(Enum_T *)cmd->p; en->Name; en++) { - if(*en->Name) { - fprintf(stderr, " %s\n", en->Name); - } - } - fprintf(stderr, "\n"); - exit(1); -} - -static int GteError(cmd, n) -Cmd_T *cmd; -int n; -{ - fprintf(stderr, - "Value %d out of range for parameter \"%s\"\n", n, cmd->Name); - fprintf(stderr, "Valid values must be greater than or equal to %d\n", - *(int *)cmd->p); - exit(1); -} - -static int LteError(cmd, n) -Cmd_T *cmd; -int n; -{ - fprintf(stderr, - "Value %d out of range for parameter \"%s\"\n", n, cmd->Name); - fprintf(stderr, "Valid values must be less than or equal to %d\n", - *(int *)cmd->p); - exit(1); -} - -static int SubrangeError(cmd, n) -Cmd_T *cmd; -int n; -{ - fprintf(stderr, - "Value %d out of range for parameter \"%s\"\n", n, cmd->Name); - fprintf(stderr, "Valid values range from %d to %d\n", - *(int *)cmd->p, *((int *)cmd->p+1)); - exit(1); -} - -static int PrintEnum(cmd, ValFlag, fp) -Cmd_T *cmd; -int ValFlag; -FILE *fp; -{ - Enum_T *en; - - fprintf(fp, "%s", cmd->Name); - if(ValFlag) { - for(en=(Enum_T *)cmd->p; en->Name; en++) { - if(*en->Name && en->Idx==*(int *)cmd->Val) { - fprintf(fp, ": %s", en->Name); - } - } - } - fprintf(fp, "\n"); - return 0; -} - -static int PrintStrArray(cmd, ValFlag, fp) -Cmd_T *cmd; -int ValFlag; -FILE *fp; -{ - char *indent, - **s = *(char***)cmd->Val; - int l = 4+strlen(cmd->Name); - - fprintf(fp, "%s", cmd->Name); - indent = malloc(l+2); - memset(indent, ' ', l+1); - indent[l+1] = 0; - if(ValFlag) { - fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : ""); - if(s) while(*s) { - fprintf(fp, "\n%s %s", indent, *s++); - } - } - free(indent); - fprintf(fp, "\n"); - return 0; -} - -static char **str2array(s, sep) -char *s, - *sep; -{ - char *p, - **a; - int n = 0, - l; - - if(!sep) sep = SepString; - p = s += strspn(s, sep); - while(*p) { - p += strcspn(p, sep); - p += strspn(p, sep); - ++n; - } - a = calloc(n+1, sizeof(char *)); - p = s; - n = 0; - while(*p) { - l = strcspn(p, sep); - a[n] = malloc(l+1); - memcpy(a[n], p, l); - a[n][l] = 0; - ++n; - p += l; - p += strspn(p, sep); - } - return a; -} diff --git a/irstlm/src/cmd.h b/irstlm/src/cmd.h deleted file mode 100644 index 708905f6f..000000000 --- a/irstlm/src/cmd.h +++ /dev/null @@ -1,68 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - -#if !defined(CMD_H) - -#define CMD_H - -#define CMDDOUBLETYPE 1 -#define CMDENUMTYPE 2 -#define CMDINTTYPE 3 -#define CMDSTRINGTYPE 4 -#define CMDSUBRANGETYPE 5 -#define CMDGTETYPE 6 -#define CMDLTETYPE 7 -#define CMDSTRARRAYTYPE 8 -#define CMDBOOLTYPE 9 - -typedef struct { - char *Name; - int Idx; -} Enum_T; - -typedef struct { - int Type; - char *Name, - *ArgStr; - void *Val, - *p; -} Cmd_T; - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(__STDC__) -int DeclareParams(char *, ...); -#else -int DeclareParams(); -#endif - -int GetParams(int *n, char ***a,char *CmdFileName), - SPrintParams(), - PrintParams(); - -#ifdef __cplusplus -} -#endif -#endif - - - diff --git a/irstlm/src/compile-lm.cpp b/irstlm/src/compile-lm.cpp deleted file mode 100644 index 17c152fcf..000000000 --- a/irstlm/src/compile-lm.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit, compile LM - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - - -#include <iostream> -#include <fstream> -#include <vector> -#include <string> -#include <stdlib.h> - -#include "lmtable.h" - - -/* GLOBAL OPTIONS ***************/ -std::string sn = "0"; -std::string sres = "0"; -std::string sdecay = "0.95"; -/********************************/ - -void usage(const char *msg = 0) { - if (msg) { std::cerr << msg << std::endl; } - std::cerr << "Usage: compile-lm [options] input-file.lm [output-file.blm]" << std::endl; - if (!msg) std::cerr << std::endl - << " compile-lm reads a standard LM file in ARPA format and produces" << std::endl - << " a compiled representation that the IRST LM toolkit can quickly" << std::endl - << " read and process." << std::endl << std::endl; - std::cerr << "Options:\n -r=RESOLUTION\n -d=DECAY\n -n=NGRAM SIZE <required>\n\n"; -} - -bool starts_with(const std::string &s, const std::string &pre) { - if (pre.size() > s.size()) return false; - - if (pre == s) return true; - std::string pre_equals(pre+'='); - if (pre_equals.size() > s.size()) return false; - return (s.substr(0,pre_equals.size()) == pre_equals); -} - -std::string get_param(const std::string& opt, int argc, const char **argv, int& argi) -{ - std::string::size_type equals = opt.find_first_of('='); - if (equals != std::string::npos && equals < opt.size()-1) { - return opt.substr(equals+1); - } - std::string nexto; - if (argi + 1 < argc) { - nexto = argv[++argi]; - } else { - usage((opt + " requires a value!").c_str()); - exit(1); - } - return nexto; -} - -void handle_option(const std::string& opt, int argc, const char **argv, int& argi) -{ - if (opt == "--help" || opt == "-h") { usage(); exit(1); } - if (starts_with(opt, "--resolution") || starts_with(opt, "-r")) - sres = get_param(opt, argc, argv, argi); - else if (starts_with(opt, "--decay") || starts_with(opt, "-d")) - sdecay = get_param(opt, argc, argv, argi); - else if (starts_with(opt, "--ngram-size") || starts_with(opt, "-n")) - sn = get_param(opt, argc, argv, argi); - else { - usage(("Don't understand option " + opt).c_str()); - exit(1); - } -} - -int main(int argc, const char **argv) -{ - if (argc < 2) { usage(); exit(1); } - std::vector<std::string> files; - for (int i=1; i < argc; i++) { - std::string opt = argv[i]; - if (opt[0] == '-') { handle_option(opt, argc, argv, i); } - else files.push_back(opt); - } - if (files.size() > 2) { usage("Too many arguments"); exit(1); } - if (files.size() < 1) { usage("Please specify a LM file to read from"); exit(1); } - double decay = strtod(sdecay.c_str(),0); - int resolution = strtol(sres.c_str(),0,10); - int ngram_size = strtol(sn.c_str(),0,10); - if (ngram_size < 1) { usage("Please specify an ngram size greater than or equal 1 with -n"); exit(1); } - std::string infile = files[0]; - if (files.size() == 1) { - std::string::size_type p = infile.rfind('/'); - if (p != std::string::npos && ((p+1) < infile.size())) { - files.push_back(infile.substr(p+1) + ".blm"); - } else { - files.push_back(infile + ".blm"); - } - } - std::string outfile = files[1]; - std::cout << "Using decay=" << decay << ", resolution=" << resolution << std::endl; - std::cout << "Reading " << infile << "..." << std::endl; - std::ifstream inp(infile.c_str()); - if (!inp.good()) { - std::cerr << "Failed to open " << infile << "!\n"; - exit(1); - } - lmtable lmt(inp); - std::cout << "Saving to " << outfile << std::endl; - lmt.savebin(outfile.c_str()); - return 0; -} - diff --git a/irstlm/src/dictionary.cpp b/irstlm/src/dictionary.cpp deleted file mode 100644 index ab53116a2..000000000 --- a/irstlm/src/dictionary.cpp +++ /dev/null @@ -1,418 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - -#include <iomanip> -#include <iostream> -#include <fstream> -#include "mempool.h" -#include "htable.h" -#include "dictionary.h" -#include "index.h" - -using namespace std; - -dictionary::dictionary(char *filename,int size,char* isymb,char* oovlexfile){ - - // unitialized memory - if (oovlexfile!=NULL) - oovlex=new dictionary(oovlexfile,size,isymb,NULL); - else - oovlex=(dictionary *)NULL; - - htb = new htable(size/LOAD_FACTOR); - tb = new dict_entry[size]; - st = new strstack(size * 10); - - for (int i=0;i<size;i++) tb[i].freq=0; - - is=(char*) NULL; - intsymb(isymb); - - oov_code = -1; - in_oov_lex=0; - n = 0; - N = 0; - dubv = 0; - lim = size; - ifl=0; //increment flag - - if (filename==NULL) return; - - std::ifstream inp(filename,ios::in); - - if (!inp){ - cerr << "cannot open " << filename << "\n"; - exit(1); - } - - char buffer[100]; - - inp >> setw(100) >> buffer; - - inp.close(); - - if ((strncmp(buffer,"dict",4)==0) || - (strncmp(buffer,"DICT",4)==0)) - load(filename); - else - generate(filename); - - cerr << "loaded \n"; - - -} - - - -void dictionary::generate(char *filename){ - - char buffer[MAX_WORD]; - int k; - - ifstream inp(filename,ios::in); - - if (!inp){ - cerr << "cannot open " << filename << "\n"; - exit(1); - } - - cerr << "dict:"; - - ifl=1; k=0; - while (inp >> setw(MAX_WORD) >> buffer){ - - if (strlen(buffer)==(MAX_WORD-1)){ - cerr << "dictionary: a too long word was read (" - << buffer << ")\n"; - }; - - - if (strlen(buffer)==0){ - cerr << "zero lenght word!\n"; - continue; - } - - //if (is && (strlen(buffer)==1) && !index(is,buffer[0])) - if (is && (strlen(buffer)==1) && (index(is,buffer[0])!=NULL)) - continue; //skip over the interruption symbol - - incfreq(encode(buffer),1); - - if (!(++k % 1000000)) cerr << "."; - } - ifl=0; - cerr << "\n"; - - inp.close(); - -} - -void dictionary::load(char* filename){ - char header[100]; - char buffer[MAX_WORD]; - char *addr; - int freqflag=0; - - ifstream inp(filename,ios::in); - - if (!inp){ - cerr << "\ncannot open " << filename << "\n"; - exit(1); - } - - cerr << "dict:"; - - inp.getline(header,100); - if (strncmp(header,"DICT",4)==0) - freqflag=1; - else - if (strncmp(header,"dict",4)!=0){ - cerr << "\ndictionary file " << filename << " has a wrong header\n"; - exit(1); - } - - - while (inp >> setw(MAX_WORD) >> buffer){ - - if (strlen(buffer)==(MAX_WORD-1)){ - cerr << "\ndictionary: a too long word was read (" - << buffer << ")\n"; - }; - - tb[n].word=st->push(buffer); - tb[n].code=n; - - if (freqflag) - inp >> tb[n].freq; - else - tb[n].freq=0; - - if ((addr=htb->search((char *)&tb[n].word,HT_ENTER))) - if (addr!=(char *)&tb[n].word){ - cerr << "dictionary::loadtxt wrong entry was found (" - << buffer << ") in position " << n << "\n"; - exit(1); - } - - N+=tb[n].freq; - - if (strcmp(buffer,OOV())==0) oov_code=n; - - if (++n==lim) grow(); - - } - - inp.close(); -} - - -void dictionary::load(std::istream& inp){ - - char buffer[MAX_WORD]; - char *addr; - int size; - - inp >> size; - - for (int i=0;i<size;i++){ - - inp >> buffer; - - tb[n].word=st->push(buffer); - tb[n].code=n; - inp >> tb[n].freq; - N+=tb[n].freq; - - if ((addr=htb->search((char *)&tb[n].word,HT_ENTER))) - if (addr!=(char *)&tb[n].word){ - cerr << "dictionary::loadtxt wrong entry was found (" - << buffer << ") in position " << n << "\n"; - exit(1); - } - - if (strcmp(tb[n].word,OOV())==0) - oov_code=n; - - if (++n==lim) grow(); - } - inp.getline(buffer,MAX_WORD-1); -} - -void dictionary::save(std::ostream& out){ - out << n << "\n"; - for (int i=0;i<n;i++) - out << tb[i].word << " " << tb[i].freq << "\n"; -} - - -int cmpdictentry(const void *a,const void *b){ - dict_entry *ae=(dict_entry *)a; - dict_entry *be=(dict_entry *)b; - return be->freq-ae->freq; -} - -dictionary::dictionary(dictionary* d){ - - //transfer values - - n=d->n; //total entries - N=d->N; //total frequency - lim=d->lim; //limit of entries - oov_code=-1; //code od oov must be re-defined - ifl=0; //increment flag=0; - dubv=d->dubv; //dictionary upperbound transferred - in_oov_lex=0; //does not copy oovlex; - - - //creates a sorted copy of the table - - tb = new dict_entry[lim]; - htb = new htable(lim/LOAD_FACTOR); - st = new strstack(lim * 10); - - for (int i=0;i<n;i++){ - tb[i].code=d->tb[i].code; - tb[i].freq=d->tb[i].freq; - tb[i].word=st->push(d->tb[i].word); - } - - //sort all entries according to frequency - cerr << "sorting dictionary ..."; - qsort(tb,n,sizeof(dict_entry),cmpdictentry); - cerr << "done\n"; - - for (int i=0;i<n;i++){ - - //eventually re-assign oov code - if (d->oov_code==tb[i].code) oov_code=i; - - tb[i].code=i; - htb->search((char *)&tb[i].word,HT_ENTER); - }; - -} - - - -dictionary::~dictionary(){ - delete htb; - delete st; - delete [] tb; -} - -void dictionary::stat(){ - cout << "dictionary class statistics\n"; - cout << "size " << n - << " used memory " - << (lim * sizeof(int) + - htb->used() + - st->used())/1024 << " Kb\n"; -} - -void dictionary::grow(){ - - delete htb; - - cerr << "+\b"; - - dict_entry *tb2=new dict_entry[lim+GROWTH_STEP]; - - memcpy(tb2,tb,sizeof(dict_entry) * lim ); - - delete [] tb; tb=tb2; - - htb=new htable((lim+GROWTH_STEP)/LOAD_FACTOR); - - for (int i=0;i<lim;i++) - - htb->search((char *)&tb[i].word,HT_ENTER); - - for (int i=lim;i<lim+GROWTH_STEP;i++) tb[i].freq=0; - - lim+=GROWTH_STEP; - - -} - -void dictionary::save(char *filename,int freqflag){ - - std::ofstream out(filename,ios::out); - - if (!out){ - cerr << "cannot open " << filename << "\n"; - } - - // header - if (freqflag) - out << "DICTIONARY 0 " << n << "\n"; - else - out << "dictionary 0 " << n << "\n"; - - for (int i=0;i<n;i++){ - out << tb[i].word; - if (freqflag) - out << " " << tb[i].freq; - out << "\n"; - } - - out.close(); -} - - -int dictionary::getcode(const char *w){ - dict_entry* ptr=(dict_entry *)htb->search((char *)&w,HT_FIND); - if (ptr==NULL) return -1; - return ptr->code; -} - -int dictionary::encode(const char *w){ - - //case of strange characters - if (strlen(w)==0){cerr << "0";w=OOV();} - - dict_entry* ptr; - - if ((ptr=(dict_entry *)htb->search((char *)&w,HT_FIND))!=NULL) - return ptr->code; - else{ - if (!ifl){ //do not extend dictionary - if (oov_code==-1){ //did not use OOV yet - cerr << "starting to use OOV words [" << w << "]\n"; - tb[n].word=st->push(OOV()); - htb->search((char *)&tb[n].word,HT_ENTER); - tb[n].code=n; - tb[n].freq=0; - oov_code=n; - if (++n==lim) grow(); - } - //if there is an oov lexicon, check if this word belongs to - dict_entry* oovptr; - if (oovlex){ - if ((oovptr=(dict_entry *)oovlex->htb->search((char *)&w,HT_FIND))!=NULL){ - in_oov_lex=1; - oov_lex_code=oovptr->code; - }else - in_oov_lex=0; - } - return encode(OOV()); - } - else{ //extend dictionary - tb[n].word=st->push((char *)w); - htb->search((char *)&tb[n].word,HT_ENTER); - tb[n].code=n; - tb[n].freq=0; - if (++n==lim) grow(); - return n-1; - } - } -} - - -char *dictionary::decode(int c){ - if (c>=0 && c < n) - return tb[c].word; - else{ - cerr << "decode: code out of boundary\n"; - return OOV(); - } -} - - -dictionary_iter::dictionary_iter(dictionary *dict) : m_dict(dict) { - m_dict->htb->scan(HT_INIT); -} - -dict_entry* dictionary_iter::next() { - return (dict_entry*)m_dict->htb->scan(HT_CONT); -} - - - - - -/* -main(int argc,char **argv){ - dictionary d(argv[1],40000); - d.stat(); - cout << "ROMA" << d.decode(0) << "\n"; - cout << "ROMA:" << d.encode("ROMA") << "\n"; - d.save(argv[2]); -} -*/ diff --git a/irstlm/src/dictionary.h b/irstlm/src/dictionary.h deleted file mode 100644 index 494b240d2..000000000 --- a/irstlm/src/dictionary.h +++ /dev/null @@ -1,209 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - -/* - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef MF_DICTIONARY_H -#define MF_DICTIONARY_H - -#include <string.h> -#include <iostream> - -#define MAX_WORD 100 -#define LOAD_FACTOR 5 - -#ifndef GROWTH_STEP -#define GROWTH_STEP 100000 -#endif - -#ifndef DICT_INITSIZE -#define DICT_INITSIZE 100000 -#endif - - -//Begin of sentence symbol -#ifndef BOS_ -#define BOS_ "<s>" -#endif - - -//End of sentence symbol -#ifndef EOS_ -#define EOS_ "</s>" -#endif - -//End of sentence symbol -#ifndef OOV_ -#define OOV_ "_unk_" -#endif - - -typedef struct{ - char *word; - int code; - int freq; -}dict_entry; - -class strstack; -class htable; - -class dictionary{ - strstack *st; //!< stack of strings - dict_entry *tb; //!< entry table - htable *htb; //!< hash table - int n; //!< number of entries - int N; //!< total frequency - int lim; //!< limit of entries - int oov_code; //!< code assigned to oov words - char* is; //!< interruption symbol list - char ifl; //!< increment flag - int dubv; //!< dictionary size upper bound - int in_oov_lex; //!< flag - int oov_lex_code; //< dictionary - char* oov_str; //!< oov string - - public: - - friend class dictionary_iter; - - dictionary* oovlex; //<! additional dictionary - - inline int dub(){return dubv;} - inline int dub(int value){return (dubv=value);} - - inline char *OOV(){return (OOV_);} - inline char *BoS(){return (BOS_);} - inline char *EoS(){return (EOS_);} - - inline int oovcode(int v=-1){return oov_code=(v>=0?v:oov_code);} - - inline char *intsymb(char* isymb=NULL){ - if (isymb==NULL) return is; - if (is!=NULL) delete [] is; - is=new char[strlen(isymb+1)]; - strcpy(is,isymb); - return is=isymb; - } - - inline int incflag(){return ifl;} - inline int incflag(int v){return ifl=v;} - inline int oovlexsize(){return oovlex?oovlex->n:0;} - inline int inoovlex(){return in_oov_lex;} - inline int oovlexcode(){return oov_lex_code;} - - - int isprintable(char* w){ - char buffer[MAX_WORD]; - sprintf(buffer,"%s",w); - return strcmp(w,buffer)==0; - } - - inline void genoovcode(){ - int c=encode(OOV()); - std::cerr << "OOV code is "<< c << std::endl; - oovcode(c); - } - - inline dictionary* oovlexp(char *fname=NULL){ - if (fname==NULL) return oovlex; - if (oovlex!=NULL) delete oovlex; - oovlex=new dictionary(fname,DICT_INITSIZE); - return oovlex; - } - - inline int setoovrate(double oovrate){ - encode(OOV()); //be sure OOV code exists - int oovfreq=(int)(oovrate * totfreq()); - std::cerr << "setting OOV rate to: " << oovrate << " -- freq= " << oovfreq << std::endl; - return freq(oovcode(),oovfreq); - - return 1; - } - - - inline int incfreq(int code,int value){N+=value;return tb[code].freq+=value;} - - inline int multfreq(int code,double value){ - N+=(int)(value * tb[code].freq)-tb[code].freq; - return tb[code].freq=(int)(value * tb[code].freq); - } - - inline int freq(int code,int value=-1){ - if (value>=0){ - N+=value-tb[code].freq; - tb[code].freq=value; - } - return tb[code].freq; - } - - inline int totfreq(){return N;} - - void grow(); - //dictionary(int size=400,char* isym=NULL,char* oovlex=NULL); - dictionary(char *filename=NULL,int size=DICT_INITSIZE,char* isymb=NULL,char* oovlex=NULL); - dictionary(dictionary* d); - - ~dictionary(); - void generate(char *filename); - void load(char *filename); - void save(char *filename,int freqflag=0); - void load(std::istream& fd); - void save(std::ostream& fd); - - int size(){return n;}; - int getcode(const char *w); - int encode(const char *w); - char *decode(int c); - void stat(); - - void cleanfreq(){ - for (int i=0;i<n;tb[i++].freq=0); - N=0; - } - -}; - -class dictionary_iter { - public: - dictionary_iter(dictionary *dict); - dict_entry* next(); - private: - dictionary* m_dict; -}; - -#endif - diff --git a/irstlm/src/htable.cpp b/irstlm/src/htable.cpp deleted file mode 100644 index 1f56723a2..000000000 --- a/irstlm/src/htable.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - -#include <iostream> -#include <assert.h> -#include "mempool.h" -#include "htable.h" - -using namespace std; - -htable::htable(int n,int kl,HTYPE ht,size_t (*klf)(const char* )){ - - memory=new mempool( sizeof(entry) , BlockSize ); - - table = new entry* [ size=n ]; - - memset(table,0,sizeof(entry *) * n ); - - keylen=kl; - - htype=ht; - - keys = accesses = collisions = 0; - - keylenfunc=(klf?klf:&strlen); - -} - - -char *htable::search(char *item, HT_ACTION action) - -{ - address h; - entry *q,**p; - int i; - - //if (action == HT_FIND) - accesses++; - - h = Hash(item); - - i=(h % size); - - p = &table[h % size]; - - q=*p; - - /* - ** Follow collision chain - */ - - while (q != NULL && Comp((char *)q->key,(char *)item)) - { - p = (entry **)&q->next; - q=*p; - //if (action == HT_FIND) - collisions++; - } - - if ( - q != NULL /* found */ - || - action == HT_FIND /* not found, search only */ - || - (q = (entry *)memory->alloc()) - == - NULL /* not found, no room */ - ) - - return((q!=NULL)?(char *)q->key:(char *)NULL); - - *p = q; /* link into chain */ - /* - ** Initialize new element - */ - - q->key = item; - q->next = NULL; - keys++; - - return((char *)q->key); -} - - -char *htable::scan(HT_ACTION action){ - - char *k; - - if (action == HT_INIT) - { - scan_i=0;scan_p=table[0]; - return NULL; - } - - // if scan_p==NULL go to the first non null pointer - while ((scan_p==NULL) && (++scan_i<size)) scan_p=table[scan_i]; - - if (scan_p!=NULL) - { - k=scan_p->key; - scan_p=(entry *)scan_p->next; - return k; - }; - - return NULL; -} - - -void htable::map(ostream& co,int cols){ - - entry *p; - char* img=new char[cols+1]; - - img[cols]='\0'; - memset(img,'.',cols); - - co << "htable memory map: . (0 items), - (<5), # (>5)\n"; - - for (int i=0; i<size;i++) - { - int n=0;p=table[i]; - - while(p!=NULL){ - n++; - p=(entry *)p->next; - }; - - if (i && (i % cols)==0){ - co << img << "\n"; - memset(img,'.',cols); - } - - if (n>0) - img[i % cols]=n<=5?'-':'#'; - - } - - img[size % cols]='\0'; - co << img << "\n"; - - delete []img; -} - - -void htable::stat(){ - cout << "htable class statistics\n"; - cout << "size " << size - << " keys " << keys - << " acc " << accesses - << " coll " << collisions - << " used memory " << used()/1024 << "Kb\n"; -} - -htable::~htable() -{ - delete [] table; - delete memory; -} - -address htable::Hash(char *key) -{ - char *Key=(htype==STRPTR? *(char **)key:key); - int length=(keylen?keylen:keylenfunc(Key)); - - //cerr << "hash: " << Key << " length:" << length << "\n"; - - register address h=0; - register int i; - - for (i=0,h=0;i<length;i++) - h = h * Prime1 ^ (Key[i] - ' '); - h %= Prime2; - - return h; -} - - -int htable::Comp(char *key1, char *key2) -{ - assert(key1 && key2); - - char *Key1=(htype==STRPTR?*(char **)key1:key1); - char *Key2=(htype==STRPTR?*(char **)key2:key2); - - assert(Key1 && Key2); - - int length1=(keylen?keylen:keylenfunc(Key1)); - int length2=(keylen?keylen:keylenfunc(Key2)); - - if (length1!=length2) return 1; - - register int i; - - for (i=0;i<length1;i++) - if (Key1[i]!=Key2[i]) return 1; - return 0; -} - - - -/* -main(){ - -const int n=1000; - -htable *ht=new htable(1000/5); - - char w[n][20]; - char *c; - - for (int i=0;i<n;i++) - { - sprintf(w[i],"ciao%d",i); - ht->search((char *)&w[i],HT_ENTER); - } - - for (int i=0;i<n;i++) - if (ht->search((char *)&w[i],HT_FIND)) - cout << w[i] << " trovato\n" ; - else - cout << w[i] << " non trovato\n"; - - ht->stat(); - - delete ht; - htable *ht2=new htable(n); - for (int i=0;i<n;i++) - ht2->search((char *)&w[i],HT_ENTER); - - ht2->scan(INIT); - cout << "elenco:\n"; - while ((c=ht2->scan(CONT))!=NULL) - cout << *(char **) c << "\n"; - - ht2->map(); -} -*/ - - - - - - - diff --git a/irstlm/src/htable.h b/irstlm/src/htable.h deleted file mode 100644 index 3fd484094..000000000 --- a/irstlm/src/htable.h +++ /dev/null @@ -1,125 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - -/* - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef MF_HTABLE_H -#define MF_HTABLE_H - -#include <iostream> - -#define Prime1 37 -#define Prime2 1048583 -#define BlockSize 100 - - -// Fast arithmetic, relying on powers of 2, -// and on pre-processor concatenation property - -typedef struct{ - char* key; - char* next; // secret from user -}entry; - -typedef unsigned int address; - -typedef enum {HT_FIND, //!< search: find an entry - HT_ENTER, //!< search: enter an entry - HT_INIT, //!< scan: start scan - HT_CONT //!< scan: continue scan -} HT_ACTION; - -typedef enum {STR, //!< string - STRPTR //!< pointer to string -}HTYPE; - -//! Hash Table for strings - -class htable { - int size; //!< table size - int keylen; //!< key length - HTYPE htype; //!< type of entry pointer - entry **table; //!< hash table - int scan_i; //!< scan support - entry *scan_p; //!< scan support - // statistics - long keys; //!< # of entries - long accesses; //!< # of accesses - long collisions; //!< # of collisions - - mempool *memory; //!< memory pool - - size_t (*keylenfunc)(const char*); //!< function computing key length - - public: - - //! Creates an hash table - htable(int n,int kl=0,HTYPE ht=STRPTR,size_t (*klf)(const char* )=NULL); - - //! Destroys an and hash table - ~htable(); - - //! Computes the hash function - address Hash(char *key); - - //! Compares the keys of two entries - int Comp(char *Key1,char *Key2); - - //! Searches for an item - char *search(char *item, HT_ACTION action); - - //! Scans the content - char *scan(HT_ACTION action); - - //! Prints statistics - void stat(); - - //! Print a map of memory use - void map(std::ostream& co=std::cout, int cols=80); - - //! Returns amount of used memory - int used(){return - size * sizeof(entry **) + - memory->used();}; -}; - - - -#endif - - - diff --git a/irstlm/src/index.h b/irstlm/src/index.h deleted file mode 100644 index 500587989..000000000 --- a/irstlm/src/index.h +++ /dev/null @@ -1,19 +0,0 @@ - - -#pragma once - -#ifdef WIN32 - -inline const char *index(const char *str, char search) -{ - int i=0; - while (i< strlen(str) ){ - if (str[i]==search) return &str[i]; - } - return NULL; -} - - -#endif - - diff --git a/irstlm/src/lmtable.cpp b/irstlm/src/lmtable.cpp deleted file mode 100644 index 997102da2..000000000 --- a/irstlm/src/lmtable.cpp +++ /dev/null @@ -1,728 +0,0 @@ -/* - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <iostream> -#include <stdexcept> -#include <assert.h> - -#include "math.h" -#include "mempool.h" -#include "htable.h" -#include "dictionary.h" -#include "ngram.h" -#include "lmtable.h" - -using namespace std; - -inline void error(char* message){ - cerr << message << "\n"; - throw std::runtime_error(message); -} - -lmtable::lmtable(std::istream& inp){ - - //initialization - maxlev=1; - - memset(cursize, 0, sizeof(cursize)); - memset(tbltype, 0, sizeof(tbltype)); - memset(maxsize, 0, sizeof(maxsize)); - memset(info, 0, sizeof(info)); - memset(NumCenters, 0, sizeof(NumCenters)); - - dict=new dictionary((char *)NULL,1000000,(char*)NULL,(char*)NULL); - - //default settings is a non quantized lmtable - configure(1,isQtable=0); - - char header[1024]; - - inp >> header; cerr << header << "\n"; - - if (strncmp(header,"Qblmt",5)==0 || strncmp(header,"blmt",4)==0) - loadbin(inp, header); - else - loadtxt(inp, header); - - dict->genoovcode(); - - cerr << "OOV code is " << dict->oovcode() << "\n"; -} - - -int parseWords(char *sentence, char **words, int max) -{ - char *word; - int i = 0; - - char *const wordSeparators = " \t\r\n"; - - for (word = strtok(sentence, wordSeparators); - i < max && word != 0; - i++, word = strtok(0, wordSeparators)) - { - words[i] = word; - } - - if (i < max){words[i] = 0;} - - return i; -} - - - -//Load a LM as a text file. LM could have been generated either with the -//IRST LM toolkit or with the SRILM Toolkit. In the latter we are not -//sure that n-grams are lexically ordered (according to the 1-grams). -//However, we make the following assumption: -//"all successors of any prefix are sorted and written in contiguous lines!" -//This method also loads files processed with the quantization -//tool: qlm - -void parseline(std::istream& inp, int Order,ngram& ng,float& prob,float& bow){ - - char* words[1+ LMTMAXLEV + 1 + 1]; - int howmany; - char line[1024]; - - inp.getline(line,1024); - - howmany = parseWords(line, words, Order + 3); - assert(howmany == (Order+ 1) || howmany == (Order + 2)); - - //read words - ng.size=0; - for (int i=1;i<=Order;i++) - ng.pushw(strcmp(words[i],"<unk>")?words[i]:ng.dict->OOV()); - //read logprob/code and logbow/code - assert(sscanf(words[0],"%f",&prob)); - if (howmany==(Order+2)) - assert(sscanf(words[Order+1],"%f",&bow)); - else - bow=0.0; //this is log10prob=0 for implicit backoff -} - - -void lmtable::loadcenters(std::istream& inp,int Order){ - char line[11]; - - //first read the coodebook - cerr << Order << " read code book "; - inp >> NumCenters[Order]; - Pcenters[Order]=new float[NumCenters[Order]]; - Bcenters[Order]=(Order<maxlev?new float[NumCenters[Order]]:NULL); - - for (int c=0;c<NumCenters[Order];c++){ - inp >> Pcenters[Order][c]; - if (Order<maxlev) inp >> Bcenters[Order][c]; - }; - //empty the last line - inp.getline((char*)line,10); - -} - - -void lmtable::loadtxt(std::istream& inp, const char* header){ - - //open input stream and prepare an input string - char line[1024]; - - //prepare word dictionary - //dict=(dictionary*) new dictionary(NULL,1000000,NULL,NULL); - dict->incflag(1); - - //put here ngrams, log10 probabilities or their codes - ngram ng(dict); - float prob,bow,log10=(float)log(10.0); - - //check the header to decide if the LM is quantized or not - isQtable=(strncmp(header,"qARPA",5)==0?true:false); - - //we will configure the table later we we know the maxlev; - bool yetconfigured=false; - - cerr << "loadtxt()\n"; - - // READ ARPA Header - int Order, n; - - while (inp.getline(line,1024)){ - - bool backslash = (line[0] == '\\'); - - if (sscanf(line, "ngram %d=%d", &Order, &n) == 2) { - maxsize[Order] = n; maxlev=Order; //upadte Order - } - - if (backslash && sscanf(line, "\\%d-grams", &Order) == 1) { - - //at this point we are sure about the size of the LM - if (!yetconfigured) {configure(maxlev,isQtable);yetconfigured=true;} - - cerr << Order << "-grams: reading "; - - if (isQtable) loadcenters(inp,Order); - - //allocate space for loading the table of this level - table[Order]= new char[maxsize[Order] * nodesize(tbltype[Order])]; - - //allocate support vector to manage badly ordered n-grams - if (maxlev>1) { - startpos[Order]=new int[maxsize[Order]]; - for (int c=0;c<maxsize[Order];c++) startpos[Order][c]=-1; - } - - //prepare to read the n-grams entries - cerr << maxsize[Order] << " entries\n"; - - //WE ASSUME A WELL STRUCTURED FILE!!! - - for (int c=0;c<maxsize[Order];c++){ - - parseline(inp,Order,ng,prob,bow); - - //add to table - add(ng, - (int)(isQtable?prob:exp(prob * log10)*UNIGRAM_RESOLUTION), - (int)(isQtable?bow:exp(bow * log10)*UNIGRAM_RESOLUTION)); - } - // now we can fix table at level Order -1 - if (maxlev>1 && Order>1) checkbounds(Order-1); - } - } - - dict->incflag(0); - cerr << "done\n"; - -} - -//set all bounds of entries with no successors to the bound -//of the previous entry. - -void lmtable::checkbounds(int level){ - - char* tbl=table[level]; - char* succtbl=table[level+1]; - - LMT_TYPE ndt=tbltype[level], succndt=tbltype[level+1]; - int ndsz=nodesize(ndt), succndsz=nodesize(succndt); - - //re-order table at level+1 - char* newtbl=new char[succndsz * cursize[level+1]]; - int start,end,newstart; - - //re-order table at - newstart=0; - for (int c=0;c<cursize[level];c++){ - start=startpos[level][c]; end=bound(tbl+c*ndsz,ndt); - //is start==-1 there are no successors for this entry and end==-2 - if (end==-2) end=start; - assert(start<=end); - assert(newstart+(end-start)<=cursize[level+1]); - assert(end<=cursize[level+1]); - - if (start<end) - memcpy((void*)(newtbl + newstart * succndsz), - (void*)(succtbl + start * succndsz), - (end-start) * succndsz); - - bound(tbl+c*ndsz,ndt,newstart+(end-start)); - newstart+=(end-start); - } - delete [] table[level+1]; - table[level+1]=newtbl; - newtbl=NULL; -} - -//Add method inserts n-grams in the table structure. It is ONLY used during -//loading of LMs in text format. It searches for the prefix, then it adds the -//suffix to the last level and updates the start-end positions. - -int lmtable::add(ngram& ng,int iprob,int ibow){ - - char *found; LMT_TYPE ndt; int ndsz; - - if (ng.size>1){ - - // find the prefix starting from the first level - int start=0, end=cursize[1]; - - for (int l=1;l<ng.size;l++){ - - ndt=tbltype[l]; ndsz=nodesize(ndt); - - if (search(table[l] + (start * ndsz),ndt,l,(end-start),ndsz, - ng.wordp(ng.size-l+1),LMT_FIND, &found)){ - - //update start-end positions for next step - if (l< (ng.size-1)){ - //set start position - if (found==table[l]) start=0; //first pos in table - else start=bound(found - ndsz,ndt); //end of previous entry - - //set end position - end=bound(found,ndt); - } - } - else{ - cerr << "warning: missing back-off for ngram " << ng << "\n"; - return 0; - } - } - - // update book keeping information about level ng-size -1. - // if this is the first successor update start position - int position=(found-table[ng.size-1])/ndsz; - if (startpos[ng.size-1][position]==-1) - startpos[ng.size-1][position]=cursize[ng.size]; - - //always update ending position - bound(found,ndt,cursize[ng.size]+1); - //cout << "startpos: " << startpos[ng.size-1][position] - //<< " endpos: " << bound(found,ndt) << "\n"; - - } - - // just add at the end of table[ng.size] - - assert(cursize[ng.size]< maxsize[ng.size]); // is there enough space? - ndt=tbltype[ng.size];ndsz=nodesize(ndt); - - found=table[ng.size] + (cursize[ng.size] * ndsz); - word(found,*ng.wordp(1)); - prob(found,ndt,iprob); - if (ng.size<maxlev){bow(found,ndt,ibow);bound(found,ndt,-2);} - - cursize[ng.size]++; - - return 1; - -} - - -void *lmtable::search(char* tb, - LMT_TYPE ndt, - int lev, - int n, - int sz, - int *ngp, - LMT_ACTION action, - char **found){ - - //prepare search pattern - char w[LMTCODESIZE];putmem(w,ngp[0],0,LMTCODESIZE); - - int idx=0; // index returned by mybsearch - if (found) *found=NULL; //initialize output variable - switch(action){ - case LMT_FIND: - if (!tb || !mybsearch(tb,n,sz,(unsigned char *)w,&idx)) - return 0; - else - if (found) *found=tb + (idx * sz); - return tb + (idx * sz); - default: - error("lmtable::search: this option is available"); - }; - - return (void *)0x0; -} - - -int lmtable::mybsearch(char *ar, int n, int size, - unsigned char *key, int *idx) -{ - register int low, high; - register unsigned char *p; - register int result; - register int i; - - /* return idx with the first - position equal or greater than key */ - - /* Warning("start bsearch \n"); */ - - low = 0;high = n; *idx=0; - while (low < high) - { - *idx = (low + high) / 2; - p = (unsigned char *) (ar + (*idx * size)); - - //comparison - for (i=(LMTCODESIZE-1);i>=0;i--){ - result=key[i]-p[i]; - if (result) break; - } - - if (result < 0) - high = *idx; - else if (result > 0) - low = *idx + 1; - else - return 1; - } - - *idx=low; - - return 0; - -} - - -// saves a LM table in text format - -void lmtable::savetxt(const char* filename){ - - fstream out(filename,ios::out); - int l; - - out.precision(6); - - if (isQtable) out << "qARPA\n"; - - - ngram ng(dict,0); - - cerr << "savetxt()\n"; - - out << "\n\\data\\\n"; - for (l=1;l<=maxlev;l++){ - out << "ngram " << l << "= " << cursize[l] << "\n"; - } - - for (l=1;l<=maxlev;l++){ - - out << "\n\\" << l << "-grams:\n"; - cerr << "save: " << cursize[l] << " " << l << "-grams\n"; - if (isQtable){ - out << NumCenters[l] << "\n"; - for (int c=0;c<NumCenters[l];c++){ - out << Pcenters[l][c]; - if (l<maxlev) out << " " << Bcenters[l][c]; - out << "\n"; - } - } - - ng.size=0; - dumplm(out,ng,1,l,0,cursize[1]); - - } - - out << "\\end\\\n"; - cerr << "done\n"; -} - - -void lmtable::savebin(const char *filename){ - - fstream out(filename,ios::out); - cerr << "savebin: " << filename << "\n"; - - // print header - if (isQtable){ - out << "Qblmt " << maxlev; - for (int i=1;i<=maxlev;i++) out << " " << cursize[i]; - out << "\nNumCenters"; - for (int i=1;i<=maxlev;i++) out << " " << NumCenters[i]; - out << "\n"; - - }else{ - out << "blmt " << maxlev; - for (int i=1;i<=maxlev;i++) out << " " << cursize[i] ; - out << "\n"; - } - - dict->save(out); - - for (int i=1;i<=maxlev;i++){ - cerr << "saving " << cursize[i] << " " << i << "-grams\n"; - if (isQtable){ - out.write((char*)Pcenters[i],NumCenters[i] * sizeof(float)); - if (i<maxlev) - out.write((char *)Bcenters[i],NumCenters[i] * sizeof(float)); - } - out.write(table[i],cursize[i]*nodesize(tbltype[i])); - } - - cerr << "done\n"; -} - - -void lmtable::loadbin(std::istream& inp, const char *header){ - - cerr << "loadbin()\n"; - - // read header - inp >> maxlev; - - if (strncmp(header,"Qblmt",5)==0) isQtable=1; - else if(strncmp(header,"blmt",4)==0) isQtable=0; - else error("loadbin: wrong header"); - - configure(maxlev,isQtable); - - for (int i=1;i<=maxlev;i++){ - inp >> cursize[i]; maxsize[i]=cursize[i]; - table[i]=new char[cursize[i] * nodesize(tbltype[i])]; - } - - if (isQtable){ - cerr << "reading num centers:"; - char tmp[1024]; - inp >> tmp; - for (int i=1;i<=maxlev;i++){ - inp >> NumCenters[i];cerr << " " << NumCenters[i]; - Pcenters[i]=new float [NumCenters[i]]; - Bcenters[i]=(i<maxlev?new float [NumCenters[i]]:NULL); - } - cerr << "\n"; - } - - //dict=new dictionary(NULL,1000000,NULL,NULL); - dict->load(inp); - - for (int i=1;i<=maxlev;i++){ - if (isQtable){ - inp.read((char*)Pcenters[i],NumCenters[i] * sizeof(float)); - if (i<maxlev) inp.read((char *)Bcenters[i],NumCenters[i]*sizeof(float)); - } - cerr << "loading " << cursize[i] << " " << i << "-grams\n"; - inp.read(table[i],cursize[i]*nodesize(tbltype[i])); - } - - cerr << "done\n"; -} - - - -int lmtable::get(ngram& ng,int n,int lev){ - - // cout << "cerco:" << ng << "\n"; - - if (lev > maxlev) error("get: lev exceeds maxlevel"); - if (n < lev) error("get: ngram is too small"); - - //set boudaries for 1-gram - int offset=0,limit=cursize[1]; - - //information of table entries - char* found; LMT_TYPE ndt; - - for (int l=1;l<=lev;l++){ - - //initialize entry information - found = NULL; ndt=tbltype[l]; - - //search in table at level i - search(table[l] + (offset * nodesize(ndt)), - ndt, - l, - (limit-offset), - nodesize(ndt), - ng.wordp(n-l+1), - LMT_FIND, - &found); - - if (!found) return 0; - - if (l<maxlev){ //set start/end point for next search - - //if current offset is at the bottom also that of successors will be - if (offset+1==cursize[l]) limit=cursize[l+1]; - else limit=bound(found,ndt); - - //if current start is at the begin, then also that of successors will be - if (found==table[l]) offset=0; - else offset=bound((found - nodesize(ndt)),ndt); - - assert(offset!=-1); assert(limit!=-1); - } - } - - //put information inside ng - ng.size=n; ng.lev=lev; ng.freq=0; ng.link=found; ng.info=ndt; - ng.succ=(lev<maxlev?limit-offset:0); - - return 1; -} - - -//recursively prints the language model table - -void lmtable::dumplm(std::ostream& out,ngram ng, int ilev, int elev, int ipos,int epos){ - - LMT_TYPE ndt=tbltype[ilev]; - int ndsz=nodesize(ndt); - float log10=log(10.0); - - assert(ng.size==ilev-1); - assert(ipos>=0 && epos<=cursize[ilev] && ipos<epos); - ng.pushc(0); - - for (int i=ipos;i<epos;i++){ - *ng.wordp(1)=word(table[ilev]+i*ndsz); - if (ilev<elev){ - //get first and last successor position - int isucc=(i>0?bound(table[ilev]+(i-1)*ndsz,ndt):0); - int esucc=bound(table[ilev]+i*ndsz,ndt); - if (isucc < esucc) //there are successors! - dumplm(out,ng,ilev+1,elev,isucc,esucc); - //else - //cout << "no successors for " << ng << "\n"; - } - else{ - //out << i << " "; //this was just to count printed n-grams - int ipr=prob(table[ilev]+ i * ndsz,ndt); - out << (isQtable?ipr:log((ipr+1)/UNIGRAM_RESOLUTION)/log10) <<"\t"; - for (int k=ng.size;k>=1;k--){ - if (k<ng.size) out << " "; - out << dict->decode(*ng.wordp(k)); - } - int ibo=(int)(ilev<maxlev?bow(table[ilev]+ i * ndsz,ndt):UNIGRAM_RESOLUTION); - if (ibo!=UNIGRAM_RESOLUTION) - out << "\t" << (isQtable?ibo:log((ibo+1)/UNIGRAM_RESOLUTION)/log10); - out << "\n"; - } - } -} - -//succscan iteratively returns all successors of an ngram h for which -//get(h,h.size,h.size) returned true. - - -int lmtable::succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev){ - assert(lev==h.lev+1 && h.size==lev && lev<=maxlev); - - LMT_TYPE ndt=tbltype[h.lev]; - int ndsz=nodesize(ndt); - - switch (action){ - - case LMT_INIT: - //reset ngram local indexes - - ng.size=lev; - ng.trans(h); - ng.midx[lev]=(h.link>table[h.lev]?bound(h.link-ndsz,ndt):0); - - return 1; - - case LMT_CONT: - - if (ng.midx[lev]<bound(h.link,ndt)) - { - //put current word into ng - *ng.wordp(1)=word(table[lev]+ng.midx[lev]*nodesize(tbltype[lev])); - ng.midx[lev]++; - return 1; - } - else - return 0; - - default: - cerr << "succscan: only permitted options are LMT_INIT and LMT_CONT\n"; - exit(0); - } - -} - -//maxsuffptr returns the largest suffix of an n-gram that is contained -//in the LM table. This can be used as a compact representation of the -//(n-1)-gram state of a n-gram LM. if the input k-gram has k>=n then it -//is trimmed to its n-1 suffix. - -const char *lmtable::maxsuffptr(ngram ong){ - - if (ong.size==0) return (char*) NULL; - if (ong.size>=maxlev) ong.size=maxlev-1; - - ngram ng(dict); //eventually use the <unk> word - ng.trans(ong); - - if (get(ng,ng.size,ng.size)) - return ng.link; - else{ - ong.size--; -#ifndef WIN32 -#warning maxsuffptr is not implemented -#endif - exit(1); -// return getstate(ong); - } -} - - -// returns the probability of an n-gram - -double lmtable::prob(const ngram& ong){ - - if (ong.size==0) return 0.0; - - ngram ng(dict); - ng.trans(ong); - if (ong.size>maxlev) ng.size=maxlev; - - double rbow; - int ibow,iprob; - LMT_TYPE ndt; - - if (get(ng,ng.size,ng.size)){ - ndt=(LMT_TYPE)ng.info; iprob=prob(ng.link,ndt); - return (double)(isQtable?Pcenters[ng.size][iprob] - :(iprob+1.0)/UNIGRAM_RESOLUTION); - } - else{ //size==1 means an OOV word - if (ng.size==1) return (double)1.0/UNIGRAM_RESOLUTION; - else{ // compute backoff - //set backoff state, shift n-gram, set default bow prob - bo_state(1); ng.shift();rbow=1.0; - if (get(ng)){ - ndt= (LMT_TYPE)ng.info; ibow=bow(ng.link,ndt); - rbow= (double) (isQtable?Bcenters[ng.size][ibow]:(ibow+1.0)/UNIGRAM_RESOLUTION); - } - //prepare recursion step - ng.size--; - return rbow * prob(ng); - } - } -} - - -void lmtable::stat(int level){ - int totmem=0,memory; - float mega=1024 * 1024; - - cout.precision(2); - - cout << "lmtable class statistics\n"; - - cout << "levels " << maxlev << "\n"; - for (int l=1;l<=maxlev;l++){ - memory=cursize[l] * nodesize(tbltype[l]); - cout << "lev " << l - << " entries "<< cursize[l] - << " used mem " << memory/mega << "Mb\n"; - totmem+=memory; - } - - cout << "total allocated mem " << totmem/mega << "Mb\n"; - - if (level >1 ) dict->stat(); - -} diff --git a/irstlm/src/lmtable.h b/irstlm/src/lmtable.h deleted file mode 100644 index 9e392bc9a..000000000 --- a/irstlm/src/lmtable.h +++ /dev/null @@ -1,245 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - -/* - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef MF_LMTABLE_H -#define MF_LMTABLE_H - -#include "ngram.h" - -#define LMTMAXLEV 11 - -#ifndef LMTCODESIZE -#define LMTCODESIZE (int)3 -#endif - -#define SHORTSIZE (int)2 -#define PTRSIZE (int)sizeof(char *) -#define INTSIZE (int)4 -#define CHARSIZE (int)1 - -#define PROBSIZE (int)4 //use float -#define QPROBSIZE (int)1 -#define BOUNDSIZE (int)4 - -#define UNIGRAM_RESOLUTION 10000000.0 - -typedef enum {INTERNAL,QINTERNAL,LEAF,QLEAF} LMT_TYPE; -typedef char* node; - -typedef enum {LMT_FIND, //!< search: find an entry - LMT_ENTER, //!< search: enter an entry - LMT_INIT, //!< scan: start scan - LMT_CONT //!< scan: continue scan -} LMT_ACTION; - - -class lmtable{ - - char* table[LMTMAXLEV]; //storage of all levels - LMT_TYPE tbltype[LMTMAXLEV]; //table type for each levels - int cursize[LMTMAXLEV]; //current size of levels - int maxsize[LMTMAXLEV]; //current size of levels - int* startpos[LMTMAXLEV]; //support vector to store start positions - - int maxlev; //max level of table - char info[100]; //information put in the header - - //probability quantization - bool isQtable; - - int NumCenters[LMTMAXLEV]; - float* Pcenters[LMTMAXLEV]; - float* Bcenters[LMTMAXLEV]; - - int lmt_oov_code; - int lmt_oov_size; - int backoff_state; - - - public: - - dictionary *dict; // dictionary - - lmtable(std::istream& in); - - ~lmtable(){ - for (int i=1;i<=maxlev;i++){ - delete [] table[i]; - if (isQtable){ - delete [] Pcenters[i]; - if (i<maxlev) delete [] Bcenters[i]; - } - } - } - - void configure(int n,bool quantized){ - maxlev=n; - if (n==1) - tbltype[1]=(quantized?QLEAF:LEAF); - else{ - for (int i=1;i<n;i++) tbltype[i]=(quantized?QINTERNAL:INTERNAL); - tbltype[n]=(quantized?QLEAF:LEAF); - } - }; - - int maxlevel(){return maxlev;}; - - void savetxt(const char *filename); - void savebin(const char *filename); - void dumplm(std::ostream& out,ngram ng, int ilev, int elev, int ipos,int epos); - - void loadtxt(std::istream& in, const char* header); - void loadbin(std::istream& in, const char* header); - - void loadcenters(std::istream& inp,int Order); - - double prob(const ngram& ng); - - void *search(char *tb,LMT_TYPE ndt,int lev,int n,int sz,int *w, - LMT_ACTION action,char **found=(char **)NULL); - - int mybsearch(char *ar, int n, int size, unsigned char *key, int *idx); - - int add(ngram& ng,int prob,int bow); - void checkbounds(int level); - - int get(ngram& ng){return get(ng,ng.size,ng.size);} - int get(ngram& ng,int n,int lev); - - int succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev); - const char *maxsuffptr(ngram ong); - inline int putmem(char* ptr,int value,int offs,int size){ - assert(ptr!=NULL); - for (int i=0;i<size;i++) - ptr[offs+i]=(value >> (8 * i)) & 0xff; - return value; - }; - - inline int getmem(char* ptr,int* value,int offs,int size){ - assert(ptr!=NULL); - *value=ptr[offs] & 0xff; - for (int i=1;i<size;i++) - *value= *value | ( ( ptr[offs+i] & 0xff ) << (8 *i)); - return *value; - }; - - - int bo_state(int value=-1){ - return (value==-1?backoff_state:backoff_state=value); - }; - - - int nodesize(LMT_TYPE ndt){ - switch (ndt){ - case INTERNAL: - return LMTCODESIZE + PROBSIZE + PROBSIZE + BOUNDSIZE; - case QINTERNAL: - return LMTCODESIZE + QPROBSIZE + QPROBSIZE + BOUNDSIZE; - case QLEAF: - return LMTCODESIZE + QPROBSIZE; - case LEAF: - return LMTCODESIZE + PROBSIZE; - default: - assert(0); - return 0; - } - } - - inline int word(node nd,int value=-1) - { - int offset=0; - - if (value==-1) - getmem(nd,&value,offset,LMTCODESIZE); - else - putmem(nd,value,offset,LMTCODESIZE); - - return value; - }; - - inline int prob(node nd,LMT_TYPE ndt, int value=-1) - { - int offs=LMTCODESIZE; - int size=(ndt==QINTERNAL || ndt==QLEAF?QPROBSIZE:PROBSIZE); - - if (value==-1) - getmem(nd,&value,offs,size); - else - putmem(nd,value,offs,size); - - return value; - }; - - - inline int bow(node nd,LMT_TYPE ndt, int value=-1) - { - assert(ndt==INTERNAL || ndt==QINTERNAL); - int size=(ndt==QINTERNAL?QPROBSIZE:PROBSIZE); - int offs=LMTCODESIZE+size; - - if (value==-1) - getmem(nd,&value,offs,size); - else - putmem(nd,value,offs,size); - - return value; - }; - - inline int bound(node nd,LMT_TYPE ndt, int value=-1) - { - assert(ndt==INTERNAL || ndt==QINTERNAL); - int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE); - - if (value==-1) - getmem(nd,&value,offs,BOUNDSIZE); - else - putmem(nd,value,offs,BOUNDSIZE); - - return value; - }; - - void stat(int lev=0); - -}; - -#endif - - - - diff --git a/irstlm/src/mempool.cpp b/irstlm/src/mempool.cpp deleted file mode 100644 index 00a9777d0..000000000 --- a/irstlm/src/mempool.cpp +++ /dev/null @@ -1,516 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - -// An efficient memory pool manager -// by M. Federico -// Copyright Marcello Federico, ITC-irst, 1998 - -#include <iostream> -#include <assert.h> -#include "mempool.h" - -using namespace std; - -/*! The pool contains: - - entries of size is - - tables for bs entries -*/ - -mempool::mempool(int is, int bs){ - - // item size must be multiple of memory alignment step (4 bytes) - // example: is is=9 becomes i=12 (9 + 4 - 9 %4 ) - - is=(is>(int)sizeof(char *)?is:0); - - is=is + sizeof(char *) - (is % sizeof(char *)); - - item_size = is; - - block_size = bs; - - true_size = is * bs; - - block_list = new memnode; - - block_list->block = new char[true_size]; - - memset(block_list->block,'0',true_size); - - block_list->next = 0; - - blocknum = 1; - - entries = 0; - - // build free list - - char *ptr = free_list = block_list->block; - - for (int i=0;i<block_size-1;i++) { - *(char **)ptr= ptr + item_size; - ptr+=item_size; - } - *(char **)ptr = NULL; //last item - -} - - -char * mempool::alloc(){ - - char *ptr; - - if (free_list==NULL) - { - memnode *new_block = new memnode; - - new_block->block = new char[true_size]; - - memset(new_block->block,'0',true_size); - - new_block->next = block_list; - - block_list=new_block; // update block list - - /* update free list */ - - ptr = free_list = block_list->block; - - for (int i=0;i<block_size-1;i++) { - *(char **)ptr = ptr + item_size; - ptr = ptr + item_size; - } - - *(char **)ptr=NULL; - - blocknum++; - } - - ptr = free_list; - - free_list=*(char **)ptr; - - *(char **)ptr=NULL; // reset the released item - - entries++; - - return ptr; - -} - - -int mempool::free(char* addr){ - - // do not check if it belongs to this pool !! - /* - memnode *list=block_list; - while ((list != NULL) && - ((addr < list->block) || - (addr >= (list->block + true_size)))) - list=list->next; - - if ((list==NULL) || (((addr - list->block) % item_size)!=0)) - { - //cerr << "mempool::free-> addr does not belong to this pool\n"; - return 0; - } - */ - - *(char **)addr=free_list; - free_list=addr; - - entries--; - - return 1; -} - - -mempool::~mempool() -{ - memnode *ptr; - - while (block_list !=NULL){ - ptr=block_list->next; - delete [] block_list->block; - delete block_list; - block_list=ptr; - } - -} - -void mempool::map (ostream& co){ - - co << "mempool memory map:\n"; - //percorri piu` volte la lista libera - - memnode *bl=block_list; - char *fl=free_list; - - char* img=new char[block_size+1]; - img[block_size]='\0'; - - while (bl !=NULL){ - - memset(img,'#',block_size); - - fl=free_list; - while (fl != NULL){ - if ((fl >= bl->block) - && - (fl < bl->block + true_size)) - { - img[(fl-bl->block)/item_size]='-'; - } - - fl=*(char **)fl; - } - - co << img << "\n"; - bl=bl->next; - } - delete [] img; -} - -void mempool::stat(){ - - cout << "mempool class statistics\n" - << "entries " << entries - << " blocks " << blocknum - << " used memory " << (blocknum * true_size)/1024 << " Kb\n"; -} - - - -strstack::strstack(int bs){ - - size=bs; - list=new memnode; - - list->block=new char[size]; - - list->next=0; - - memset(list->block,'\0',size); - idx=0; - - waste=0; - memory=size; - entries=0; - blocknum=1; - -} - - -void strstack::stat(){ - - cout << "strstack class statistics\n" - << "entries " << entries - << " blocks " << blocknum - << " used memory " << memory/1024 << " Kb\n"; -} - - -char *strstack::push(char *s){ - int len=strlen(s); - - if ((len+1) >= size){ - cerr << "strstack::push string is too long\n"; - exit(1); - }; - - if ((idx+len+1) >= size){ - //append a new block - //there must be space to - //put the index after - //the word - - waste+=size-idx; - blocknum++; - memory+=size; - - memnode* nd=new memnode; - nd->block=new char[size]; - nd->next=list; - - list=nd; - - memset(list->block,'\0',size); - - idx=0; - - } - - // append in current block - - strcpy(&list->block[idx],s); - - idx+=len+1; - - entries++; - - return &list->block[idx-len-1]; - -} - - -char *strstack::pop(){ - - if (list==0) return 0; - - if (idx==0){ - - // free this block and go to next - - memnode *ptr=list->next; - - delete [] list->block; - delete list; - - list=ptr; - - if (list==0) - return 0; - else - idx=size-1; - } - - //go back to first non \0 - while (idx>0) - if (list->block[idx--]!='\0') - break; - - //go back to first \0 - while (idx>0) - if (list->block[idx--]=='\0') - break; - - entries--; - - if (list->block[idx+1]=='\0') - { - idx+=2; - memset(&list->block[idx],'\0',size-idx); - return &list->block[idx]; - } - else{ - idx=0; - memset(&list->block[idx],'\0',size); - return &list->block[0]; - } -} - - -char *strstack::top(){ - - int tidx=idx; - memnode *tlist=list; - - if (tlist==0) return 0; - - if (idx==0){ - - tlist=tlist->next; - - if (tlist==0) return 0; - - tidx=size-1; - } - - //go back to first non \0 - while (tidx>0) - if (tlist->block[tidx--]!='\0') - break; - - //aaa\0bbb\0\0\0\0 - - //go back to first \0 - while (tidx>0) - if (tlist->block[tidx--]=='\0') - break; - - if (tlist->block[tidx+1]=='\0') - { - tidx+=2; - return &tlist->block[tidx]; - } - else{ - tidx=0; - return &tlist->block[0]; - } - -} - - -strstack::~strstack(){ - memnode *ptr; - while (list !=NULL){ - ptr=list->next; - delete [] list->block; - delete list; - list=ptr; - } -} - - -storage::storage(int maxsize,int blocksize) -{ - newmemory=0; - newcalls=0; - setsize=maxsize; - poolsize=blocksize; //in bytes - poolset=new mempool* [setsize+1]; - for (int i=0;i<=setsize;i++) - poolset[i]=NULL; -} - - -storage::~storage(){ - for (int i=0;i<=setsize;i++) - if (poolset[i]) - delete poolset[i]; - delete [] poolset; -} - - -char *storage::alloc(int size){ - - if (size<=setsize){ - if (!poolset[size]){ - poolset[size]=new mempool(size,poolsize/size); - } - return poolset[size]->alloc(); - } - else{ - - newmemory+=size+8; - newcalls++; - char* p=(char *)calloc(sizeof(char),size); - if (p==NULL){ - cerr << "storage::alloc insufficient memory\n"; - exit(1); - } - return p; - } -} - - - -char *storage::realloc(char *oldptr,int oldsize,int newsize){ - - char *newptr; - - assert(newsize>oldsize); - - if (oldsize<=setsize){ - if (newsize<=setsize){ - if (!poolset[newsize]) - poolset[newsize]=new mempool(newsize,poolsize/newsize); - newptr=poolset[newsize]->alloc(); - memset((char*)newptr,0,newsize); - } - else - newptr=(char *)calloc(sizeof(char),newsize); - - if (oldptr && oldsize){ - memcpy(newptr,oldptr,oldsize); - poolset[oldsize]->free(oldptr); - } - } - else{ - newptr=(char *)std::realloc(oldptr,newsize); - if (newptr==oldptr) - cerr << "r\b"; - else - cerr << "a\b"; - } - if (newptr==NULL){ - cerr << "storage::realloc insufficient memory\n"; - exit(1); - } - - return newptr; - -} - - -int storage::free(char *addr,int size){ - - /* - while(size<=setsize){ - if (poolset[size] && poolset[size]->free(addr)) - break; - size++; - } - */ - - if (size>setsize) - return free(addr),1; - else{ - poolset[size] && poolset[size]->free(addr); - } - return 1; -} - -void storage::stat(){ - int used=0; - int memory=sizeof(char *) * setsize; - int waste=0; - - for (int i=0;i<=setsize;i++) - if (poolset[i]){ - used++; - memory+=poolset[i]->used(); - waste+=poolset[i]->wasted(); - } - - cout << "storage class statistics\n"; - cout << "alloc entries " << newcalls - << " used memory " << newmemory/1024 << "Kb\n"; - cout << "mpools " << setsize - << " active " << used - << " used memory " << memory/1024 << "Kb" - << " wasted " << waste/1024 << "Kb\n"; -} - -/* -main(){ - - mempool* mp=new mempool(sizeof(int),80); - - int** ar= new (int*) [ 1000 ]; - - for (int i=0;i<1000;i++){ - ar[i]= (int *)mp->alloc(); - } - - mp->map(cout); - - for (int i=0;i<500;i++){ - mp->free(ar[i]); - } - - mp->map(cout); - -} - -*/ - - - diff --git a/irstlm/src/mempool.h b/irstlm/src/mempool.h deleted file mode 100644 index 20dc4d59f..000000000 --- a/irstlm/src/mempool.h +++ /dev/null @@ -1,181 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - -// An efficient memory manager -// by M. Federico -// Copyright Marcello Federico, ITC-irst, 1998 - -#ifndef MF_MEMPOOL_H -#define MF_MEMPOOL_H - -#ifndef NULL -const int NULL=0; -//#define NULL=0; -#endif - -#include <iostream> // std::ostream - -//! Memory block -/*! This can be used by: -- mempool to store items of fixed size -- strstack to store strings of variable size -*/ - -class memnode{ - friend class mempool; //!< grant access - friend class strstack; //!< grant access - char *block; //!< block of memory - memnode *next; //!< next block ptr -}; - - -//! Memory pool - -/*! A memory pool is composed of: - - a linked list of block_num memory blocks - - each block might contain up to block_size items - - each item is made of exactly item_size bytes -*/ - -class mempool{ - int block_size; //!< number of entries per block - int item_size; //!< number of bytes per entry - int true_size; //!< number of bytes per block - memnode* block_list; //!< list of blocks - char* free_list; //!< free entry list - int entries; //!< number of stored entries - int blocknum; //!< number of allocated blocks - public: - - //! Creates a memory pool - mempool(int is, int bs); - - //! Destroys memory pool - ~mempool(); - - //! Prints a map of memory occupancy - void map(std::ostream& co); - - //! Allocates a single memory entry - char *alloc(); - - //! Frees a single memory entry - int free(char* addr); - - //! Prints statistics about this mempool - void stat(); - - //! Returns effectively used memory (bytes) - /*! includes 8 bytes required by each call of new */ - - int used(){return blocknum * (true_size + 8);}; - - //! Returns amount of wasted memory (bytes) - int wasted(){return used()-(entries * item_size);}; -}; - -//! A stack to store strings - -/*! - The stack is composed of - - a list of blocks memnode of fixed size - - attribute blocknum tells the block on top - - attribute idx tells position of the top string -*/ - -class strstack{ - memnode* list; //!< list of memory blocks - int size; //!< size of each block - int idx; //!< index of last stored string - int waste; //!< current waste of memory - int memory; //!< current use of memory - int entries; //!< current number of stored strings - int blocknum; //!< current number of used blocks - - public: - - strstack(int bs=1000); - - ~strstack(); - - char *push(char *s); - - char *pop(); - - char *top(); - - void stat(); - - int used(){return memory;}; - - int wasted(){return waste;}; - -}; - - -//! Manages multiple memory pools - -/*! - This class permits to manage memory pools - with items up to a specified size. - - items within the allowed range are stored in memory pools - - items larger than the limit are allocated with new -*/ - - -class storage{ - mempool **poolset; //!< array of memory pools - int setsize; //!< number of memory pools/maximum elem size - int poolsize; //!< size of each block - int newmemory; //!< stores amount of used memory - int newcalls; //!< stores number of allocated blocks - public: - - //! Creates storage - storage(int maxsize,int blocksize); - - //! Destroys storage - ~storage(); - - //! Allocates memory - char *alloc(int size); - - //! Realloc memory - char *realloc(char *oldptr,int oldsize,int newsize); - - //! Frees memory of an entry - int free(char *addr,int size=0); - - //! Prints statistics about storage - void stat(); -}; - - -#endif - - - - - - - - - - diff --git a/irstlm/src/ngram.cpp b/irstlm/src/ngram.cpp deleted file mode 100644 index 70a1a7c97..000000000 --- a/irstlm/src/ngram.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - -#include <iomanip> -#include <assert.h> -#include "mempool.h" -#include "htable.h" -#include "dictionary.h" -#include "ngram.h" -#include "index.h" - -using namespace std; - -ngram::ngram(dictionary* d,int sz){ - dict=d; - size=sz; - succ=0; - freq=0; - info=0; - pinfo=0; - link=NULL; - isym=-1; - memset(word,0,sizeof(int)*MAX_NGRAM); - memset(midx,0,sizeof(int)*MAX_NGRAM); -} - -ngram::ngram(ngram& ng){ - size=ng.size; - freq=ng.freq; - succ=0; - info=0; - pinfo=0; - link=NULL; - isym=-1; - dict=ng.dict; - memcpy(word,ng.word,sizeof(int)*MAX_NGRAM); - memcpy(midx,ng.word,sizeof(int)*MAX_NGRAM); - -} - -void ngram::trans (const ngram& ng){ - size=ng.size; - freq=ng.freq; - if (dict == ng.dict){ - info=ng.info; - isym=ng.isym; - memcpy(word,ng.word,sizeof(int)*MAX_NGRAM); - memcpy(midx,ng.midx,sizeof(int)*MAX_NGRAM); - } - else{ - info=0; - memset(midx,0,sizeof(int)*MAX_NGRAM); - isym=-1; - for (int i=1;i<=size;i++) - word[MAX_NGRAM-i]=dict->encode(ng.dict->decode(*ng.wordp(i))); - } -} - - -ifstream& operator>> ( ifstream& fi , ngram& ng){ - char w[MAX_WORD]; - memset(w,0,MAX_WORD); - w[0]='\0'; - - if (!(fi >> setw(MAX_WORD) >> w)) - return fi; - - if (strlen(w)==(MAX_WORD-1)) - cerr << "ngram: a too long word was read (" - << w << ")\n"; - - if (ng.dict->intsymb() && - (strlen(w)==1) && (index(ng.dict->intsymb(),w[0])!=NULL)){ - - ng.isym=(long)index(ng.dict->intsymb(),w[0]) - - (long)ng.dict->intsymb(); - ng.size=0; - return fi; - } - - int c=ng.dict->encode(w); - - if (c == -1 ){ - cerr << "ngram: " << w << " is OOV \n"; - exit(1); - } - - memcpy(ng.word,ng.word+1,(MAX_NGRAM-1)*sizeof(int)); - - ng.word[MAX_NGRAM-1]=(int)c; - ng.freq=1; - - if (ng.size<MAX_NGRAM) ng.size++; - - return fi; - -} - - -int ngram::pushw(char* w){ - - assert(dict!=NULL); - - int c=dict->encode(w); - - if (c == -1 ){ - cerr << "ngram: " << w << " is OOV \n"; - exit(1); - } - - pushc(c); - - return 1; - -} - -int ngram::pushc(int c){ - - int buff[MAX_NGRAM-1]; - memcpy(buff,word+1,(MAX_NGRAM-1)*sizeof(int)); - memcpy(word,buff,(MAX_NGRAM-1)*sizeof(int)); - - word[MAX_NGRAM-1]=(int)c; - if (size<MAX_NGRAM) size++; - - return 1; - -} - - -istream& operator>> ( istream& fi , ngram& ng){ - char w[MAX_WORD]; - memset(w,0,MAX_WORD); - w[0]='\0'; - - assert(ng.dict != NULL); - - if (!(fi >> setw(MAX_WORD) >> w)) - return fi; - - if (strlen(w)==(MAX_WORD-1)) - cerr << "ngram: a too long word was read (" - << w << ")\n"; - - if (ng.dict->intsymb() && - (strlen(w)==1) && (index(ng.dict->intsymb(),w[0])!=NULL)){ - ng.isym=(long)index(ng.dict->intsymb(),w[0])-(long)ng.dict->intsymb(); - ng.size=0; - return fi; - } - - ng.pushw(w); - - ng.freq=1; - - return fi; - -} - -ofstream& operator<< (ofstream& fo,ngram& ng){ - - assert(ng.dict != NULL); - - for (int i=ng.size;i>0;i--) - fo << ng.dict->decode(ng.word[MAX_NGRAM-i]) << " "; - //fo << "[size " << ng.size << " freq " << ng.freq << "]"; - fo << ng.freq; - return fo; -} - -ostream& operator<< (ostream& fo,ngram& ng){ - - assert(ng.dict != NULL); - - for (int i=ng.size;i>0;i--) - fo << ng.dict->decode(ng.word[MAX_NGRAM-i]) << " "; - //fo << "[size " << ng.size << " freq " << ng.freq << "]"; - fo << ng.freq; - - return fo; -} - -/* -main(int argc, char** argv){ - dictionary d(argv[1]); - ifstream txt(argv[1]); - ngram ng(&d); - - while (txt >> ng){ - cout << ng << "\n"; - } - - ngram ng2=ng; - cerr << "copia l'ultimo =" << ng << "\n"; -} -*/ - diff --git a/irstlm/src/ngram.h b/irstlm/src/ngram.h deleted file mode 100644 index 12a885be0..000000000 --- a/irstlm/src/ngram.h +++ /dev/null @@ -1,117 +0,0 @@ -/****************************************************************************** - IrstLM: IRST Language Model Toolkit - Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -******************************************************************************/ - -// n-gram tables -// by M. Federico -// Copyright Marcello Federico, ITC-irst, 1998 - -#ifndef MF_NGRAM_H -#define MF_NGRAM_H - -#include <fstream> -#include "dictionary.h" - -#ifdef MYMAXNGRAM -#define MAX_NGRAM MYMAXNGRAM -#else -#define MAX_NGRAM 20 -#endif - -class dictionary; - -//typedef int code; - -class ngram{ - int word[MAX_NGRAM]; //encoded ngram - public: - dictionary *dict; //dictionary - char* link; // ngram-tree pointer - int midx[MAX_NGRAM]; // ngram-tree scan pointer - int lev; // ngram-tree level - int size; // ngram size - int freq; // ngram frequency - int succ; // number of successors - - unsigned char info; // ngram-tree info flags - unsigned char pinfo; // ngram-tree parent info flags - int isym; // last interruption symbol - - ngram(dictionary* d,int sz=0); - ngram(ngram& ng); - - int *wordp()// n-gram pointer - {return wordp(size);}; - int *wordp(int k) // n-gram pointer - {return size>=k?&word[MAX_NGRAM-k]:0;}; - const int *wordp() const // n-gram pointer - {return wordp(size);}; - const int *wordp(int k) const // n-gram pointer - {return size>=k?&word[MAX_NGRAM-k]:0;}; - - int shift(){ - for (int i=(MAX_NGRAM-1);i>0;i--){ - word[i]=word[i-1]; - } - size--; - return 1; - } - - - int containsWord(char* s,int lev){ - - int c=dict->encode(s); - if (c == -1) return 0; - - assert(lev <= size); - for (int i=0;i<lev;i++){ - if (*wordp(size-i)== c) return 1; - } - return 0; - } - - - void trans(const ngram& ng); - - friend std::ifstream& operator>> (std::ifstream& fi,ngram& ng); - friend std::ofstream& operator<< (std::ofstream& fi,ngram& ng); - friend std::istream& operator>> (std::istream& fi,ngram& ng); - friend std::ostream& operator<< (std::ostream& fi,ngram& ng); - - inline int ckhisto(int sz){ - - for (int i=sz;i>1;i--) - if (*wordp(i)==dict->oovcode()) - return 0; - return 1; - } - - int pushc(int c); - int pushw(char* w); - - //~ngram(); - - - -}; - -#endif - - - |