Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/irstlm
diff options
context:
space:
mode:
Diffstat (limited to 'irstlm')
-rw-r--r-- irstlm/.cdtbuild 47
-rw-r--r-- irstlm/.cdtproject 15
-rw-r--r-- irstlm/.cvsignore 2
-rw-r--r-- irstlm/.project 19
-rw-r--r-- irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs 13
-rw-r--r-- irstlm/Makefile.am 5
-rw-r--r-- irstlm/README 14
-rw-r--r-- irstlm/config.h.in 22
-rw-r--r-- irstlm/configure.in 11
-rwxr-xr-x irstlm/depcomp 522
-rwxr-xr-x irstlm/install-sh 322
-rw-r--r-- irstlm/irstlm.vcproj 347
-rwxr-xr-x irstlm/missing 353
-rw-r--r-- irstlm/src/Makefile.am 19
-rw-r--r-- irstlm/src/cmd.c 661
-rw-r--r-- irstlm/src/cmd.h 68
-rw-r--r-- irstlm/src/compile-lm.cpp 124
-rw-r--r-- irstlm/src/dictionary.cpp 418
-rw-r--r-- irstlm/src/dictionary.h 209
-rw-r--r-- irstlm/src/htable.cpp 261
-rw-r--r-- irstlm/src/htable.h 125
-rw-r--r-- irstlm/src/index.h 19
-rw-r--r-- irstlm/src/lmtable.cpp 728
-rw-r--r-- irstlm/src/lmtable.h 245
-rw-r--r-- irstlm/src/mempool.cpp 516
-rw-r--r-- irstlm/src/mempool.h 181
-rw-r--r-- irstlm/src/ngram.cpp 214
-rw-r--r-- irstlm/src/ngram.h 117
28 files changed, 0 insertions, 5597 deletions
diff --git a/irstlm/.cdtbuild b/irstlm/.cdtbuild
deleted file mode 100644
index 5c8f99dd4..000000000
--- a/irstlm/.cdtbuild
+++ /dev/null
@@ -1,47 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<?fileVersion 3.0.0?>
-
-<ManagedProjectBuildInfo>
-<project id="irstlm.cdt.managedbuild.target.gnu.lib.1070956508" name="Static Library (Gnu)" projectType="cdt.managedbuild.target.gnu.lib">
-<configuration artifactExtension="a" artifactName="irstlm" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.debug.8750958" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
-<toolChain id="cdt.managedbuild.toolchain.gnu.lib.debug.1732402088" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.debug">
-<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.208381076" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug"/>
-<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.1534243185" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug">
-<option id="gnu.cpp.compiler.option.debugging.gprof.1713594612" superClass="gnu.cpp.compiler.option.debugging.gprof" value="true" valueType="boolean"/>
-</tool>
-<tool id="cdt.managedbuild.tool.gnu.archiver.lib.debug.1727542516" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.debug"/>
-<tool id="cdt.managedbuild.tool.gnu.assembler.lib.debug.1884793796" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.debug"/>
-<macros/>
-</toolChain>
-</configuration>
-<configuration artifactExtension="a" artifactName="irstlm" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.release.1538178030" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
-<toolChain id="cdt.managedbuild.toolchain.gnu.lib.release.508823597" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.release">
-<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.release.723647841" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.release"/>
-<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release.1586280207" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release"/>
-<tool id="cdt.managedbuild.tool.gnu.archiver.lib.release.1518934657" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.release"/>
-<tool id="cdt.managedbuild.tool.gnu.assembler.lib.release.1672118671" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.release"/>
-<macros/>
-</toolChain>
-</configuration>
-<configuration artifactExtension="a" artifactName="irstlm" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.debug.1333974501" name="DebugNBest" parent="cdt.managedbuild.config.gnu.lib.debug">
-<toolChain id="cdt.managedbuild.toolchain.gnu.lib.debug.365917155" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.debug">
-<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.100325283" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug"/>
-<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.955425850" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug">
-<option id="gnu.cpp.compiler.option.debugging.gprof.2034209861" superClass="gnu.cpp.compiler.option.debugging.gprof" value="true" valueType="boolean"/>
-</tool>
-<tool id="cdt.managedbuild.tool.gnu.archiver.lib.debug.370966026" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.debug"/>
-<tool id="cdt.managedbuild.tool.gnu.assembler.lib.debug.720742733" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.debug"/>
-<macros/>
-</toolChain>
-</configuration>
-<configuration artifactExtension="a" artifactName="irstlm" cleanCommand="rm -rf" description="" errorParsers="org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.GASErrorParser" id="cdt.managedbuild.config.gnu.lib.release.32089835" name="ReleaseNBest" parent="cdt.managedbuild.config.gnu.lib.release">
-<toolChain id="cdt.managedbuild.toolchain.gnu.lib.release.158080822" name="GCC Tool Chain" superClass="cdt.managedbuild.toolchain.gnu.lib.release">
-<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.release.1803995257" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.release"/>
-<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release.152646939" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.release"/>
-<tool id="cdt.managedbuild.tool.gnu.archiver.lib.release.58978613" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.lib.release"/>
-<tool id="cdt.managedbuild.tool.gnu.assembler.lib.release.2018125558" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.lib.release"/>
-<macros/>
-</toolChain>
-</configuration>
-</project>
-</ManagedProjectBuildInfo>
diff --git a/irstlm/.cdtproject b/irstlm/.cdtproject
deleted file mode 100644
index 41c23c46b..000000000
--- a/irstlm/.cdtproject
+++ /dev/null
@@ -1,15 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<?eclipse-cdt version="2.0"?>
-
-<cdtproject id="org.eclipse.cdt.managedbuilder.core.managedMake">
-<extension id="org.eclipse.cdt.managedbuilder.core.ManagedBuildManager" point="org.eclipse.cdt.core.ScannerInfoProvider"/>
-<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
-<extension id="org.eclipse.cdt.core.domsourceindexer" point="org.eclipse.cdt.core.CIndexer"/>
-<data>
-<item id="org.eclipse.cdt.core.pathentry">
-<pathentry kind="src" path=""/>
-<pathentry kind="out" path=""/>
-<pathentry kind="con" path="org.eclipse.cdt.managedbuilder.MANAGED_CONTAINER"/>
-</item>
-</data>
-</cdtproject>
diff --git a/irstlm/.cvsignore b/irstlm/.cvsignore
deleted file mode 100644
index 9816a999f..000000000
--- a/irstlm/.cvsignore
+++ /dev/null
@@ -1,2 +0,0 @@
-Debug*
-Release*
diff --git a/irstlm/.project b/irstlm/.project
deleted file mode 100644
index bb14e64fb..000000000
--- a/irstlm/.project
+++ /dev/null
@@ -1,19 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<projectDescription>
- <name>irstlm</name>
- <comment></comment>
- <projects>
- </projects>
- <buildSpec>
- <buildCommand>
- <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
- <arguments>
- </arguments>
- </buildCommand>
- </buildSpec>
- <natures>
- <nature>org.eclipse.cdt.core.cnature</nature>
- <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
- <nature>org.eclipse.cdt.core.ccnature</nature>
- </natures>
-</projectDescription>
diff --git a/irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs b/irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs
deleted file mode 100644
index d0951526a..000000000
--- a/irstlm/.settings/org.eclipse.cdt.managedbuilder.core.prefs
+++ /dev/null
@@ -1,13 +0,0 @@
-#Thu Jul 27 11:13:57 EDT 2006
-=\=\=\=\=\=\=
-<<<<<<<=org.eclipse.cdt.managedbuilder.core.prefs
->>>>>>>=1.2
-eclipse.preferences.version=1
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.debug.1333974501=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.debug.8750958=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.release.1538178030=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.lib.release.32089835=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment>\n<variable name\="CPATH" operation\="remove"/>\n<variable name\="CPLUS_INCLUDE_PATH" operation\="remove"/>\n</environment>\n
-environment/project=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n
-environment/project/cdt.managedbuild.config.gnu.lib.debug.1333974501=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n
-environment/project/cdt.managedbuild.config.gnu.lib.debug.8750958=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n
-environment/project/cdt.managedbuild.config.gnu.lib.release.1538178030=<?xml version\="1.0" encoding\="UTF-8"?>\n<environment/>\n
diff --git a/irstlm/Makefile.am b/irstlm/Makefile.am
deleted file mode 100644
index 4566b2680..000000000
--- a/irstlm/Makefile.am
+++ /dev/null
@@ -1,5 +0,0 @@
-# not a GNU package. You can remove this line, if
-# have all needed files, that a GNU package needs
-AUTOMAKE_OPTIONS = foreign
-SUBDIRS = src
-
diff --git a/irstlm/README b/irstlm/README
deleted file mode 100644
index 17a9b920f..000000000
--- a/irstlm/README
+++ /dev/null
@@ -1,14 +0,0 @@
-To build:
-
- aclocal
- autoconf
- automake
-
- ./configure --with-prefix=PATH TO INSTALL (probably `pwd`)
- make
- make install
-
-*Make install is important since it creates the include/ and lib/ directories
-that client software will depend on.
-
-
diff --git a/irstlm/config.h.in b/irstlm/config.h.in
deleted file mode 100644
index b292ea963..000000000
--- a/irstlm/config.h.in
+++ /dev/null
@@ -1,22 +0,0 @@
-/* config.h.in. Generated from configure.in by autoheader. */
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* Version number of package */
-#undef VERSION
diff --git a/irstlm/configure.in b/irstlm/configure.in
deleted file mode 100644
index c2ad8dda5..000000000
--- a/irstlm/configure.in
+++ /dev/null
@@ -1,11 +0,0 @@
-AC_INIT(src)
-
-AM_CONFIG_HEADER(config.h)
-AM_INIT_AUTOMAKE(irstlm, 1.0)
-
-AC_PROG_CXX
-AC_LANG_CPLUSPLUS
-AC_PROG_RANLIB
-#AM_PROG_LIBTOOL
-
-AC_OUTPUT(Makefile src/Makefile)
diff --git a/irstlm/depcomp b/irstlm/depcomp
deleted file mode 100755
index 11e2d3bfe..000000000
--- a/irstlm/depcomp
+++ /dev/null
@@ -1,522 +0,0 @@
-#! /bin/sh
-# depcomp - compile a program generating dependencies as side-effects
-
-scriptversion=2004-05-31.23
-
-# Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
-
-case $1 in
- '')
- echo "$0: No command. Try \`$0 --help' for more information." 1>&2
- exit 1;
- ;;
- -h | --h*)
- cat <<\EOF
-Usage: depcomp [--help] [--version] PROGRAM [ARGS]
-
-Run PROGRAMS ARGS to compile a file, generating dependencies
-as side-effects.
-
-Environment variables:
- depmode Dependency tracking mode.
- source Source file read by `PROGRAMS ARGS'.
- object Object file output by `PROGRAMS ARGS'.
- DEPDIR directory where to store dependencies.
- depfile Dependency file to output.
- tmpdepfile Temporary file to use when outputing dependencies.
- libtool Whether libtool is used (yes/no).
-
-Report bugs to <bug-automake@gnu.org>.
-EOF
- exit 0
- ;;
- -v | --v*)
- echo "depcomp $scriptversion"
- exit 0
- ;;
-esac
-
-if test -z "$depmode" || test -z "$source" || test -z "$object"; then
- echo "depcomp: Variables source, object and depmode must be set" 1>&2
- exit 1
-fi
-
-# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
-depfile=${depfile-`echo "$object" |
- sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
-tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
-
-rm -f "$tmpdepfile"
-
-# Some modes work just like other modes, but use different flags. We
-# parameterize here, but still list the modes in the big case below,
-# to make depend.m4 easier to write. Note that we *cannot* use a case
-# here, because this file can only contain one case statement.
-if test "$depmode" = hp; then
- # HP compiler uses -M and no extra arg.
- gccflag=-M
- depmode=gcc
-fi
-
-if test "$depmode" = dashXmstdout; then
- # This is just like dashmstdout with a different argument.
- dashmflag=-xM
- depmode=dashmstdout
-fi
-
-case "$depmode" in
-gcc3)
-## gcc 3 implements dependency tracking that does exactly what
-## we want. Yay! Note: for some reason libtool 1.4 doesn't like
-## it if -MD -MP comes after the -MF stuff. Hmm.
- "$@" -MT "$object" -MD -MP -MF "$tmpdepfile"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- mv "$tmpdepfile" "$depfile"
- ;;
-
-gcc)
-## There are various ways to get dependency output from gcc. Here's
-## why we pick this rather obscure method:
-## - Don't want to use -MD because we'd like the dependencies to end
-## up in a subdir. Having to rename by hand is ugly.
-## (We might end up doing this anyway to support other compilers.)
-## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
-## -MM, not -M (despite what the docs say).
-## - Using -M directly means running the compiler twice (even worse
-## than renaming).
- if test -z "$gccflag"; then
- gccflag=-MD,
- fi
- "$@" -Wp,"$gccflag$tmpdepfile"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
-## The second -e expression handles DOS-style file names with drive letters.
- sed -e 's/^[^:]*: / /' \
- -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
-## This next piece of magic avoids the `deleted header file' problem.
-## The problem is that when a header file which appears in a .P file
-## is deleted, the dependency causes make to die (because there is
-## typically no way to rebuild the header). We avoid this by adding
-## dummy dependencies for each header file. Too bad gcc doesn't do
-## this for us directly.
- tr ' ' '
-' < "$tmpdepfile" |
-## Some versions of gcc put a space before the `:'. On the theory
-## that the space means something, we add a space to the output as
-## well.
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-hp)
- # This case exists only to let depend.m4 do its work. It works by
- # looking at the text of this script. This case will never be run,
- # since it is checked for above.
- exit 1
- ;;
-
-sgi)
- if test "$libtool" = yes; then
- "$@" "-Wp,-MDupdate,$tmpdepfile"
- else
- "$@" -MDupdate "$tmpdepfile"
- fi
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
-
- if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
- echo "$object : \\" > "$depfile"
-
- # Clip off the initial element (the dependent). Don't try to be
- # clever and replace this with sed code, as IRIX sed won't handle
- # lines with more than a fixed number of characters (4096 in
- # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
- # the IRIX cc adds comments like `#:fec' to the end of the
- # dependency line.
- tr ' ' '
-' < "$tmpdepfile" \
- | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
- tr '
-' ' ' >> $depfile
- echo >> $depfile
-
- # The second pass generates a dummy entry for each header file.
- tr ' ' '
-' < "$tmpdepfile" \
- | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
- >> $depfile
- else
- # The sourcefile does not contain any dependencies, so just
- # store a dummy comment line, to avoid errors with the Makefile
- # "include basename.Plo" scheme.
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-aix)
- # The C for AIX Compiler uses -M and outputs the dependencies
- # in a .u file. In older versions, this file always lives in the
- # current directory. Also, the AIX compiler puts `$object:' at the
- # start of each line; $object doesn't have directory information.
- # Version 6 uses the directory in both cases.
- stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'`
- tmpdepfile="$stripped.u"
- if test "$libtool" = yes; then
- "$@" -Wc,-M
- else
- "$@" -M
- fi
- stat=$?
-
- if test -f "$tmpdepfile"; then :
- else
- stripped=`echo "$stripped" | sed 's,^.*/,,'`
- tmpdepfile="$stripped.u"
- fi
-
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
-
- if test -f "$tmpdepfile"; then
- outname="$stripped.o"
- # Each line is of the form `foo.o: dependent.h'.
- # Do two passes, one to just change these to
- # `$object: dependent.h' and one to simply `dependent.h:'.
- sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile"
- sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile"
- else
- # The sourcefile does not contain any dependencies, so just
- # store a dummy comment line, to avoid errors with the Makefile
- # "include basename.Plo" scheme.
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-icc)
- # Intel's C compiler understands `-MD -MF file'. However on
- # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
- # ICC 7.0 will fill foo.d with something like
- # foo.o: sub/foo.c
- # foo.o: sub/foo.h
- # which is wrong. We want:
- # sub/foo.o: sub/foo.c
- # sub/foo.o: sub/foo.h
- # sub/foo.c:
- # sub/foo.h:
- # ICC 7.1 will output
- # foo.o: sub/foo.c sub/foo.h
- # and will wrap long lines using \ :
- # foo.o: sub/foo.c ... \
- # sub/foo.h ... \
- # ...
-
- "$@" -MD -MF "$tmpdepfile"
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile"
- exit $stat
- fi
- rm -f "$depfile"
- # Each line is of the form `foo.o: dependent.h',
- # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
- # Do two passes, one to just change these to
- # `$object: dependent.h' and one to simply `dependent.h:'.
- sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
- # Some versions of the HPUX 10.20 sed can't process this invocation
- # correctly. Breaking it into two sed invocations is a workaround.
- sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
- sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-tru64)
- # The Tru64 compiler uses -MD to generate dependencies as a side
- # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
- # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
- # dependencies in `foo.d' instead, so we check for that too.
- # Subdirectories are respected.
- dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
- test "x$dir" = "x$object" && dir=
- base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
-
- if test "$libtool" = yes; then
- # Dependencies are output in .lo.d with libtool 1.4.
- # With libtool 1.5 they are output both in $dir.libs/$base.o.d
- # and in $dir.libs/$base.o.d and $dir$base.o.d. We process the
- # latter, because the former will be cleaned when $dir.libs is
- # erased.
- tmpdepfile1="$dir.libs/$base.lo.d"
- tmpdepfile2="$dir$base.o.d"
- tmpdepfile3="$dir.libs/$base.d"
- "$@" -Wc,-MD
- else
- tmpdepfile1="$dir$base.o.d"
- tmpdepfile2="$dir$base.d"
- tmpdepfile3="$dir$base.d"
- "$@" -MD
- fi
-
- stat=$?
- if test $stat -eq 0; then :
- else
- rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
- exit $stat
- fi
-
- if test -f "$tmpdepfile1"; then
- tmpdepfile="$tmpdepfile1"
- elif test -f "$tmpdepfile2"; then
- tmpdepfile="$tmpdepfile2"
- else
- tmpdepfile="$tmpdepfile3"
- fi
- if test -f "$tmpdepfile"; then
- sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
- # That's a tab and a space in the [].
- sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
- else
- echo "#dummy" > "$depfile"
- fi
- rm -f "$tmpdepfile"
- ;;
-
-#nosideeffect)
- # This comment above is used by automake to tell side-effect
- # dependency tracking mechanisms from slower ones.
-
-dashmstdout)
- # Important note: in order to support this mode, a compiler *must*
- # always write the preprocessed file to stdout, regardless of -o.
- "$@" || exit $?
-
- # Remove the call to Libtool.
- if test "$libtool" = yes; then
- while test $1 != '--mode=compile'; do
- shift
- done
- shift
- fi
-
- # Remove `-o $object'.
- IFS=" "
- for arg
- do
- case $arg in
- -o)
- shift
- ;;
- $object)
- shift
- ;;
- *)
- set fnord "$@" "$arg"
- shift # fnord
- shift # $arg
- ;;
- esac
- done
-
- test -z "$dashmflag" && dashmflag=-M
- # Require at least two characters before searching for `:'
- # in the target name. This is to cope with DOS-style filenames:
- # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise.
- "$@" $dashmflag |
- sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
- rm -f "$depfile"
- cat < "$tmpdepfile" > "$depfile"
- tr ' ' '
-' < "$tmpdepfile" | \
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-dashXmstdout)
- # This case only exists to satisfy depend.m4. It is never actually
- # run, as this mode is specially recognized in the preamble.
- exit 1
- ;;
-
-makedepend)
- "$@" || exit $?
- # Remove any Libtool call
- if test "$libtool" = yes; then
- while test $1 != '--mode=compile'; do
- shift
- done
- shift
- fi
- # X makedepend
- shift
- cleared=no
- for arg in "$@"; do
- case $cleared in
- no)
- set ""; shift
- cleared=yes ;;
- esac
- case "$arg" in
- -D*|-I*)
- set fnord "$@" "$arg"; shift ;;
- # Strip any option that makedepend may not understand. Remove
- # the object too, otherwise makedepend will parse it as a source file.
- -*|$object)
- ;;
- *)
- set fnord "$@" "$arg"; shift ;;
- esac
- done
- obj_suffix="`echo $object | sed 's/^.*\././'`"
- touch "$tmpdepfile"
- ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
- rm -f "$depfile"
- cat < "$tmpdepfile" > "$depfile"
- sed '1,2d' "$tmpdepfile" | tr ' ' '
-' | \
-## Some versions of the HPUX 10.20 sed can't process this invocation
-## correctly. Breaking it into two sed invocations is a workaround.
- sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile" "$tmpdepfile".bak
- ;;
-
-cpp)
- # Important note: in order to support this mode, a compiler *must*
- # always write the preprocessed file to stdout.
- "$@" || exit $?
-
- # Remove the call to Libtool.
- if test "$libtool" = yes; then
- while test $1 != '--mode=compile'; do
- shift
- done
- shift
- fi
-
- # Remove `-o $object'.
- IFS=" "
- for arg
- do
- case $arg in
- -o)
- shift
- ;;
- $object)
- shift
- ;;
- *)
- set fnord "$@" "$arg"
- shift # fnord
- shift # $arg
- ;;
- esac
- done
-
- "$@" -E |
- sed -n '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
- sed '$ s: \\$::' > "$tmpdepfile"
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- cat < "$tmpdepfile" >> "$depfile"
- sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-msvisualcpp)
- # Important note: in order to support this mode, a compiler *must*
- # always write the preprocessed file to stdout, regardless of -o,
- # because we must use -o when running libtool.
- "$@" || exit $?
- IFS=" "
- for arg
- do
- case "$arg" in
- "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
- set fnord "$@"
- shift
- shift
- ;;
- *)
- set fnord "$@" "$arg"
- shift
- shift
- ;;
- esac
- done
- "$@" -E |
- sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile"
- rm -f "$depfile"
- echo "$object : \\" > "$depfile"
- . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile"
- echo " " >> "$depfile"
- . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile"
- rm -f "$tmpdepfile"
- ;;
-
-none)
- exec "$@"
- ;;
-
-*)
- echo "Unknown depmode $depmode" 1>&2
- exit 1
- ;;
-esac
-
-exit 0
-
-# Local Variables:
-# mode: shell-script
-# sh-indentation: 2
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-end: "$"
-# End:
diff --git a/irstlm/install-sh b/irstlm/install-sh
deleted file mode 100755
index dd97db7aa..000000000
--- a/irstlm/install-sh
+++ /dev/null
@@ -1,322 +0,0 @@
-#!/bin/sh
-# install - install a program, script, or datafile
-
-scriptversion=2004-09-10.20
-
-# This originates from X11R5 (mit/util/scripts/install.sh), which was
-# later released in X11R6 (xc/config/util/install.sh) with the
-# following copyright and license.
-#
-# Copyright (C) 1994 X Consortium
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
-# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
-# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-# Except as contained in this notice, the name of the X Consortium shall not
-# be used in advertising or otherwise to promote the sale, use or other deal-
-# ings in this Software without prior written authorization from the X Consor-
-# tium.
-#
-#
-# FSF changes to this file are in the public domain.
-#
-# Calling this script install-sh is preferred over install.sh, to prevent
-# `make' implicit rules from creating a file called install from it
-# when there is no Makefile.
-#
-# This script is compatible with the BSD install script, but was written
-# from scratch. It can only install one file at a time, a restriction
-# shared with many OS's install programs.
-
-# set DOITPROG to echo to test this script
-
-# Don't use :- since 4.3BSD and earlier shells don't like it.
-doit="${DOITPROG-}"
-
-# put in absolute paths if you don't have them in your path; or use env. vars.
-
-mvprog="${MVPROG-mv}"
-cpprog="${CPPROG-cp}"
-chmodprog="${CHMODPROG-chmod}"
-chownprog="${CHOWNPROG-chown}"
-chgrpprog="${CHGRPPROG-chgrp}"
-stripprog="${STRIPPROG-strip}"
-rmprog="${RMPROG-rm}"
-mkdirprog="${MKDIRPROG-mkdir}"
-
-chmodcmd="$chmodprog 0755"
-chowncmd=
-chgrpcmd=
-stripcmd=
-rmcmd="$rmprog -f"
-mvcmd="$mvprog"
-src=
-dst=
-dir_arg=
-dstarg=
-no_target_directory=
-
-usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
- or: $0 [OPTION]... SRCFILES... DIRECTORY
- or: $0 [OPTION]... -t DIRECTORY SRCFILES...
- or: $0 [OPTION]... -d DIRECTORIES...
-
-In the 1st form, copy SRCFILE to DSTFILE.
-In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
-In the 4th, create DIRECTORIES.
-
-Options:
--c (ignored)
--d create directories instead of installing files.
--g GROUP $chgrpprog installed files to GROUP.
--m MODE $chmodprog installed files to MODE.
--o USER $chownprog installed files to USER.
--s $stripprog installed files.
--t DIRECTORY install into DIRECTORY.
--T report an error if DSTFILE is a directory.
---help display this help and exit.
---version display version info and exit.
-
-Environment variables override the default commands:
- CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG
-"
-
-while test -n "$1"; do
- case $1 in
- -c) shift
- continue;;
-
- -d) dir_arg=true
- shift
- continue;;
-
- -g) chgrpcmd="$chgrpprog $2"
- shift
- shift
- continue;;
-
- --help) echo "$usage"; exit 0;;
-
- -m) chmodcmd="$chmodprog $2"
- shift
- shift
- continue;;
-
- -o) chowncmd="$chownprog $2"
- shift
- shift
- continue;;
-
- -s) stripcmd=$stripprog
- shift
- continue;;
-
- -t) dstarg=$2
- shift
- shift
- continue;;
-
- -T) no_target_directory=true
- shift
- continue;;
-
- --version) echo "$0 $scriptversion"; exit 0;;
-
- *) # When -d is used, all remaining arguments are directories to create.
- # When -t is used, the destination is already specified.
- test -n "$dir_arg$dstarg" && break
- # Otherwise, the last argument is the destination. Remove it from $@.
- for arg
- do
- if test -n "$dstarg"; then
- # $@ is not empty: it contains at least $arg.
- set fnord "$@" "$dstarg"
- shift # fnord
- fi
- shift # arg
- dstarg=$arg
- done
- break;;
- esac
-done
-
-if test -z "$1"; then
- if test -z "$dir_arg"; then
- echo "$0: no input file specified." >&2
- exit 1
- fi
- # It's OK to call `install-sh -d' without argument.
- # This can happen when creating conditional directories.
- exit 0
-fi
-
-for src
-do
- # Protect names starting with `-'.
- case $src in
- -*) src=./$src ;;
- esac
-
- if test -n "$dir_arg"; then
- dst=$src
- src=
-
- if test -d "$dst"; then
- mkdircmd=:
- chmodcmd=
- else
- mkdircmd=$mkdirprog
- fi
- else
- # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
- # might cause directories to be created, which would be especially bad
- # if $src (and thus $dsttmp) contains '*'.
- if test ! -f "$src" && test ! -d "$src"; then
- echo "$0: $src does not exist." >&2
- exit 1
- fi
-
- if test -z "$dstarg"; then
- echo "$0: no destination specified." >&2
- exit 1
- fi
-
- dst=$dstarg
- # Protect names starting with `-'.
- case $dst in
- -*) dst=./$dst ;;
- esac
-
- # If destination is a directory, append the input filename; won't work
- # if double slashes aren't ignored.
- if test -d "$dst"; then
- if test -n "$no_target_directory"; then
- echo "$0: $dstarg: Is a directory" >&2
- exit 1
- fi
- dst=$dst/`basename "$src"`
- fi
- fi
-
- # This sed command emulates the dirname command.
- dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
-
- # Make sure that the destination directory exists.
-
- # Skip lots of stat calls in the usual case.
- if test ! -d "$dstdir"; then
- defaultIFS='
- '
- IFS="${IFS-$defaultIFS}"
-
- oIFS=$IFS
- # Some sh's can't handle IFS=/ for some reason.
- IFS='%'
- set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
- IFS=$oIFS
-
- pathcomp=
-
- while test $# -ne 0 ; do
- pathcomp=$pathcomp$1
- shift
- if test ! -d "$pathcomp"; then
- $mkdirprog "$pathcomp"
- # mkdir can fail with a `File exist' error in case several
- # install-sh are creating the directory concurrently. This
- # is OK.
- test -d "$pathcomp" || exit
- fi
- pathcomp=$pathcomp/
- done
- fi
-
- if test -n "$dir_arg"; then
- $doit $mkdircmd "$dst" \
- && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \
- && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \
- && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \
- && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; }
-
- else
- dstfile=`basename "$dst"`
-
- # Make a couple of temp file names in the proper directory.
- dsttmp=$dstdir/_inst.$$_
- rmtmp=$dstdir/_rm.$$_
-
- # Trap to clean up those temp files at exit.
- trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
- trap '(exit $?); exit' 1 2 13 15
-
- # Copy the file name to the temp name.
- $doit $cpprog "$src" "$dsttmp" &&
-
- # and set any options; do chmod last to preserve setuid bits.
- #
- # If any of these fail, we abort the whole thing. If we want to
- # ignore errors from any of these, just make sure not to ignore
- # errors from the above "$doit $cpprog $src $dsttmp" command.
- #
- { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \
- && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \
- && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \
- && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } &&
-
- # Now rename the file to the real destination.
- { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \
- || {
- # The rename failed, perhaps because mv can't rename something else
- # to itself, or perhaps because mv is so ancient that it does not
- # support -f.
-
- # Now remove or move aside any old file at destination location.
- # We try this two ways since rm can't unlink itself on some
- # systems and the destination file might be busy for other
- # reasons. In this case, the final cleanup might fail but the new
- # file should still install successfully.
- {
- if test -f "$dstdir/$dstfile"; then
- $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \
- || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \
- || {
- echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
- (exit 1); exit
- }
- else
- :
- fi
- } &&
-
- # Now rename the file to the real destination.
- $doit $mvcmd "$dsttmp" "$dstdir/$dstfile"
- }
- }
- fi || { (exit 1); exit; }
-done
-
-# The final little trick to "correctly" pass the exit status to the exit trap.
-{
- (exit 0); exit
-}
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-end: "$"
-# End:
diff --git a/irstlm/irstlm.vcproj b/irstlm/irstlm.vcproj
deleted file mode 100644
index 0ceb12191..000000000
--- a/irstlm/irstlm.vcproj
+++ /dev/null
@@ -1,347 +0,0 @@
-<?xml version="1.0" encoding="Windows-1252"?>
-<VisualStudioProject
- ProjectType="Visual C++"
- Version="8.00"
- Name="irstlm"
- ProjectGUID="{19C023D8-67DE-4609-9C89-3152EF95995D}"
- RootNamespace="irstlm"
- Keyword="ManagedCProj"
- >
- <Platforms>
- <Platform
- Name="Win32"
- />
- </Platforms>
- <ToolFiles>
- </ToolFiles>
- <Configurations>
- <Configuration
- Name="Debug|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="4"
- CharacterSet="1"
- ManagedExtensions="0"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- RuntimeLibrary="3"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- DebugInformationFormat="3"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLibrarianTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="Release|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="4"
- CharacterSet="1"
- ManagedExtensions="0"
- WholeProgramOptimization="1"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- RuntimeLibrary="2"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- DebugInformationFormat="3"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLibrarianTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="DebugNBest|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="4"
- CharacterSet="1"
- ManagedExtensions="0"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- Optimization="0"
- PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- RuntimeLibrary="3"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- DebugInformationFormat="3"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLibrarianTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- <Configuration
- Name="ReleaseNBest|Win32"
- OutputDirectory="$(SolutionDir)$(ConfigurationName)"
- IntermediateDirectory="$(ConfigurationName)"
- ConfigurationType="4"
- CharacterSet="1"
- ManagedExtensions="0"
- WholeProgramOptimization="1"
- >
- <Tool
- Name="VCPreBuildEventTool"
- />
- <Tool
- Name="VCCustomBuildTool"
- />
- <Tool
- Name="VCXMLDataGeneratorTool"
- />
- <Tool
- Name="VCWebServiceProxyGeneratorTool"
- />
- <Tool
- Name="VCMIDLTool"
- />
- <Tool
- Name="VCCLCompilerTool"
- PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE"
- RuntimeLibrary="2"
- UsePrecompiledHeader="0"
- WarningLevel="3"
- DebugInformationFormat="3"
- />
- <Tool
- Name="VCManagedResourceCompilerTool"
- />
- <Tool
- Name="VCResourceCompilerTool"
- />
- <Tool
- Name="VCPreLinkEventTool"
- />
- <Tool
- Name="VCLibrarianTool"
- />
- <Tool
- Name="VCALinkTool"
- />
- <Tool
- Name="VCXDCMakeTool"
- />
- <Tool
- Name="VCBscMakeTool"
- />
- <Tool
- Name="VCFxCopTool"
- />
- <Tool
- Name="VCPostBuildEventTool"
- />
- </Configuration>
- </Configurations>
- <References>
- <AssemblyReference
- RelativePath="System.dll"
- AssemblyName="System, Version=2.0.0.0, PublicKeyToken=b77a5c561934e089, processorArchitecture=MSIL"
- />
- <AssemblyReference
- RelativePath="System.Data.dll"
- AssemblyName="System.Data, Version=2.0.0.0, PublicKeyToken=b77a5c561934e089, processorArchitecture=x86"
- />
- <AssemblyReference
- RelativePath="System.XML.dll"
- AssemblyName="System.Xml, Version=2.0.0.0, PublicKeyToken=b77a5c561934e089, processorArchitecture=MSIL"
- />
- </References>
- <Files>
- <Filter
- Name="Source Files"
- Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
- UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
- >
- <File
- RelativePath=".\src\cmd.c"
- >
- </File>
- <File
- RelativePath=".\src\dictionary.cpp"
- >
- </File>
- <File
- RelativePath=".\src\htable.cpp"
- >
- </File>
- <File
- RelativePath=".\src\lmtable.cpp"
- >
- </File>
- <File
- RelativePath=".\src\mempool.cpp"
- >
- </File>
- <File
- RelativePath=".\src\ngram.cpp"
- >
- </File>
- </Filter>
- <Filter
- Name="Header Files"
- Filter="h;hpp;hxx;hm;inl;inc;xsd"
- UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
- >
- <File
- RelativePath=".\src\cmd.h"
- >
- </File>
- <File
- RelativePath=".\src\dictionary.h"
- >
- </File>
- <File
- RelativePath=".\src\htable.h"
- >
- </File>
- <File
- RelativePath=".\src\index.h"
- >
- </File>
- <File
- RelativePath=".\src\lmtable.h"
- >
- </File>
- <File
- RelativePath=".\src\mempool.h"
- >
- </File>
- <File
- RelativePath=".\src\ngram.h"
- >
- </File>
- </Filter>
- <Filter
- Name="Resource Files"
- Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
- UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
- >
- </Filter>
- </Files>
- <Globals>
- </Globals>
-</VisualStudioProject>
diff --git a/irstlm/missing b/irstlm/missing
deleted file mode 100755
index 64b5f901d..000000000
--- a/irstlm/missing
+++ /dev/null
@@ -1,353 +0,0 @@
-#! /bin/sh
-# Common stub for a few missing GNU programs while installing.
-
-scriptversion=2004-09-07.08
-
-# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004
-# Free Software Foundation, Inc.
-# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-if test $# -eq 0; then
- echo 1>&2 "Try \`$0 --help' for more information"
- exit 1
-fi
-
-run=:
-
-# In the cases where this matters, `missing' is being run in the
-# srcdir already.
-if test -f configure.ac; then
- configure_ac=configure.ac
-else
- configure_ac=configure.in
-fi
-
-msg="missing on your system"
-
-case "$1" in
---run)
- # Try to run requested program, and just exit if it succeeds.
- run=
- shift
- "$@" && exit 0
- # Exit code 63 means version mismatch. This often happens
- # when the user try to use an ancient version of a tool on
- # a file that requires a minimum version. In this case we
- # we should proceed has if the program had been absent, or
- # if --run hadn't been passed.
- if test $? = 63; then
- run=:
- msg="probably too old"
- fi
- ;;
-
- -h|--h|--he|--hel|--help)
- echo "\
-$0 [OPTION]... PROGRAM [ARGUMENT]...
-
-Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
-error status if there is no known handling for PROGRAM.
-
-Options:
- -h, --help display this help and exit
- -v, --version output version information and exit
- --run try to run the given command, and emulate it if it fails
-
-Supported PROGRAM values:
- aclocal touch file \`aclocal.m4'
- autoconf touch file \`configure'
- autoheader touch file \`config.h.in'
- automake touch all \`Makefile.in' files
- bison create \`y.tab.[ch]', if possible, from existing .[ch]
- flex create \`lex.yy.c', if possible, from existing .c
- help2man touch the output file
- lex create \`lex.yy.c', if possible, from existing .c
- makeinfo touch the output file
- tar try tar, gnutar, gtar, then tar without non-portable flags
- yacc create \`y.tab.[ch]', if possible, from existing .[ch]
-
-Send bug reports to <bug-automake@gnu.org>."
- exit 0
- ;;
-
- -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
- echo "missing $scriptversion (GNU Automake)"
- exit 0
- ;;
-
- -*)
- echo 1>&2 "$0: Unknown \`$1' option"
- echo 1>&2 "Try \`$0 --help' for more information"
- exit 1
- ;;
-
-esac
-
-# Now exit if we have it, but it failed. Also exit now if we
-# don't have it and --version was passed (most likely to detect
-# the program).
-case "$1" in
- lex|yacc)
- # Not GNU programs, they don't have --version.
- ;;
-
- tar)
- if test -n "$run"; then
- echo 1>&2 "ERROR: \`tar' requires --run"
- exit 1
- elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
- exit 1
- fi
- ;;
-
- *)
- if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
- # We have it, but it failed.
- exit 1
- elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
- # Could not run --version or --help. This is probably someone
- # running `$TOOL --version' or `$TOOL --help' to check whether
- # $TOOL exists and not knowing $TOOL uses missing.
- exit 1
- fi
- ;;
-esac
-
-# If it does not exist, or fails to run (possibly an outdated version),
-# try to emulate it.
-case "$1" in
- aclocal*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`acinclude.m4' or \`${configure_ac}'. You might want
- to install the \`Automake' and \`Perl' packages. Grab them from
- any GNU archive site."
- touch aclocal.m4
- ;;
-
- autoconf)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`${configure_ac}'. You might want to install the
- \`Autoconf' and \`GNU m4' packages. Grab them from any GNU
- archive site."
- touch configure
- ;;
-
- autoheader)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`acconfig.h' or \`${configure_ac}'. You might want
- to install the \`Autoconf' and \`GNU m4' packages. Grab them
- from any GNU archive site."
- files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
- test -z "$files" && files="config.h"
- touch_files=
- for f in $files; do
- case "$f" in
- *:*) touch_files="$touch_files "`echo "$f" |
- sed -e 's/^[^:]*://' -e 's/:.*//'`;;
- *) touch_files="$touch_files $f.in";;
- esac
- done
- touch $touch_files
- ;;
-
- automake*)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
- You might want to install the \`Automake' and \`Perl' packages.
- Grab them from any GNU archive site."
- find . -type f -name Makefile.am -print |
- sed 's/\.am$/.in/' |
- while read f; do touch "$f"; done
- ;;
-
- autom4te)
- echo 1>&2 "\
-WARNING: \`$1' is needed, but is $msg.
- You might have modified some files without having the
- proper tools for further handling them.
- You can get \`$1' as part of \`Autoconf' from any GNU
- archive site."
-
- file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'`
- test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'`
- if test -f "$file"; then
- touch $file
- else
- test -z "$file" || exec >$file
- echo "#! /bin/sh"
- echo "# Created by GNU Automake missing as a replacement of"
- echo "# $ $@"
- echo "exit 0"
- chmod +x $file
- exit 1
- fi
- ;;
-
- bison|yacc)
- echo 1>&2 "\
-WARNING: \`$1' $msg. You should only need it if
- you modified a \`.y' file. You may need the \`Bison' package
- in order for those modifications to take effect. You can get
- \`Bison' from any GNU archive site."
- rm -f y.tab.c y.tab.h
- if [ $# -ne 1 ]; then
- eval LASTARG="\${$#}"
- case "$LASTARG" in
- *.y)
- SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
- if [ -f "$SRCFILE" ]; then
- cp "$SRCFILE" y.tab.c
- fi
- SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
- if [ -f "$SRCFILE" ]; then
- cp "$SRCFILE" y.tab.h
- fi
- ;;
- esac
- fi
- if [ ! -f y.tab.h ]; then
- echo >y.tab.h
- fi
- if [ ! -f y.tab.c ]; then
- echo 'main() { return 0; }' >y.tab.c
- fi
- ;;
-
- lex|flex)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a \`.l' file. You may need the \`Flex' package
- in order for those modifications to take effect. You can get
- \`Flex' from any GNU archive site."
- rm -f lex.yy.c
- if [ $# -ne 1 ]; then
- eval LASTARG="\${$#}"
- case "$LASTARG" in
- *.l)
- SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
- if [ -f "$SRCFILE" ]; then
- cp "$SRCFILE" lex.yy.c
- fi
- ;;
- esac
- fi
- if [ ! -f lex.yy.c ]; then
- echo 'main() { return 0; }' >lex.yy.c
- fi
- ;;
-
- help2man)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a dependency of a manual page. You may need the
- \`Help2man' package in order for those modifications to take
- effect. You can get \`Help2man' from any GNU archive site."
-
- file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
- if test -z "$file"; then
- file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'`
- fi
- if [ -f "$file" ]; then
- touch $file
- else
- test -z "$file" || exec >$file
- echo ".ab help2man is required to generate this page"
- exit 1
- fi
- ;;
-
- makeinfo)
- echo 1>&2 "\
-WARNING: \`$1' is $msg. You should only need it if
- you modified a \`.texi' or \`.texinfo' file, or any other file
- indirectly affecting the aspect of the manual. The spurious
- call might also be the consequence of using a buggy \`make' (AIX,
- DU, IRIX). You might want to install the \`Texinfo' package or
- the \`GNU make' package. Grab either from any GNU archive site."
- file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
- if test -z "$file"; then
- file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
- file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file`
- fi
- touch $file
- ;;
-
- tar)
- shift
-
- # We have already tried tar in the generic part.
- # Look for gnutar/gtar before invocation to avoid ugly error
- # messages.
- if (gnutar --version > /dev/null 2>&1); then
- gnutar "$@" && exit 0
- fi
- if (gtar --version > /dev/null 2>&1); then
- gtar "$@" && exit 0
- fi
- firstarg="$1"
- if shift; then
- case "$firstarg" in
- *o*)
- firstarg=`echo "$firstarg" | sed s/o//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- case "$firstarg" in
- *h*)
- firstarg=`echo "$firstarg" | sed s/h//`
- tar "$firstarg" "$@" && exit 0
- ;;
- esac
- fi
-
- echo 1>&2 "\
-WARNING: I can't seem to be able to run \`tar' with the given arguments.
- You may want to install GNU tar or Free paxutils, or check the
- command line arguments."
- exit 1
- ;;
-
- *)
- echo 1>&2 "\
-WARNING: \`$1' is needed, and is $msg.
- You might have modified some files without having the
- proper tools for further handling them. Check the \`README' file,
- it often tells you about the needed prerequisites for installing
- this package. You may also peek at any GNU archive site, in case
- some other package would contain this missing \`$1' program."
- exit 1
- ;;
-esac
-
-exit 0
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "scriptversion="
-# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-end: "$"
-# End:
diff --git a/irstlm/src/Makefile.am b/irstlm/src/Makefile.am
deleted file mode 100644
index ea26d6945..000000000
--- a/irstlm/src/Makefile.am
+++ /dev/null
@@ -1,19 +0,0 @@
-lib_LIBRARIES = libirstlm.a
-
-libirstlm_a_SOURCES = \
- dictionary.cpp \
- htable.cpp \
- lmtable.cpp \
- mempool.cpp \
- ngram.cpp
-
-library_includedir=$(includedir)
-library_include_HEADERS = dictionary.h lmtable.h ngram.h
-
-bin_PROGRAMS = compile-lm
-
-AM_LDFLAGS=-L .
-LIBS=-lirstlm
-
-compile_lm_SOURCES = compile-lm.cpp
-
diff --git a/irstlm/src/cmd.c b/irstlm/src/cmd.c
deleted file mode 100644
index aeb36d7b9..000000000
--- a/irstlm/src/cmd.c
+++ /dev/null
@@ -1,661 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-
-#include "cmd.h"
-
-static Enum_T BoolEnum[] = {
- { "FALSE", 0 },
- { "TRUE", 1 },
- { 0, 0 }
-};
-
-#ifdef NEEDSTRDUP
-char *strdup();
-#endif
-
-#define FALSE 0
-#define TRUE 1
-
-#define LINSIZ 10240
-#define MAXPARAM 256
-
-static char *GetLine(),
- **str2array();
-static int Scan(),
- SetParam(),
- SetEnum(),
- SetSubrange(),
- SetStrArray(),
- SetGte(),
- SetLte(),
- CmdError(),
- EnumError(),
- SubrangeError(),
- GteError(),
- LteError(),
- PrintParam(),
- PrintEnum(),
- PrintStrArray();
-
-static Cmd_T cmds[MAXPARAM+1];
-static char *SepString = " \t\n";
-
-#if defined(__STDC__)
-#include <stdarg.h>
-int DeclareParams(char *ParName, ...)
-#else
-#include <varargs.h>
-int DeclareParams(ParName, va_alist)
-char *ParName;
-va_dcl
-#endif
-{
- va_list args;
- static int ParamN = 0;
- int j,
- c;
- char *s;
-
-#if defined(__STDC__)
- va_start(args, ParName);
-#else
- va_start(args);
-#endif
- for(;ParName;) {
- if(ParamN==MAXPARAM) {
- fprintf(stderr, "Too many parameters !!\n");
- break;
- }
- for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
- ;
- if(!c) {
- fprintf(stderr,
- "Warning: parameter \"%s\" declared twice.\n",
- ParName);
- }
- for(c=ParamN; c>j; c--) {
- cmds[c] = cmds[c-1];
- }
- cmds[j].Name = ParName;
- cmds[j].Type = va_arg(args, int);
- cmds[j].Val = va_arg(args, void *);
- switch(cmds[j].Type) {
- case CMDENUMTYPE: /* get the pointer to Enum_T struct */
- cmds[j].p = va_arg(args, void *);
- break;
- case CMDSUBRANGETYPE: /* get the two extremes */
- cmds[j].p = (void*) calloc(2, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- ((int*)cmds[j].p)[1] = va_arg(args, int);
- break;
- case CMDGTETYPE: /* get lower or upper bound */
- case CMDLTETYPE:
- cmds[j].p = (void*) calloc(1, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- break;
- case CMDSTRARRAYTYPE: /* get the separators string */
- cmds[j].p = (s=va_arg(args, char*))
- ? (void*)strdup(s) : 0;
- break;
- case CMDBOOLTYPE:
- cmds[j].Type = CMDENUMTYPE;
- cmds[j].p = BoolEnum;
- break;
- case CMDDOUBLETYPE: /* nothing else is needed */
- case CMDINTTYPE:
- case CMDSTRINGTYPE:
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "DeclareParam()", "Unknown Type",
- cmds[j].Type, "for parameter", cmds[j].Name);
- exit(1);
- }
- ParamN++;
- ParName = va_arg(args, char *);
- }
- cmds[ParamN].Name = NULL;
- va_end(args);
- return 0;
-}
-
-int GetParams(n, a, CmdFileName)
-int *n;
-char ***a;
-char *CmdFileName;
-{
- char *Line,
- *ProgName;
- int argc = *n;
- char **argv = *a,
- *s;
- FILE *fp;
- int IsPipe;
-
-#ifdef MSDOS
-#define PATHSEP '\\'
- char *dot = NULL;
-#else
-#define PATHSEP '/'
-#endif
-
- if(!(Line=malloc(LINSIZ))) {
- fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
- LINSIZ);
- exit(1);
- }
- if((ProgName=strrchr(*argv, PATHSEP))) {
- ++ProgName;
- } else {
- ProgName = *argv;
- }
-#ifdef MSDOS
- if(dot=strchr(ProgName, '.')) *dot = 0;
-#endif
- --argc;
- ++argv;
- for(;;) {
- if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
- CmdFileName = argv[0]+2;
- ++argv;
- --argc;
- }
- if(!CmdFileName) {
- break;
- }
- IsPipe = !strncmp(CmdFileName, "@@", 2);
- fp = IsPipe
- ? popen(CmdFileName+2, "r")
- : strcmp(CmdFileName, "-")
- ? fopen(CmdFileName, "r")
- : stdin;
- if(!fp) {
- fprintf(stderr, "Unable to open command file %s\n",
- CmdFileName);
- exit(1);
- }
- while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
- if(Scan(ProgName, cmds, Line)) {
- CmdError(Line);
- }
- }
- if(fp!=stdin) {
- if(IsPipe) pclose(fp); else fclose(fp);
- }
- CmdFileName = NULL;
- }
- while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
- *s = ' ';
- sprintf(Line, "%s/%s", ProgName, *argv+1);
- *s = '=';
- if(Scan(ProgName, cmds, Line)) CmdError(*argv);
- --argc;
- ++argv;
- }
- *n = argc;
- *a = argv;
-#ifdef MSDOS
- if(dot) *dot = '.';
-#endif
- free(Line);
- return 0;
-}
-
-int PrintParams(ValFlag, fp)
-int ValFlag;
-FILE *fp;
-{
- int i;
-
- fflush(fp);
- if(ValFlag) {
- fprintf(fp, "Parameters Values:\n");
- } else {
- fprintf(fp, "Parameters:\n");
- }
- for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
- fprintf(fp, "\n");
- fflush(fp);
- return 0;
-}
-
-int SPrintParams(a, pfx)
-char ***a,
- *pfx;
-{
- int l,
- n;
- Cmd_T *cmd;
-
- if(!pfx) pfx="";
- l = strlen(pfx);
- for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr;
- a[0] = calloc(n, sizeof(char*));
- for(n=0, cmd=cmds; cmd->Name; cmd++) {
- if(!cmd->ArgStr) continue;
- a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2);
- sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
- ++n;
- }
- return n;
-}
-
-static int CmdError(opt)
-char *opt;
-{
- fprintf(stderr, "Invalid option \"%s\"\n", opt);
- fprintf(stderr, "This program expectes the following parameters:\n");
- PrintParams(FALSE, stderr);
- exit(0);
-}
-
-static int PrintParam(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- fprintf(fp, "%4s", "");
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDENUMTYPE:
- PrintEnum(cmd, ValFlag, fp);
- break;
- case CMDINTTYPE:
- case CMDSUBRANGETYPE:
- case CMDGTETYPE:
- case CMDLTETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDSTRINGTYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- if(*(char **)cmd->Val) {
- fprintf(fp, ": \"%s\"", *(char **)cmd->Val);
- } else {
- fprintf(fp, ": %s", "NULL");
- }
- }
- fprintf(fp, "\n");
- break;
- case CMDSTRARRAYTYPE:
- PrintStrArray(cmd, ValFlag, fp);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "PrintParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- return 0;
-}
-
/*
 * GetLine - read the next logical line of a command file.
 *
 * fp    stream to read from
 * n     size of the buffer Line points to
 * Line  destination buffer
 *
 * Lines whose first character is '#' and lines holding only white space are
 * skipped.  Leading white space and the trailing newline are removed.  A
 * line ending in a backslash is joined with the following one, the
 * backslash being replaced by a blank.
 *
 * Returns Line, or NULL when the input is exhausted before a complete line
 * has been read.
 */
static char *GetLine(FILE *fp, int n, char *Line)
{
    int j,
        l,
        offs=0;

    for(;;) {
        /* no room left for another character: stop joining lines */
        if(n-offs < 2) {
            return offs ? Line : NULL;
        }
        if(!fgets(Line+offs, n-offs, fp)) {
            return NULL;
        }
        if(Line[offs]=='#') continue;
        l = strlen(Line+offs);
        /* drop the newline only if there is one: the last line of a file
           and over-long lines come back from fgets() without it */
        if(l>0 && Line[offs+l-1]=='\n') {
            Line[offs+--l] = 0;
        }
        /* isspace() is only defined for unsigned char values and EOF */
        for(j=offs; Line[j] && isspace((unsigned char)Line[j]); j++, l--)
            ;
        if(l<1) continue;
        if(j > offs) {
            memmove(Line+offs, Line+j, l+1);
        }
        if(Line[offs+l-1]=='\\') {
            offs += l;
            Line[offs-1] = ' ';
        } else {
            break;
        }
    }
    return Line;
}
-
-static int Scan(ProgName, cmds, Line)
-char *ProgName,
- *Line;
-Cmd_T *cmds;
-{
- char *q,
- *p;
- int i,
- hl,
- HasToMatch = FALSE,
- c0,
- c;
-
- p = Line+strspn(Line, SepString);
- if(!(hl=strcspn(p, SepString))) {
- return 0;
- }
- if((q=strchr(p, '/')) && q-p<hl) {
- *q = 0;
- if(strcmp(p, ProgName)) {
- *q = '/';
- return 0;
- }
- *q = '/';
- HasToMatch=TRUE;
- p = q+1;
- }
- if(!(hl = strcspn(p, SepString))) {
- return 0;
- }
- c0 = p[hl];
- p[hl] = 0;
- for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
- ;
- p[hl] = c0;
- if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
- return HasToMatch && c;
-}
-
-static int SetParam(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- if(!*s && cmd->Type != CMDSTRINGTYPE) {
- fprintf(stderr,
- "WARNING: No value specified for parameter \"%s\"\n",
- cmd->Name);
- return 0;
- }
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
- fprintf(stderr,
- "Float value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDENUMTYPE:
- SetEnum(cmd, s);
- break;
- case CMDINTTYPE:
- if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDSTRINGTYPE:
- *(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
- ? strdup(s)
- : 0;
- break;
- case CMDSTRARRAYTYPE:
- SetStrArray(cmd, s);
- break;
- case CMDGTETYPE:
- SetGte(cmd, s);
- break;
- case CMDLTETYPE:
- SetLte(cmd, s);
- break;
- case CMDSUBRANGETYPE:
- SetSubrange(cmd, s);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "SetParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- cmd->ArgStr = strdup(s);
- return 0;
-}
-
-static int SetEnum(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- Enum_T *en;
-
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && !strcmp(s, en->Name)) {
- *(int *) cmd->Val = en->Idx;
- return 0;
- }
- }
- return EnumError(cmd, s);
-}
-
-static int SetSubrange(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n < *(int *)cmd->p || n > *((int *)cmd->p+1)) {
- return SubrangeError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int SetGte(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n<*(int *)cmd->p) {
- return GteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int SetStrArray(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- *(char***)cmd->Val = str2array(s, (char*)cmd->p);
- return 0;
-}
-
-static int SetLte(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n > *(int *)cmd->p) {
- return LteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int EnumError(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- Enum_T *en;
-
- fprintf(stderr,
- "Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
- fprintf(stderr, "Valid values are:\n");
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name) {
- fprintf(stderr, " %s\n", en->Name);
- }
- }
- fprintf(stderr, "\n");
- exit(1);
-}
-
-static int GteError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be greater than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
-}
-
-static int LteError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be less than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
-}
-
-static int SubrangeError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values range from %d to %d\n",
- *(int *)cmd->p, *((int *)cmd->p+1));
- exit(1);
-}
-
-static int PrintEnum(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- Enum_T *en;
-
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && en->Idx==*(int *)cmd->Val) {
- fprintf(fp, ": %s", en->Name);
- }
- }
- }
- fprintf(fp, "\n");
- return 0;
-}
-
-static int PrintStrArray(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- char *indent,
- **s = *(char***)cmd->Val;
- int l = 4+strlen(cmd->Name);
-
- fprintf(fp, "%s", cmd->Name);
- indent = malloc(l+2);
- memset(indent, ' ', l+1);
- indent[l+1] = 0;
- if(ValFlag) {
- fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
- if(s) while(*s) {
- fprintf(fp, "\n%s %s", indent, *s++);
- }
- }
- free(indent);
- fprintf(fp, "\n");
- return 0;
-}
-
-static char **str2array(s, sep)
-char *s,
- *sep;
-{
- char *p,
- **a;
- int n = 0,
- l;
-
- if(!sep) sep = SepString;
- p = s += strspn(s, sep);
- while(*p) {
- p += strcspn(p, sep);
- p += strspn(p, sep);
- ++n;
- }
- a = calloc(n+1, sizeof(char *));
- p = s;
- n = 0;
- while(*p) {
- l = strcspn(p, sep);
- a[n] = malloc(l+1);
- memcpy(a[n], p, l);
- a[n][l] = 0;
- ++n;
- p += l;
- p += strspn(p, sep);
- }
- return a;
-}
diff --git a/irstlm/src/cmd.h b/irstlm/src/cmd.h
deleted file mode 100644
index 708905f6f..000000000
--- a/irstlm/src/cmd.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
#if !defined(CMD_H)

#define CMD_H

#include <stdio.h>  /* FILE, needed by the PrintParams() prototype */

/* Type codes accepted by DeclareParams(). */
#define CMDDOUBLETYPE   1
#define CMDENUMTYPE     2
#define CMDINTTYPE      3
#define CMDSTRINGTYPE   4
#define CMDSUBRANGETYPE 5
#define CMDGTETYPE      6
#define CMDLTETYPE      7
#define CMDSTRARRAYTYPE 8
#define CMDBOOLTYPE     9

/* One entry of an enumeration table; a table ends with a NULL Name. */
typedef struct {
    char    *Name;  /* symbolic name accepted as a value */
    int     Idx;    /* integer stored when Name is selected */
} Enum_T;

/* Description of one declared parameter. */
typedef struct {
    int     Type;       /* one of the CMD*TYPE codes */
    char    *Name,      /* parameter name */
            *ArgStr;    /* copy of the text the value was last set from */
    void    *Val,       /* address of the variable receiving the value */
            *p;         /* type-specific data: enum table, bounds, separators */
} Cmd_T;

#ifdef  __cplusplus
extern "C" {
#endif

#if defined(__STDC__)
int DeclareParams(char *, ...);
#else
int DeclareParams();
#endif

/* Full prototypes: an empty parameter list would mean "takes no arguments"
   to a C++ caller and would disable argument checking in C. */
int GetParams(int *n, char ***a, char *CmdFileName);
int SPrintParams(char ***a, char *pfx);
int PrintParams(int ValFlag, FILE *fp);

#ifdef  __cplusplus
}
#endif
#endif
-
-
-
diff --git a/irstlm/src/compile-lm.cpp b/irstlm/src/compile-lm.cpp
deleted file mode 100644
index 17c152fcf..000000000
--- a/irstlm/src/compile-lm.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit, compile LM
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <string>
-#include <stdlib.h>
-
-#include "lmtable.h"
-
-
-/* GLOBAL OPTIONS ***************/
-std::string sn = "0";
-std::string sres = "0";
-std::string sdecay = "0.95";
-/********************************/
-
/// Print the command-line synopsis on stderr.
/// @param msg  optional error message printed first; when it is absent the
///             longer description of the tool is printed as well.
void usage(const char *msg = 0) {
  const bool have_msg = (msg != 0);

  if (have_msg) {
    std::cerr << msg << std::endl;
  }
  std::cerr << "Usage: compile-lm [options] input-file.lm [output-file.blm]" << std::endl;
  if (!have_msg) {
    std::cerr << std::endl;
    std::cerr << " compile-lm reads a standard LM file in ARPA format and produces" << std::endl;
    std::cerr << " a compiled representation that the IRST LM toolkit can quickly" << std::endl;
    std::cerr << " read and process." << std::endl;
    std::cerr << std::endl;
  }
  std::cerr << "Options:\n -r=RESOLUTION\n -d=DECAY\n -n=NGRAM SIZE <required>\n\n";
}
-
/// Tell whether the argument @p s selects the option @p pre.
/// That is the case when @p s is exactly @p pre, or when it begins with
/// @p pre immediately followed by '='.
bool starts_with(const std::string &s, const std::string &pre) {
  if (s == pre) {
    return true;
  }
  const std::string with_equals = pre + '=';
  return s.size() >= with_equals.size()
      && s.compare(0, with_equals.size(), with_equals) == 0;
}
-
-std::string get_param(const std::string& opt, int argc, const char **argv, int& argi)
-{
- std::string::size_type equals = opt.find_first_of('=');
- if (equals != std::string::npos && equals < opt.size()-1) {
- return opt.substr(equals+1);
- }
- std::string nexto;
- if (argi + 1 < argc) {
- nexto = argv[++argi];
- } else {
- usage((opt + " requires a value!").c_str());
- exit(1);
- }
- return nexto;
-}
-
-void handle_option(const std::string& opt, int argc, const char **argv, int& argi)
-{
- if (opt == "--help" || opt == "-h") { usage(); exit(1); }
- if (starts_with(opt, "--resolution") || starts_with(opt, "-r"))
- sres = get_param(opt, argc, argv, argi);
- else if (starts_with(opt, "--decay") || starts_with(opt, "-d"))
- sdecay = get_param(opt, argc, argv, argi);
- else if (starts_with(opt, "--ngram-size") || starts_with(opt, "-n"))
- sn = get_param(opt, argc, argv, argi);
- else {
- usage(("Don't understand option " + opt).c_str());
- exit(1);
- }
-}
-
-int main(int argc, const char **argv)
-{
- if (argc < 2) { usage(); exit(1); }
- std::vector<std::string> files;
- for (int i=1; i < argc; i++) {
- std::string opt = argv[i];
- if (opt[0] == '-') { handle_option(opt, argc, argv, i); }
- else files.push_back(opt);
- }
- if (files.size() > 2) { usage("Too many arguments"); exit(1); }
- if (files.size() < 1) { usage("Please specify a LM file to read from"); exit(1); }
- double decay = strtod(sdecay.c_str(),0);
- int resolution = strtol(sres.c_str(),0,10);
- int ngram_size = strtol(sn.c_str(),0,10);
- if (ngram_size < 1) { usage("Please specify an ngram size greater than or equal 1 with -n"); exit(1); }
- std::string infile = files[0];
- if (files.size() == 1) {
- std::string::size_type p = infile.rfind('/');
- if (p != std::string::npos && ((p+1) < infile.size())) {
- files.push_back(infile.substr(p+1) + ".blm");
- } else {
- files.push_back(infile + ".blm");
- }
- }
- std::string outfile = files[1];
- std::cout << "Using decay=" << decay << ", resolution=" << resolution << std::endl;
- std::cout << "Reading " << infile << "..." << std::endl;
- std::ifstream inp(infile.c_str());
- if (!inp.good()) {
- std::cerr << "Failed to open " << infile << "!\n";
- exit(1);
- }
- lmtable lmt(inp);
- std::cout << "Saving to " << outfile << std::endl;
- lmt.savebin(outfile.c_str());
- return 0;
-}
-
diff --git a/irstlm/src/dictionary.cpp b/irstlm/src/dictionary.cpp
deleted file mode 100644
index ab53116a2..000000000
--- a/irstlm/src/dictionary.cpp
+++ /dev/null
@@ -1,418 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#include <iomanip>
-#include <iostream>
-#include <fstream>
-#include "mempool.h"
-#include "htable.h"
-#include "dictionary.h"
-#include "index.h"
-
-using namespace std;
-
-dictionary::dictionary(char *filename,int size,char* isymb,char* oovlexfile){
-
- // unitialized memory
- if (oovlexfile!=NULL)
- oovlex=new dictionary(oovlexfile,size,isymb,NULL);
- else
- oovlex=(dictionary *)NULL;
-
- htb = new htable(size/LOAD_FACTOR);
- tb = new dict_entry[size];
- st = new strstack(size * 10);
-
- for (int i=0;i<size;i++) tb[i].freq=0;
-
- is=(char*) NULL;
- intsymb(isymb);
-
- oov_code = -1;
- in_oov_lex=0;
- n = 0;
- N = 0;
- dubv = 0;
- lim = size;
- ifl=0; //increment flag
-
- if (filename==NULL) return;
-
- std::ifstream inp(filename,ios::in);
-
- if (!inp){
- cerr << "cannot open " << filename << "\n";
- exit(1);
- }
-
- char buffer[100];
-
- inp >> setw(100) >> buffer;
-
- inp.close();
-
- if ((strncmp(buffer,"dict",4)==0) ||
- (strncmp(buffer,"DICT",4)==0))
- load(filename);
- else
- generate(filename);
-
- cerr << "loaded \n";
-
-
-}
-
-
-
-void dictionary::generate(char *filename){
-
- char buffer[MAX_WORD];
- int k;
-
- ifstream inp(filename,ios::in);
-
- if (!inp){
- cerr << "cannot open " << filename << "\n";
- exit(1);
- }
-
- cerr << "dict:";
-
- ifl=1; k=0;
- while (inp >> setw(MAX_WORD) >> buffer){
-
- if (strlen(buffer)==(MAX_WORD-1)){
- cerr << "dictionary: a too long word was read ("
- << buffer << ")\n";
- };
-
-
- if (strlen(buffer)==0){
- cerr << "zero lenght word!\n";
- continue;
- }
-
- //if (is && (strlen(buffer)==1) && !index(is,buffer[0]))
- if (is && (strlen(buffer)==1) && (index(is,buffer[0])!=NULL))
- continue; //skip over the interruption symbol
-
- incfreq(encode(buffer),1);
-
- if (!(++k % 1000000)) cerr << ".";
- }
- ifl=0;
- cerr << "\n";
-
- inp.close();
-
-}
-
-void dictionary::load(char* filename){
- char header[100];
- char buffer[MAX_WORD];
- char *addr;
- int freqflag=0;
-
- ifstream inp(filename,ios::in);
-
- if (!inp){
- cerr << "\ncannot open " << filename << "\n";
- exit(1);
- }
-
- cerr << "dict:";
-
- inp.getline(header,100);
- if (strncmp(header,"DICT",4)==0)
- freqflag=1;
- else
- if (strncmp(header,"dict",4)!=0){
- cerr << "\ndictionary file " << filename << " has a wrong header\n";
- exit(1);
- }
-
-
- while (inp >> setw(MAX_WORD) >> buffer){
-
- if (strlen(buffer)==(MAX_WORD-1)){
- cerr << "\ndictionary: a too long word was read ("
- << buffer << ")\n";
- };
-
- tb[n].word=st->push(buffer);
- tb[n].code=n;
-
- if (freqflag)
- inp >> tb[n].freq;
- else
- tb[n].freq=0;
-
- if ((addr=htb->search((char *)&tb[n].word,HT_ENTER)))
- if (addr!=(char *)&tb[n].word){
- cerr << "dictionary::loadtxt wrong entry was found ("
- << buffer << ") in position " << n << "\n";
- exit(1);
- }
-
- N+=tb[n].freq;
-
- if (strcmp(buffer,OOV())==0) oov_code=n;
-
- if (++n==lim) grow();
-
- }
-
- inp.close();
-}
-
-
-void dictionary::load(std::istream& inp){
-
- char buffer[MAX_WORD];
- char *addr;
- int size;
-
- inp >> size;
-
- for (int i=0;i<size;i++){
-
- inp >> buffer;
-
- tb[n].word=st->push(buffer);
- tb[n].code=n;
- inp >> tb[n].freq;
- N+=tb[n].freq;
-
- if ((addr=htb->search((char *)&tb[n].word,HT_ENTER)))
- if (addr!=(char *)&tb[n].word){
- cerr << "dictionary::loadtxt wrong entry was found ("
- << buffer << ") in position " << n << "\n";
- exit(1);
- }
-
- if (strcmp(tb[n].word,OOV())==0)
- oov_code=n;
-
- if (++n==lim) grow();
- }
- inp.getline(buffer,MAX_WORD-1);
-}
-
-void dictionary::save(std::ostream& out){
- out << n << "\n";
- for (int i=0;i<n;i++)
- out << tb[i].word << " " << tb[i].freq << "\n";
-}
-
-
-int cmpdictentry(const void *a,const void *b){
- dict_entry *ae=(dict_entry *)a;
- dict_entry *be=(dict_entry *)b;
- return be->freq-ae->freq;
-}
-
-dictionary::dictionary(dictionary* d){
-
- //transfer values
-
- n=d->n; //total entries
- N=d->N; //total frequency
- lim=d->lim; //limit of entries
- oov_code=-1; //code od oov must be re-defined
- ifl=0; //increment flag=0;
- dubv=d->dubv; //dictionary upperbound transferred
- in_oov_lex=0; //does not copy oovlex;
-
-
- //creates a sorted copy of the table
-
- tb = new dict_entry[lim];
- htb = new htable(lim/LOAD_FACTOR);
- st = new strstack(lim * 10);
-
- for (int i=0;i<n;i++){
- tb[i].code=d->tb[i].code;
- tb[i].freq=d->tb[i].freq;
- tb[i].word=st->push(d->tb[i].word);
- }
-
- //sort all entries according to frequency
- cerr << "sorting dictionary ...";
- qsort(tb,n,sizeof(dict_entry),cmpdictentry);
- cerr << "done\n";
-
- for (int i=0;i<n;i++){
-
- //eventually re-assign oov code
- if (d->oov_code==tb[i].code) oov_code=i;
-
- tb[i].code=i;
- htb->search((char *)&tb[i].word,HT_ENTER);
- };
-
-}
-
-
-
-dictionary::~dictionary(){
- delete htb;
- delete st;
- delete [] tb;
-}
-
-void dictionary::stat(){
- cout << "dictionary class statistics\n";
- cout << "size " << n
- << " used memory "
- << (lim * sizeof(int) +
- htb->used() +
- st->used())/1024 << " Kb\n";
-}
-
-void dictionary::grow(){
-
- delete htb;
-
- cerr << "+\b";
-
- dict_entry *tb2=new dict_entry[lim+GROWTH_STEP];
-
- memcpy(tb2,tb,sizeof(dict_entry) * lim );
-
- delete [] tb; tb=tb2;
-
- htb=new htable((lim+GROWTH_STEP)/LOAD_FACTOR);
-
- for (int i=0;i<lim;i++)
-
- htb->search((char *)&tb[i].word,HT_ENTER);
-
- for (int i=lim;i<lim+GROWTH_STEP;i++) tb[i].freq=0;
-
- lim+=GROWTH_STEP;
-
-
-}
-
-void dictionary::save(char *filename,int freqflag){
-
- std::ofstream out(filename,ios::out);
-
- if (!out){
- cerr << "cannot open " << filename << "\n";
- }
-
- // header
- if (freqflag)
- out << "DICTIONARY 0 " << n << "\n";
- else
- out << "dictionary 0 " << n << "\n";
-
- for (int i=0;i<n;i++){
- out << tb[i].word;
- if (freqflag)
- out << " " << tb[i].freq;
- out << "\n";
- }
-
- out.close();
-}
-
-
-int dictionary::getcode(const char *w){
- dict_entry* ptr=(dict_entry *)htb->search((char *)&w,HT_FIND);
- if (ptr==NULL) return -1;
- return ptr->code;
-}
-
-int dictionary::encode(const char *w){
-
- //case of strange characters
- if (strlen(w)==0){cerr << "0";w=OOV();}
-
- dict_entry* ptr;
-
- if ((ptr=(dict_entry *)htb->search((char *)&w,HT_FIND))!=NULL)
- return ptr->code;
- else{
- if (!ifl){ //do not extend dictionary
- if (oov_code==-1){ //did not use OOV yet
- cerr << "starting to use OOV words [" << w << "]\n";
- tb[n].word=st->push(OOV());
- htb->search((char *)&tb[n].word,HT_ENTER);
- tb[n].code=n;
- tb[n].freq=0;
- oov_code=n;
- if (++n==lim) grow();
- }
- //if there is an oov lexicon, check if this word belongs to
- dict_entry* oovptr;
- if (oovlex){
- if ((oovptr=(dict_entry *)oovlex->htb->search((char *)&w,HT_FIND))!=NULL){
- in_oov_lex=1;
- oov_lex_code=oovptr->code;
- }else
- in_oov_lex=0;
- }
- return encode(OOV());
- }
- else{ //extend dictionary
- tb[n].word=st->push((char *)w);
- htb->search((char *)&tb[n].word,HT_ENTER);
- tb[n].code=n;
- tb[n].freq=0;
- if (++n==lim) grow();
- return n-1;
- }
- }
-}
-
-
-char *dictionary::decode(int c){
- if (c>=0 && c < n)
- return tb[c].word;
- else{
- cerr << "decode: code out of boundary\n";
- return OOV();
- }
-}
-
-
-dictionary_iter::dictionary_iter(dictionary *dict) : m_dict(dict) {
- m_dict->htb->scan(HT_INIT);
-}
-
-dict_entry* dictionary_iter::next() {
- return (dict_entry*)m_dict->htb->scan(HT_CONT);
-}
-
-
-
-
-
-/*
-main(int argc,char **argv){
- dictionary d(argv[1],40000);
- d.stat();
- cout << "ROMA" << d.decode(0) << "\n";
- cout << "ROMA:" << d.encode("ROMA") << "\n";
- d.save(argv[2]);
-}
-*/
diff --git a/irstlm/src/dictionary.h b/irstlm/src/dictionary.h
deleted file mode 100644
index 494b240d2..000000000
--- a/irstlm/src/dictionary.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-/*
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef MF_DICTIONARY_H
-#define MF_DICTIONARY_H
-
-#include <string.h>
-#include <iostream>
-
-#define MAX_WORD 100
-#define LOAD_FACTOR 5
-
-#ifndef GROWTH_STEP
-#define GROWTH_STEP 100000
-#endif
-
-#ifndef DICT_INITSIZE
-#define DICT_INITSIZE 100000
-#endif
-
-
-//Begin of sentence symbol
-#ifndef BOS_
-#define BOS_ "<s>"
-#endif
-
-
-//End of sentence symbol
-#ifndef EOS_
-#define EOS_ "</s>"
-#endif
-
-//End of sentence symbol
-#ifndef OOV_
-#define OOV_ "_unk_"
-#endif
-
-
-typedef struct{
- char *word;
- int code;
- int freq;
-}dict_entry;
-
-class strstack;
-class htable;
-
-class dictionary{
- strstack *st; //!< stack of strings
- dict_entry *tb; //!< entry table
- htable *htb; //!< hash table
- int n; //!< number of entries
- int N; //!< total frequency
- int lim; //!< limit of entries
- int oov_code; //!< code assigned to oov words
- char* is; //!< interruption symbol list
- char ifl; //!< increment flag
- int dubv; //!< dictionary size upper bound
- int in_oov_lex; //!< flag
- int oov_lex_code; //< dictionary
- char* oov_str; //!< oov string
-
- public:
-
- friend class dictionary_iter;
-
- dictionary* oovlex; //<! additional dictionary
-
- inline int dub(){return dubv;}
- inline int dub(int value){return (dubv=value);}
-
- inline char *OOV(){return (OOV_);}
- inline char *BoS(){return (BOS_);}
- inline char *EoS(){return (EOS_);}
-
- inline int oovcode(int v=-1){return oov_code=(v>=0?v:oov_code);}
-
- inline char *intsymb(char* isymb=NULL){
- if (isymb==NULL) return is;
- if (is!=NULL) delete [] is;
- is=new char[strlen(isymb+1)];
- strcpy(is,isymb);
- return is=isymb;
- }
-
- inline int incflag(){return ifl;}
- inline int incflag(int v){return ifl=v;}
- inline int oovlexsize(){return oovlex?oovlex->n:0;}
- inline int inoovlex(){return in_oov_lex;}
- inline int oovlexcode(){return oov_lex_code;}
-
-
- int isprintable(char* w){
- char buffer[MAX_WORD];
- sprintf(buffer,"%s",w);
- return strcmp(w,buffer)==0;
- }
-
- inline void genoovcode(){
- int c=encode(OOV());
- std::cerr << "OOV code is "<< c << std::endl;
- oovcode(c);
- }
-
- inline dictionary* oovlexp(char *fname=NULL){
- if (fname==NULL) return oovlex;
- if (oovlex!=NULL) delete oovlex;
- oovlex=new dictionary(fname,DICT_INITSIZE);
- return oovlex;
- }
-
- inline int setoovrate(double oovrate){
- encode(OOV()); //be sure OOV code exists
- int oovfreq=(int)(oovrate * totfreq());
- std::cerr << "setting OOV rate to: " << oovrate << " -- freq= " << oovfreq << std::endl;
- return freq(oovcode(),oovfreq);
-
- return 1;
- }
-
-
- inline int incfreq(int code,int value){N+=value;return tb[code].freq+=value;}
-
- inline int multfreq(int code,double value){
- N+=(int)(value * tb[code].freq)-tb[code].freq;
- return tb[code].freq=(int)(value * tb[code].freq);
- }
-
- inline int freq(int code,int value=-1){
- if (value>=0){
- N+=value-tb[code].freq;
- tb[code].freq=value;
- }
- return tb[code].freq;
- }
-
- inline int totfreq(){return N;}
-
- void grow();
- //dictionary(int size=400,char* isym=NULL,char* oovlex=NULL);
- dictionary(char *filename=NULL,int size=DICT_INITSIZE,char* isymb=NULL,char* oovlex=NULL);
- dictionary(dictionary* d);
-
- ~dictionary();
- void generate(char *filename);
- void load(char *filename);
- void save(char *filename,int freqflag=0);
- void load(std::istream& fd);
- void save(std::ostream& fd);
-
- int size(){return n;};
- int getcode(const char *w);
- int encode(const char *w);
- char *decode(int c);
- void stat();
-
- void cleanfreq(){
- for (int i=0;i<n;tb[i++].freq=0);
- N=0;
- }
-
-};
-
-class dictionary_iter {
- public:
- dictionary_iter(dictionary *dict);
- dict_entry* next();
- private:
- dictionary* m_dict;
-};
-
-#endif
-
diff --git a/irstlm/src/htable.cpp b/irstlm/src/htable.cpp
deleted file mode 100644
index 1f56723a2..000000000
--- a/irstlm/src/htable.cpp
+++ /dev/null
@@ -1,261 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#include <iostream>
-#include <assert.h>
-#include "mempool.h"
-#include "htable.h"
-
-using namespace std;
-
-//! Builds an empty hash table.
-//! \param n    number of buckets
-//! \param kl   fixed key length, or 0 to compute it with the key-length function
-//! \param ht   whether items are strings (STR) or pointers to strings (STRPTR)
-//! \param klf  key-length function; strlen is used when NULL
-htable::htable(int n,int kl,HTYPE ht,size_t (*klf)(const char* )){
-
-  memory=new mempool( sizeof(entry) , BlockSize );
-
-  size=n;
-  table = new entry* [ size ];
-  memset(table,0,sizeof(entry *) * n );
-
-  keylen=kl;
-  htype=ht;
-
-  keys = accesses = collisions = 0;
-
-  keylenfunc=(klf?klf:&strlen);
-
-  // give the scan cursor a defined state even before scan(HT_INIT) is called
-  scan_i=0;
-  scan_p=NULL;
-}
-
-
-//! Looks item up and, for any action other than HT_FIND, inserts it when absent.
-//! \return the stored key on a hit or after a successful insertion,
-//!         NULL when the item is absent (HT_FIND) or the pool has no room
-char *htable::search(char *item, HT_ACTION action)
-{
-  accesses++;
-
-  // start at the head of the bucket selected by the hash
-  entry **link = &table[Hash(item) % size];
-  entry *node = *link;
-
-  // walk the collision chain until the key matches or the chain ends
-  while (node != NULL && Comp((char *)node->key,(char *)item)){
-    link = (entry **)&node->next;
-    node = *link;
-    collisions++;
-  }
-
-  if (node != NULL) return (char *)node->key;   /* found */
-  if (action == HT_FIND) return (char *)NULL;   /* not found, search only */
-
-  node = (entry *)memory->alloc();
-  if (node == NULL) return (char *)NULL;        /* not found, no room */
-
-  // append the new element at the end of the chain
-  *link = node;
-  node->key = item;
-  node->next = NULL;
-  keys++;
-
-  return (char *)node->key;
-}
-
-
-//! Iterates over all stored keys.
-//! HT_INIT rewinds the cursor to bucket 0 and returns NULL; any other action
-//! returns the next key, or NULL once every bucket has been visited.
-char *htable::scan(HT_ACTION action){
-
-  if (action == HT_INIT){
-    scan_i=0;
-    scan_p=table[0];
-    return NULL;
-  }
-
-  // advance to the next non-empty bucket
-  while (scan_p==NULL){
-    if (++scan_i>=size) return NULL;
-    scan_p=table[scan_i];
-  }
-
-  char *key=scan_p->key;
-  scan_p=(entry *)scan_p->next;
-  return key;
-}
-
-
-//! Prints a character map of bucket occupancy, cols buckets per row:
-//! '.' for an empty bucket, '-' for 1 to 5 items, '#' for more than 5.
-//! \param co    output stream
-//! \param cols  buckets per printed row; nothing is printed when cols <= 0
-void htable::map(ostream& co,int cols){
-
-  if (cols<=0) return; // avoids a modulo by zero and a bad allocation
-
-  entry *p;
-  char* img=new char[cols+1];
-
-  img[cols]='\0';
-  memset(img,'.',cols);
-
-  co << "htable memory map: . (0 items), - (<=5), # (>5)\n";
-
-  for (int i=0; i<size;i++)
-  {
-    // count the items chained in bucket i
-    int n=0;p=table[i];
-
-    while(p!=NULL){
-      n++;
-      p=(entry *)p->next;
-    };
-
-    // a full row has been collected: flush it and start a new one
-    if (i && (i % cols)==0){
-      co << img << "\n";
-      memset(img,'.',cols);
-    }
-
-    if (n>0)
-      img[i % cols]=n<=5?'-':'#';
-
-  }
-
-  // Cut the last row at the table size. When size is an exact multiple of
-  // cols the row is complete and must be kept whole; truncating it at
-  // position 0 used to drop it.
-  int rest=size % cols;
-  if (rest!=0 || size==0) img[rest]='\0';
-  co << img << "\n";
-
-  delete []img;
-}
-
-
-//! Writes table size, key count, access and collision counters and the
-//! used memory (in Kb) to standard output.
-void htable::stat(){
-  cout << "htable class statistics\n"
-       << "size " << size
-       << " keys " << keys
-       << " acc " << accesses
-       << " coll " << collisions
-       << " used memory " << used()/1024 << "Kb\n";
-}
-
-//! Releases the entry pool and the bucket array. The keys themselves are
-//! not freed here.
-htable::~htable()
-{
-  delete memory;
-  delete [] table;
-}
-
-//! Computes the hash of a key.
-//! For STRPTR tables key points to a char* that is dereferenced first.
-//! The key length is keylen when non-zero, otherwise keylenfunc(Key).
-//! \return polynomial hash (multiplier Prime1) reduced modulo Prime2
-address htable::Hash(char *key)
-{
-  char *Key=(htype==STRPTR? *(char **)key:key);
-  int length=(keylen?keylen:keylenfunc(Key));
-
-  //cerr << "hash: " << Key << " length:" << length << "\n";
-
-  // plain locals: the 'register' storage class is no longer valid C++
-  address h=0;
-
-  for (int i=0;i<length;i++)
-    h = h * Prime1 ^ (Key[i] - ' ');
-  h %= Prime2;
-
-  return h;
-}
-
-
-//! Compares two keys.
-//! For STRPTR tables both arguments point to a char* that is dereferenced first.
-//! \return 0 when the keys have equal length and equal bytes, 1 otherwise
-int htable::Comp(char *key1, char *key2)
-{
-  assert(key1 && key2);
-
-  char *Key1=(htype==STRPTR?*(char **)key1:key1);
-  char *Key2=(htype==STRPTR?*(char **)key2:key2);
-
-  assert(Key1 && Key2);
-
-  int length1=(keylen?keylen:keylenfunc(Key1));
-  int length2=(keylen?keylen:keylenfunc(Key2));
-
-  if (length1!=length2) return 1;
-
-  // plain loop index: the 'register' storage class is no longer valid C++
-  for (int i=0;i<length1;i++)
-    if (Key1[i]!=Key2[i]) return 1;
-  return 0;
-}
-
-
-
-/*
-main(){
-
-const int n=1000;
-
-htable *ht=new htable(1000/5);
-
- char w[n][20];
- char *c;
-
- for (int i=0;i<n;i++)
- {
- sprintf(w[i],"ciao%d",i);
- ht->search((char *)&w[i],HT_ENTER);
- }
-
- for (int i=0;i<n;i++)
- if (ht->search((char *)&w[i],HT_FIND))
- cout << w[i] << " trovato\n" ;
- else
- cout << w[i] << " non trovato\n";
-
- ht->stat();
-
- delete ht;
- htable *ht2=new htable(n);
- for (int i=0;i<n;i++)
- ht2->search((char *)&w[i],HT_ENTER);
-
- ht2->scan(INIT);
- cout << "elenco:\n";
- while ((c=ht2->scan(CONT))!=NULL)
- cout << *(char **) c << "\n";
-
- ht2->map();
-}
-*/
-
-
-
-
-
-
-
diff --git a/irstlm/src/htable.h b/irstlm/src/htable.h
deleted file mode 100644
index 3fd484094..000000000
--- a/irstlm/src/htable.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-/*
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef MF_HTABLE_H
-#define MF_HTABLE_H
-
-#include <iostream>
-
-#define Prime1 37
-#define Prime2 1048583
-#define BlockSize 100
-
-
-// Fast arithmetic, relying on powers of 2,
-// and on pre-processor concatenation property
-
-// One element of a bucket's collision chain.
-typedef struct{
- char* key; // the item pointer handed to htable::search
- char* next; // secret from user (htable.cpp casts it to entry* to follow the chain)
-}entry;
-
-// Type of the value returned by htable::Hash.
-typedef unsigned int address;
-
-// Actions accepted by htable::search (first two) and htable::scan (last two).
-typedef enum {HT_FIND, //!< search: find an entry
- HT_ENTER, //!< search: enter an entry
- HT_INIT, //!< scan: start scan
- HT_CONT //!< scan: continue scan
-} HT_ACTION;
-
-// How the char* handed to the table is interpreted: with STRPTR, Hash and
-// Comp dereference it as char** before reading the key.
-typedef enum {STR, //!< string
- STRPTR //!< pointer to string
-}HTYPE;
-
-//! Hash Table for strings
-
-// Chained hash table. Entries are allocated from a mempool; the table stores
-// the item pointers it is given and does not copy the keys.
-// NOTE(review): no copy constructor/assignment is declared although the
-// destructor frees table and memory -- copying an htable looks unsafe.
-class htable {
- int size; //!< table size
- int keylen; //!< key length
- HTYPE htype; //!< type of entry pointer
- entry **table; //!< hash table
- int scan_i; //!< scan support
- entry *scan_p; //!< scan support
- // statistics
- long keys; //!< # of entries
- long accesses; //!< # of accesses
- long collisions; //!< # of collisions
-
- mempool *memory; //!< memory pool
-
- size_t (*keylenfunc)(const char*); //!< function computing key length
-
- public:
-
- //! Creates a hash table
- htable(int n,int kl=0,HTYPE ht=STRPTR,size_t (*klf)(const char* )=NULL);
-
- //! Destroys a hash table
- ~htable();
-
- //! Computes the hash function
- address Hash(char *key);
-
- //! Compares the keys of two entries
- int Comp(char *Key1,char *Key2);
-
- //! Searches for an item
- char *search(char *item, HT_ACTION action);
-
- //! Scans the content
- char *scan(HT_ACTION action);
-
- //! Prints statistics
- void stat();
-
- //! Print a map of memory use
- void map(std::ostream& co=std::cout, int cols=80);
-
- //! Returns amount of used memory
- // (bucket array size plus whatever the memory pool reports)
- int used(){return
- size * sizeof(entry **) +
- memory->used();};
-};
-
-
-
-#endif
-
-
-
diff --git a/irstlm/src/index.h b/irstlm/src/index.h
deleted file mode 100644
index 500587989..000000000
--- a/irstlm/src/index.h
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-#pragma once
-
-#ifdef WIN32
-
-// Replacement for index() on WIN32.
-// Returns a pointer to the first occurrence of search in str, or NULL when
-// the character does not occur before the terminating NUL.
-inline const char *index(const char *str, char search)
-{
-  // advance the cursor on every step: the previous loop never incremented
-  // its counter and spun forever when the first character did not match
-  for (const char *p=str; *p!='\0'; ++p){
-    if (*p==search) return p;
-  }
-  return NULL;
-}
-
-
-#endif
-
-
diff --git a/irstlm/src/lmtable.cpp b/irstlm/src/lmtable.cpp
deleted file mode 100644
index 997102da2..000000000
--- a/irstlm/src/lmtable.cpp
+++ /dev/null
@@ -1,728 +0,0 @@
-/*
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <iostream>
-#include <stdexcept>
-#include <assert.h>
-
-#include "math.h"
-#include "mempool.h"
-#include "htable.h"
-#include "dictionary.h"
-#include "ngram.h"
-#include "lmtable.h"
-
-using namespace std;
-
-// Reports message on stderr and throws it as std::runtime_error.
-// Takes const char* so that string literals can be passed without a
-// deprecated literal-to-char* conversion; char* arguments still work.
-inline void error(const char* message){
-  std::cerr << message << "\n";
-  throw std::runtime_error(message);
-}
-
-// Builds a language model table from a stream.
-// The first whitespace-delimited token selects the loader: "Qblmt"/"blmt"
-// go to loadbin(), anything else to loadtxt(). Afterwards the OOV code is
-// generated in the dictionary.
-lmtable::lmtable(std::istream& inp){
-
-  //initialization
-  maxlev=1;
-  backoff_state=0;
-
-  memset(cursize, 0, sizeof(cursize));
-  memset(tbltype, 0, sizeof(tbltype));
-  memset(maxsize, 0, sizeof(maxsize));
-  memset(info, 0, sizeof(info));
-  memset(NumCenters, 0, sizeof(NumCenters));
-
-  // the destructor delete[]s these per-level pointers, so they must not
-  // start out as garbage for levels that are never loaded
-  memset(table, 0, sizeof(table));
-  memset(startpos, 0, sizeof(startpos));
-  memset(Pcenters, 0, sizeof(Pcenters));
-  memset(Bcenters, 0, sizeof(Bcenters));
-
-  dict=new dictionary((char *)NULL,1000000,(char*)NULL,(char*)NULL);
-
-  //default settings is a non quantized lmtable
-  configure(1,isQtable=0);
-
-  char header[1024];
-
-  // limit the extraction to the buffer size (width is reset after the read)
-  inp.width(sizeof(header));
-  inp >> header; cerr << header << "\n";
-
-  if (strncmp(header,"Qblmt",5)==0 || strncmp(header,"blmt",4)==0)
-    loadbin(inp, header);
-  else
-    loadtxt(inp, header);
-
-  dict->genoovcode();
-
-  cerr << "OOV code is " << dict->oovcode() << "\n";
-}
-
-
-// Splits sentence in place on blanks, tabs, CR and LF.
-// Stores up to max token pointers in words and, when fewer than max tokens
-// were found, terminates the list with a null pointer.
-// Returns the number of tokens stored. Uses strtok, so sentence is modified
-// and the call is not reentrant.
-int parseWords(char *sentence, char **words, int max)
-{
-  char *word;
-  int i = 0;
-
-  // const-qualified: a string literal must not be bound to a plain char*
-  const char *const wordSeparators = " \t\r\n";
-
-  for (word = strtok(sentence, wordSeparators);
-       i < max && word != 0;
-       i++, word = strtok(0, wordSeparators))
-  {
-    words[i] = word;
-  }
-
-  if (i < max){words[i] = 0;}
-
-  return i;
-}
-
-
-
-//Load a LM as a text file. LM could have been generated either with the
-//IRST LM toolkit or with the SRILM Toolkit. In the latter we are not
-//sure that n-grams are lexically ordered (according to the 1-grams).
-//However, we make the following assumption:
-//"all successors of any prefix are sorted and written in contiguous lines!"
-//This method also loads files processed with the quantization
-//tool: qlm
-
-// Reads one n-gram line of the form "<prob> w1 ... wOrder [<bow>]".
-// The words are pushed into ng ("<unk>" is replaced by the dictionary's OOV
-// symbol), the first field goes to prob and the optional last field to bow
-// (0.0 when absent).
-void parseline(std::istream& inp, int Order,ngram& ng,float& prob,float& bow){
-
-  char* words[1+ LMTMAXLEV + 1 + 1];
-  int howmany;
-  char line[1024];
-
-  inp.getline(line,1024);
-
-  howmany = parseWords(line, words, Order + 3);
-  assert(howmany == (Order+ 1) || howmany == (Order + 2));
-
-  //read words
-  ng.size=0;
-  for (int i=1;i<=Order;i++)
-    ng.pushw(strcmp(words[i],"<unk>")?words[i]:ng.dict->OOV());
-
-  //read logprob/code and logbow/code
-  // The conversions run outside assert(): with NDEBUG the asserted
-  // expression is not evaluated, which left prob and bow unset.
-  int parsed=sscanf(words[0],"%f",&prob);
-  assert(parsed==1);
-  if (howmany==(Order+2)){
-    parsed=sscanf(words[Order+1],"%f",&bow);
-    assert(parsed==1);
-  }
-  else
-    bow=0.0; //this is log10prob=0 for implicit backoff
-  (void)parsed; // only read by the asserts above
-}
-
-
-// Reads the quantization codebook of level Order: first the number of
-// centers, then one probability center per entry, followed by a back-off
-// center when Order is below maxlev.
-void lmtable::loadcenters(std::istream& inp,int Order){
-
-  //first read the codebook
-  cerr << Order << " read code book ";
-  inp >> NumCenters[Order];
-
-  const bool hasBow=(Order<maxlev);
-  Pcenters[Order]=new float[NumCenters[Order]];
-  Bcenters[Order]=(hasBow?new float[NumCenters[Order]]:NULL);
-
-  for (int i=0;i<NumCenters[Order];i++){
-    inp >> Pcenters[Order][i];
-    if (hasBow) inp >> Bcenters[Order][i];
-  }
-
-  //consume what is left of the last line
-  char rest[11];
-  inp.getline(rest,10);
-}
-
-
-// Loads a language model in (q)ARPA text format from inp.
-// header is the first token already consumed by the constructor; a "qARPA"
-// prefix marks a quantized model.
-// "ngram N=count" lines set maxsize[N] and maxlev; each "\N-grams" section
-// then allocates table[N] and feeds its entries to add().
-// NOTE(review): Order comes straight from the file and indexes arrays of
-// LMTMAXLEV elements without a range check -- confirm inputs are trusted.
-void lmtable::loadtxt(std::istream& inp, const char* header){
-
- //open input stream and prepare an input string
- char line[1024];
-
- //prepare word dictionary
- //dict=(dictionary*) new dictionary(NULL,1000000,NULL,NULL);
- dict->incflag(1);
-
- //put here ngrams, log10 probabilities or their codes
- ngram ng(dict);
- // log10 holds ln(10), so exp(x * log10) below equals 10^x
- float prob,bow,log10=(float)log(10.0);
-
- //check the header to decide if the LM is quantized or not
- isQtable=(strncmp(header,"qARPA",5)==0?true:false);
-
- //we will configure the table later, when we know the maxlev;
- bool yetconfigured=false;
-
- cerr << "loadtxt()\n";
-
- // READ ARPA Header
- int Order, n;
-
- while (inp.getline(line,1024)){
-
- bool backslash = (line[0] == '\\');
-
- if (sscanf(line, "ngram %d=%d", &Order, &n) == 2) {
- maxsize[Order] = n; maxlev=Order; //update Order
- }
-
- if (backslash && sscanf(line, "\\%d-grams", &Order) == 1) {
-
- //at this point we are sure about the size of the LM
- if (!yetconfigured) {configure(maxlev,isQtable);yetconfigured=true;}
-
- cerr << Order << "-grams: reading ";
-
- if (isQtable) loadcenters(inp,Order);
-
- //allocate space for loading the table of this level
- table[Order]= new char[maxsize[Order] * nodesize(tbltype[Order])];
-
- //allocate support vector to manage badly ordered n-grams
- if (maxlev>1) {
- startpos[Order]=new int[maxsize[Order]];
- for (int c=0;c<maxsize[Order];c++) startpos[Order][c]=-1;
- }
-
- //prepare to read the n-grams entries
- cerr << maxsize[Order] << " entries\n";
-
- //WE ASSUME A WELL STRUCTURED FILE!!!
-
- for (int c=0;c<maxsize[Order];c++){
-
- parseline(inp,Order,ng,prob,bow);
-
- //add to table
- // non-quantized values are stored as 10^x scaled by UNIGRAM_RESOLUTION
- // and truncated to int; quantized values are stored as read
- add(ng,
- (int)(isQtable?prob:exp(prob * log10)*UNIGRAM_RESOLUTION),
- (int)(isQtable?bow:exp(bow * log10)*UNIGRAM_RESOLUTION));
- }
- // now we can fix table at level Order -1
- if (maxlev>1 && Order>1) checkbounds(Order-1);
- }
- }
-
- dict->incflag(0);
- cerr << "done\n";
-
-}
-
-//set all bounds of entries with no successors to the bound
-//of the previous entry.
-
-// Rebuilds table[level+1] so that the successors of the entries of
-// table[level] are stored contiguously and in the order of their parents,
-// then rewrites each parent's bound to the end position of its successors
-// in the rebuilt table.
-void lmtable::checkbounds(int level){
-
- char* tbl=table[level];
- char* succtbl=table[level+1];
-
- LMT_TYPE ndt=tbltype[level], succndt=tbltype[level+1];
- int ndsz=nodesize(ndt), succndsz=nodesize(succndt);
-
- //re-order table at level+1
- char* newtbl=new char[succndsz * cursize[level+1]];
- int start,end,newstart;
-
- //re-order table at level+1: copy each entry's successor range in turn
- newstart=0;
- for (int c=0;c<cursize[level];c++){
- start=startpos[level][c]; end=bound(tbl+c*ndsz,ndt);
- //if start==-1 there are no successors for this entry and end==-2
- if (end==-2) end=start;
- assert(start<=end);
- assert(newstart+(end-start)<=cursize[level+1]);
- assert(end<=cursize[level+1]);
-
- if (start<end)
- memcpy((void*)(newtbl + newstart * succndsz),
- (void*)(succtbl + start * succndsz),
- (end-start) * succndsz);
-
- // the bound now holds the end position in the re-ordered table
- bound(tbl+c*ndsz,ndt,newstart+(end-start));
- newstart+=(end-start);
- }
- // swap in the re-ordered table and release the old one
- delete [] table[level+1];
- table[level+1]=newtbl;
- newtbl=NULL;
-}
-
-//Add method inserts n-grams in the table structure. It is ONLY used during
-//loading of LMs in text format. It searches for the prefix, then it adds the
-//suffix to the last level and updates the start-end positions.
-
-// Appends ng to table[ng.size] with the given probability and back-off
-// values. For ng.size>1 the prefix is first located level by level; the
-// prefix entry's start position (startpos) and end bound are then updated.
-// Returns 1 on success, 0 (after a warning) when the prefix is missing.
-int lmtable::add(ngram& ng,int iprob,int ibow){
-
- char *found; LMT_TYPE ndt; int ndsz;
-
- if (ng.size>1){
-
- // find the prefix starting from the first level
- int start=0, end=cursize[1];
-
- for (int l=1;l<ng.size;l++){
-
- ndt=tbltype[l]; ndsz=nodesize(ndt);
-
- if (search(table[l] + (start * ndsz),ndt,l,(end-start),ndsz,
- ng.wordp(ng.size-l+1),LMT_FIND, &found)){
-
- //update start-end positions for next step
- if (l< (ng.size-1)){
- //set start position
- if (found==table[l]) start=0; //first pos in table
- else start=bound(found - ndsz,ndt); //end of previous entry
-
- //set end position
- end=bound(found,ndt);
- }
- }
- else{
- cerr << "warning: missing back-off for ngram " << ng << "\n";
- return 0;
- }
- }
-
- // update book keeping information about level ng-size -1.
- // if this is the first successor update start position
- // (found, ndt and ndsz still refer to the last level searched, ng.size-1)
- int position=(found-table[ng.size-1])/ndsz;
- if (startpos[ng.size-1][position]==-1)
- startpos[ng.size-1][position]=cursize[ng.size];
-
- //always update ending position
- bound(found,ndt,cursize[ng.size]+1);
- //cout << "startpos: " << startpos[ng.size-1][position]
- //<< " endpos: " << bound(found,ndt) << "\n";
-
- }
-
- // just add at the end of table[ng.size]
-
- assert(cursize[ng.size]< maxsize[ng.size]); // is there enough space?
- ndt=tbltype[ng.size];ndsz=nodesize(ndt);
-
- found=table[ng.size] + (cursize[ng.size] * ndsz);
- word(found,*ng.wordp(1));
- prob(found,ndt,iprob);
- // below the top level also store the back-off and mark the entry as
- // having no successors yet (bound -2, see checkbounds)
- if (ng.size<maxlev){bow(found,ndt,ibow);bound(found,ndt,-2);}
-
- cursize[ng.size]++;
-
- return 1;
-
-}
-
-
-// Binary-searches the n nodes of size sz starting at tb for the word code
-// ngp[0]. Only LMT_FIND is supported; any other action raises an error.
-// Returns the matching node (also stored in *found when found is given),
-// or 0 when tb is null or the code is absent. ndt and lev are not used.
-void *lmtable::search(char* tb,
-		LMT_TYPE ndt,
-		int lev,
-		int n,
-		int sz,
-		int *ngp,
-		LMT_ACTION action,
-		char **found){
-
-  //prepare search pattern
-  char w[LMTCODESIZE];putmem(w,ngp[0],0,LMTCODESIZE);
-
-  int idx=0; // index returned by mybsearch
-  if (found) *found=NULL; //initialize output variable
-  switch(action){
-  case LMT_FIND:
-    if (!tb || !mybsearch(tb,n,sz,(unsigned char *)w,&idx))
-      return 0;
-    if (found) *found=tb + (idx * sz);
-    return tb + (idx * sz);
-  default:
-    // this path is reached for unsupported actions; the message used to
-    // claim the opposite ("is available")
-    error("lmtable::search: this option is not available");
-  };
-
-  return (void *)0x0;
-}
-
-
-// Binary search over n records of 'size' bytes starting at ar. Records are
-// ordered by their first LMTCODESIZE bytes, compared from the highest byte
-// index down to 0.
-// Returns 1 with *idx set to the matching record, or 0 with *idx set to the
-// first position whose key is greater than the searched one.
-int lmtable::mybsearch(char *ar, int n, int size,
-		       unsigned char *key, int *idx)
-{
-  int low=0, high=n;
-  *idx=0;
-
-  while (low < high)
-    {
-      // midpoint computed without adding the two bounds (no int overflow)
-      *idx = low + (high - low) / 2;
-      unsigned char *p = (unsigned char *) (ar + ((size_t)*idx * size));
-
-      //comparison
-      int result=0;
-      for (int i=(LMTCODESIZE-1);i>=0;i--){
-	result=key[i]-p[i];
-	if (result) break;
-      }
-
-      if (result < 0)
-	high = *idx;
-      else if (result > 0)
-	low = *idx + 1;
-      else
-	return 1;
-    }
-
-  *idx=low;
-
-  return 0;
-
-}
-
-
-// saves a LM table in text format
-
-// Writes the table to filename in ARPA-like text form: an optional "qARPA"
-// marker, the \data\ section with the entry counts, then for each level the
-// codebook (quantized tables only) and the n-grams printed by dumplm().
-void lmtable::savetxt(const char* filename){
-
-  fstream out(filename,ios::out);
-  out.precision(6);
-
-  if (isQtable) out << "qARPA\n";
-
-  ngram ng(dict,0);
-
-  cerr << "savetxt()\n";
-
-  //header: number of entries for every level
-  out << "\n\\data\\\n";
-  for (int lev=1;lev<=maxlev;lev++)
-    out << "ngram " << lev << "= " << cursize[lev] << "\n";
-
-  for (int lev=1;lev<=maxlev;lev++){
-
-    out << "\n\\" << lev << "-grams:\n";
-    cerr << "save: " << cursize[lev] << " " << lev << "-grams\n";
-
-    if (isQtable){
-      const int centers=NumCenters[lev];
-      out << centers << "\n";
-      for (int c=0;c<centers;c++){
-        out << Pcenters[lev][c];
-        if (lev<maxlev) out << " " << Bcenters[lev][c];
-        out << "\n";
-      }
-    }
-
-    //walk down from the unigrams and print the n-grams of this level
-    ng.size=0;
-    dumplm(out,ng,1,lev,0,cursize[1]);
-  }
-
-  out << "\\end\\\n";
-  cerr << "done\n";
-}
-
-
-// Writes the table to filename in binary form: a text header ("Qblmt" or
-// "blmt", number of levels, entries per level and, for quantized tables,
-// the number of centers), the dictionary, then for each level the raw
-// codebooks (quantized only) and the raw node table.
-void lmtable::savebin(const char *filename){
-
-  // binary mode: the payload is raw bytes and must not go through
-  // newline translation on platforms that distinguish text streams
-  fstream out(filename,ios::out|ios::binary);
-  cerr << "savebin: " << filename << "\n";
-
-  // print header
-  if (isQtable){
-    out << "Qblmt " << maxlev;
-    for (int i=1;i<=maxlev;i++) out << " " << cursize[i];
-    out << "\nNumCenters";
-    for (int i=1;i<=maxlev;i++) out << " " << NumCenters[i];
-    out << "\n";
-
-  }else{
-    out << "blmt " << maxlev;
-    for (int i=1;i<=maxlev;i++) out << " " << cursize[i] ;
-    out << "\n";
-  }
-
-  dict->save(out);
-
-  for (int i=1;i<=maxlev;i++){
-    cerr << "saving " << cursize[i] << " " << i << "-grams\n";
-    if (isQtable){
-      out.write((char*)Pcenters[i],NumCenters[i] * sizeof(float));
-      if (i<maxlev)
-        out.write((char *)Bcenters[i],NumCenters[i] * sizeof(float));
-    }
-    out.write(table[i],cursize[i]*nodesize(tbltype[i]));
-  }
-
-  cerr << "done\n";
-}
-
-
-// Loads a table written by savebin(). header is the first token of the
-// stream ("Qblmt" or "blmt"), already consumed by the caller.
-// Values taken from the file are validated before they are used as array
-// bounds or allocation sizes; error() is raised on a malformed header.
-void lmtable::loadbin(std::istream& inp, const char *header){
-
-  cerr << "loadbin()\n";
-
-  // read header
-  inp >> maxlev;
-
-  if (strncmp(header,"Qblmt",5)==0) isQtable=1;
-  else if(strncmp(header,"blmt",4)==0) isQtable=0;
-  else error("loadbin: wrong header");
-
-  // per-level arrays have LMTMAXLEV slots and are indexed from 1
-  if (!inp || maxlev<1 || maxlev>=LMTMAXLEV)
-    error("loadbin: invalid number of levels");
-
-  configure(maxlev,isQtable);
-
-  for (int i=1;i<=maxlev;i++){
-    inp >> cursize[i];
-    if (!inp || cursize[i]<0) error("loadbin: invalid table size");
-    maxsize[i]=cursize[i];
-    table[i]=new char[cursize[i] * nodesize(tbltype[i])];
-  }
-
-  if (isQtable){
-    cerr << "reading num centers:";
-    char tmp[1024];
-    // skip the "NumCenters" label without overrunning the buffer
-    inp.width(sizeof(tmp));
-    inp >> tmp;
-    for (int i=1;i<=maxlev;i++){
-      inp >> NumCenters[i];cerr << " " << NumCenters[i];
-      if (!inp || NumCenters[i]<0) error("loadbin: invalid number of centers");
-      Pcenters[i]=new float [NumCenters[i]];
-      Bcenters[i]=(i<maxlev?new float [NumCenters[i]]:NULL);
-    }
-    cerr << "\n";
-  }
-
-  //dict=new dictionary(NULL,1000000,NULL,NULL);
-  dict->load(inp);
-
-  for (int i=1;i<=maxlev;i++){
-    if (isQtable){
-      inp.read((char*)Pcenters[i],NumCenters[i] * sizeof(float));
-      if (i<maxlev) inp.read((char *)Bcenters[i],NumCenters[i]*sizeof(float));
-    }
-    cerr << "loading " << cursize[i] << " " << i << "-grams\n";
-    inp.read(table[i],cursize[i]*nodesize(tbltype[i]));
-  }
-
-  cerr << "done\n";
-}
-
-
-
-// Looks up the lev-gram formed by the last lev words of the n words in ng.
-// Descends one level at a time, restricting each search to the successor
-// range of the entry found at the previous level.
-// On success fills ng.size, ng.lev, ng.freq (0), ng.link (node found),
-// ng.info (node type) and ng.succ (number of successors) and returns 1;
-// returns 0 as soon as a word is not found.
-// Raises error() when lev exceeds maxlev or n is smaller than lev.
-int lmtable::get(ngram& ng,int n,int lev){
-
- // cout << "cerco:" << ng << "\n";
-
- if (lev > maxlev) error("get: lev exceeds maxlevel");
- if (n < lev) error("get: ngram is too small");
-
- //set boundaries for 1-gram
- int offset=0,limit=cursize[1];
-
- //information of table entries
- char* found; LMT_TYPE ndt;
-
- for (int l=1;l<=lev;l++){
-
- //initialize entry information
- found = NULL; ndt=tbltype[l];
-
- //search in table at level l
- search(table[l] + (offset * nodesize(ndt)),
- ndt,
- l,
- (limit-offset),
- nodesize(ndt),
- ng.wordp(n-l+1),
- LMT_FIND,
- &found);
-
- if (!found) return 0;
-
- if (l<maxlev){ //set start/end point for next search
-
- //if current offset is at the bottom also that of successors will be
- if (offset+1==cursize[l]) limit=cursize[l+1];
- else limit=bound(found,ndt);
-
- //if current start is at the begin, then also that of successors will be
- if (found==table[l]) offset=0;
- else offset=bound((found - nodesize(ndt)),ndt);
-
- assert(offset!=-1); assert(limit!=-1);
- }
- }
-
- //put information inside ng
- ng.size=n; ng.lev=lev; ng.freq=0; ng.link=found; ng.info=ndt;
- ng.succ=(lev<maxlev?limit-offset:0);
-
- return 1;
-}
-
-
-//recursively prints the language model table
-
-// Prints to out all elev-grams reachable from positions [ipos,epos) of
-// level ilev. ng carries the words collected on the way down (it is passed
-// by value, so every recursion level works on its own copy).
-// Each line is: probability, TAB, the words, and -- when the stored
-// back-off differs from UNIGRAM_RESOLUTION -- TAB and the back-off.
-void lmtable::dumplm(std::ostream& out,ngram ng, int ilev, int elev, int ipos,int epos){
-
- LMT_TYPE ndt=tbltype[ilev];
- int ndsz=nodesize(ndt);
- float log10=log(10.0); // ln(10): log(x)/log10 below is a base-10 logarithm
-
- assert(ng.size==ilev-1);
- assert(ipos>=0 && epos<=cursize[ilev] && ipos<epos);
- ng.pushc(0);
-
- for (int i=ipos;i<epos;i++){
- *ng.wordp(1)=word(table[ilev]+i*ndsz);
- if (ilev<elev){
- //get first and last successor position
- int isucc=(i>0?bound(table[ilev]+(i-1)*ndsz,ndt):0);
- int esucc=bound(table[ilev]+i*ndsz,ndt);
- if (isucc < esucc) //there are successors!
- dumplm(out,ng,ilev+1,elev,isucc,esucc);
- //else
- //cout << "no successors for " << ng << "\n";
- }
- else{
- //out << i << " "; //this was just to count printed n-grams
- // quantized tables print the stored code, others convert the stored
- // integer back to a log10 value
- int ipr=prob(table[ilev]+ i * ndsz,ndt);
- out << (isQtable?ipr:log((ipr+1)/UNIGRAM_RESOLUTION)/log10) <<"\t";
- for (int k=ng.size;k>=1;k--){
- if (k<ng.size) out << " ";
- out << dict->decode(*ng.wordp(k));
- }
- int ibo=(int)(ilev<maxlev?bow(table[ilev]+ i * ndsz,ndt):UNIGRAM_RESOLUTION);
- if (ibo!=UNIGRAM_RESOLUTION)
- out << "\t" << (isQtable?ibo:log((ibo+1)/UNIGRAM_RESOLUTION)/log10);
- out << "\n";
- }
- }
-}
-
-//succscan iteratively returns all successors of an ngram h for which
-//get(h,h.size,h.size) returned true.
-
-
-// Iterates over the successors of h (see the comment above).
-// LMT_INIT copies h into ng and positions the cursor ng.midx[lev] on the
-// first successor; it returns 1.
-// LMT_CONT stores the next successor word in ng and returns 1, or returns 0
-// when the successor range of h is exhausted.
-// Any other action is a usage error and terminates the program.
-int lmtable::succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev){
-  assert(lev==h.lev+1 && h.size==lev && lev<=maxlev);
-
-  LMT_TYPE ndt=tbltype[h.lev];
-  int ndsz=nodesize(ndt);
-
-  switch (action){
-
-  case LMT_INIT:
-    //reset ngram local indexes
-
-    ng.size=lev;
-    ng.trans(h);
-    // successors start where those of the previous entry end
-    ng.midx[lev]=(h.link>table[h.lev]?bound(h.link-ndsz,ndt):0);
-
-    return 1;
-
-  case LMT_CONT:
-
-    if (ng.midx[lev]<bound(h.link,ndt))
-      {
-	//put current word into ng
-	*ng.wordp(1)=word(table[lev]+ng.midx[lev]*nodesize(tbltype[lev]));
-	ng.midx[lev]++;
-	return 1;
-      }
-    else
-      return 0;
-
-  default:
-    cerr << "succscan: only permitted options are LMT_INIT and LMT_CONT\n";
-    // a failure must not report success to the calling process
-    exit(1);
-  }
-
-}
-
-//maxsuffptr returns the largest suffix of an n-gram that is contained
-//in the LM table. This can be used as a compact representation of the
-//(n-1)-gram state of a n-gram LM. if the input k-gram has k>=n then it
-//is trimmed to its n-1 suffix.
-
-// Returns the table node of ong (trimmed to at most maxlev-1 words) when
-// that n-gram is present, or NULL for an empty n-gram.
-// The fall-back to shorter suffixes is NOT implemented: when the n-gram is
-// absent the process terminates with exit(1).
-const char *lmtable::maxsuffptr(ngram ong){
-
- if (ong.size==0) return (char*) NULL;
- if (ong.size>=maxlev) ong.size=maxlev-1;
-
- ngram ng(dict); //eventually use the <unk> word
- ng.trans(ong);
-
- if (get(ng,ng.size,ng.size))
- return ng.link;
- else{
- ong.size--;
-#ifndef WIN32
-#warning maxsuffptr is not implemented
-#endif
- exit(1);
-// return getstate(ong);
- }
-}
-
-
-// returns the probability of an n-gram
-
-// Returns the probability of ong (0.0 for an empty n-gram), using at most
-// the last maxlev words.
-// Found n-grams yield the center value (quantized tables) or
-// (stored value + 1) / UNIGRAM_RESOLUTION. Otherwise a unigram yields
-// 1/UNIGRAM_RESOLUTION and longer n-grams back off: the back-off weight of
-// the shifted n-gram (1.0 when it is absent) times the probability of the
-// shortened n-gram.
-double lmtable::prob(const ngram& ong){
-
- if (ong.size==0) return 0.0;
-
- ngram ng(dict);
- ng.trans(ong);
- if (ong.size>maxlev) ng.size=maxlev;
-
- double rbow;
- int ibow,iprob;
- LMT_TYPE ndt;
-
- if (get(ng,ng.size,ng.size)){
- ndt=(LMT_TYPE)ng.info; iprob=prob(ng.link,ndt);
- return (double)(isQtable?Pcenters[ng.size][iprob]
- :(iprob+1.0)/UNIGRAM_RESOLUTION);
- }
- else{ //size==1 means an OOV word
- if (ng.size==1) return (double)1.0/UNIGRAM_RESOLUTION;
- else{ // compute backoff
- //set backoff state, shift n-gram, set default bow prob
- bo_state(1); ng.shift();rbow=1.0;
- if (get(ng)){
- ndt= (LMT_TYPE)ng.info; ibow=bow(ng.link,ndt);
- rbow= (double) (isQtable?Bcenters[ng.size][ibow]:(ibow+1.0)/UNIGRAM_RESOLUTION);
- }
- //prepare recursion step
- ng.size--;
- return rbow * prob(ng);
- }
- }
-}
-
-
-// Prints the number of levels and, for each level, the entry count and the
-// memory used (in Mb), followed by the total. With level>1 the dictionary
-// statistics are printed as well.
-void lmtable::stat(int level){
-  const float mega=1024 * 1024;
-  int totmem=0;
-
-  cout.precision(2);
-
-  cout << "lmtable class statistics\n";
-  cout << "levels " << maxlev << "\n";
-
-  for (int lev=1;lev<=maxlev;lev++){
-    const int bytes=cursize[lev] * nodesize(tbltype[lev]);
-    cout << "lev " << lev
-         << " entries "<< cursize[lev]
-         << " used mem " << bytes/mega << "Mb\n";
-    totmem+=bytes;
-  }
-
-  cout << "total allocated mem " << totmem/mega << "Mb\n";
-
-  if (level >1 ) dict->stat();
-
-}
diff --git a/irstlm/src/lmtable.h b/irstlm/src/lmtable.h
deleted file mode 100644
index 9e392bc9a..000000000
--- a/irstlm/src/lmtable.h
+++ /dev/null
@@ -1,245 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-/*
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef MF_LMTABLE_H
-#define MF_LMTABLE_H
-
-#include "ngram.h"
-
-#define LMTMAXLEV 11
-
-#ifndef LMTCODESIZE
-#define LMTCODESIZE (int)3
-#endif
-
-#define SHORTSIZE (int)2
-#define PTRSIZE (int)sizeof(char *)
-#define INTSIZE (int)4
-#define CHARSIZE (int)1
-
-#define PROBSIZE (int)4 //use float
-#define QPROBSIZE (int)1
-#define BOUNDSIZE (int)4
-
-#define UNIGRAM_RESOLUTION 10000000.0
-
-// Node layouts (see lmtable::nodesize): INTERNAL/QINTERNAL nodes carry
-// word code, probability, back-off and bound; LEAF/QLEAF nodes carry word
-// code and probability only. The Q variants use QPROBSIZE-byte values.
-typedef enum {INTERNAL,QINTERNAL,LEAF,QLEAF} LMT_TYPE;
-// A node is addressed through a raw byte pointer into a level table.
-typedef char* node;
-
-// Actions accepted by lmtable::search and lmtable::succscan.
-typedef enum {LMT_FIND, //!< search: find an entry
- LMT_ENTER, //!< search: enter an entry
- LMT_INIT, //!< scan: start scan
- LMT_CONT //!< scan: continue scan
-} LMT_ACTION;
-
-
-// N-gram language model stored as one packed byte table per level.
-// Levels are indexed from 1; the per-level arrays have LMTMAXLEV slots.
-// NOTE(review): the destructor frees the level tables and the codebooks but
-// neither dict nor startpos -- confirm who owns them.
-class lmtable{
-
- char* table[LMTMAXLEV]; //storage of all levels
- LMT_TYPE tbltype[LMTMAXLEV]; //table type for each levels
- int cursize[LMTMAXLEV]; //current size of levels
- int maxsize[LMTMAXLEV]; //maximum (allocated) size of levels
- int* startpos[LMTMAXLEV]; //support vector to store start positions
-
- int maxlev; //max level of table
- char info[100]; //information put in the header
-
- //probability quantization
- bool isQtable;
-
- int NumCenters[LMTMAXLEV];
- float* Pcenters[LMTMAXLEV];
- float* Bcenters[LMTMAXLEV];
-
- int lmt_oov_code;
- int lmt_oov_size;
- int backoff_state;
-
-
- public:
-
- dictionary *dict; // dictionary
-
- lmtable(std::istream& in);
-
- // Frees the level tables and, for quantized tables, the codebooks.
- ~lmtable(){
- for (int i=1;i<=maxlev;i++){
- delete [] table[i];
- if (isQtable){
- delete [] Pcenters[i];
- if (i<maxlev) delete [] Bcenters[i];
- }
- }
- }
-
- // Sets maxlev to n and the node type of every level: levels below n are
- // internal nodes, level n holds leaves (quantized variants if requested).
- void configure(int n,bool quantized){
- maxlev=n;
- if (n==1)
- tbltype[1]=(quantized?QLEAF:LEAF);
- else{
- for (int i=1;i<n;i++) tbltype[i]=(quantized?QINTERNAL:INTERNAL);
- tbltype[n]=(quantized?QLEAF:LEAF);
- }
- };
-
- int maxlevel(){return maxlev;};
-
- void savetxt(const char *filename);
- void savebin(const char *filename);
- void dumplm(std::ostream& out,ngram ng, int ilev, int elev, int ipos,int epos);
-
- void loadtxt(std::istream& in, const char* header);
- void loadbin(std::istream& in, const char* header);
-
- void loadcenters(std::istream& inp,int Order);
-
- double prob(const ngram& ng);
-
- void *search(char *tb,LMT_TYPE ndt,int lev,int n,int sz,int *w,
- LMT_ACTION action,char **found=(char **)NULL);
-
- int mybsearch(char *ar, int n, int size, unsigned char *key, int *idx);
-
- int add(ngram& ng,int prob,int bow);
- void checkbounds(int level);
-
- int get(ngram& ng){return get(ng,ng.size,ng.size);}
- int get(ngram& ng,int n,int lev);
-
- int succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev);
- const char *maxsuffptr(ngram ong);
- // Stores the low 'size' bytes of value at ptr+offs, least significant
- // byte first; returns value.
- inline int putmem(char* ptr,int value,int offs,int size){
- assert(ptr!=NULL);
- for (int i=0;i<size;i++)
- ptr[offs+i]=(value >> (8 * i)) & 0xff;
- return value;
- };
-
- // Reads 'size' bytes at ptr+offs, least significant byte first, into
- // *value; returns *value.
- inline int getmem(char* ptr,int* value,int offs,int size){
- assert(ptr!=NULL);
- *value=ptr[offs] & 0xff;
- for (int i=1;i<size;i++)
- *value= *value | ( ( ptr[offs+i] & 0xff ) << (8 *i));
- return *value;
- };
-
-
- // Getter/setter for backoff_state: -1 (default) reads, any other value
- // is stored and returned.
- int bo_state(int value=-1){
- return (value==-1?backoff_state:backoff_state=value);
- };
-
-
- // Size in bytes of one node of the given type.
- int nodesize(LMT_TYPE ndt){
- switch (ndt){
- case INTERNAL:
- return LMTCODESIZE + PROBSIZE + PROBSIZE + BOUNDSIZE;
- case QINTERNAL:
- return LMTCODESIZE + QPROBSIZE + QPROBSIZE + BOUNDSIZE;
- case QLEAF:
- return LMTCODESIZE + QPROBSIZE;
- case LEAF:
- return LMTCODESIZE + PROBSIZE;
- default:
- assert(0);
- return 0;
- }
- }
-
- // Field accessors. Each one reads the field when value is -1 (default)
- // and writes it otherwise, so -1 itself can never be stored through them.
-
- // Word code: first LMTCODESIZE bytes of the node.
- inline int word(node nd,int value=-1)
- {
- int offset=0;
-
- if (value==-1)
- getmem(nd,&value,offset,LMTCODESIZE);
- else
- putmem(nd,value,offset,LMTCODESIZE);
-
- return value;
- };
-
- // Probability: stored right after the word code.
- inline int prob(node nd,LMT_TYPE ndt, int value=-1)
- {
- int offs=LMTCODESIZE;
- int size=(ndt==QINTERNAL || ndt==QLEAF?QPROBSIZE:PROBSIZE);
-
- if (value==-1)
- getmem(nd,&value,offs,size);
- else
- putmem(nd,value,offs,size);
-
- return value;
- };
-
-
- // Back-off weight: stored after the probability; internal nodes only.
- inline int bow(node nd,LMT_TYPE ndt, int value=-1)
- {
- assert(ndt==INTERNAL || ndt==QINTERNAL);
- int size=(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
- int offs=LMTCODESIZE+size;
-
- if (value==-1)
- getmem(nd,&value,offs,size);
- else
- putmem(nd,value,offs,size);
-
- return value;
- };
-
- // Successor bound: stored after probability and back-off; internal
- // nodes only.
- inline int bound(node nd,LMT_TYPE ndt, int value=-1)
- {
- assert(ndt==INTERNAL || ndt==QINTERNAL);
- int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
-
- if (value==-1)
- getmem(nd,&value,offs,BOUNDSIZE);
- else
- putmem(nd,value,offs,BOUNDSIZE);
-
- return value;
- };
-
- void stat(int lev=0);
-
-};
-
-#endif
-
-
-
-
diff --git a/irstlm/src/mempool.cpp b/irstlm/src/mempool.cpp
deleted file mode 100644
index 00a9777d0..000000000
--- a/irstlm/src/mempool.cpp
+++ /dev/null
@@ -1,516 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-// An efficient memory pool manager
-// by M. Federico
-// Copyright Marcello Federico, ITC-irst, 1998
-
-#include <assert.h>
-#include <cstdlib>
-#include <iostream>
-#include "mempool.h"
-
-using namespace std;
-
-/*! The pool contains:
- - entries of size is
- - tables for bs entries
-*/
-
-// Creates a pool of fixed-size items.
-//   is: requested item size in bytes
-//   bs: number of items per block (values below 1 are treated as 1)
-// One block is allocated immediately, zero-filled, and threaded into the
-// free list: the first bytes of every free item hold a pointer to the next
-// free item.
-mempool::mempool(int is, int bs){
-
-  // Every item must be able to hold a free-list link. Requests not larger
-  // than a pointer are collapsed to 0 here and become exactly one pointer
-  // after the padding step below.
-  is=(is>(int)sizeof(char *)?is:0);
-
-  // Pad to a multiple of the pointer size, e.g. with 4-byte pointers
-  // is=9 becomes 12 (9 + 4 - 9 % 4). Note that a size which is already a
-  // multiple still grows by one pointer.
-  is=is + sizeof(char *) - (is % sizeof(char *));
-
-  item_size = is;
-
-  // A block needs at least one item: with zero items the block would be
-  // empty and the free-list terminator below would be written out of bounds.
-  block_size = (bs>0?bs:1);
-
-  true_size = item_size * block_size;
-
-  block_list = new memnode;
-
-  block_list->block = new char[true_size];
-
-  // zero-fill the block (the previous code filled it with the character '0')
-  memset(block_list->block,0,true_size);
-
-  block_list->next = 0;
-
-  blocknum = 1;
-
-  entries = 0;
-
-  // build free list: each item points to the one that follows it
-
-  char *ptr = free_list = block_list->block;
-
-  for (int i=0;i<block_size-1;i++) {
-    *(char **)ptr= ptr + item_size;
-    ptr+=item_size;
-  }
-  *(char **)ptr = NULL; //last item
-
-}
-
-
-// Hands out one item of item_size bytes.
-// When the free list is empty a new block of true_size bytes is allocated,
-// zero-filled, pushed on the front of the block list and threaded into a
-// fresh free list. The first pointer-sized bytes of the returned item are
-// reset to NULL; the rest of a recycled item keeps whatever its previous
-// owner left there.
-char * mempool::alloc(){
-
-  char *ptr;
-
-  if (free_list==NULL)
-    {
-      memnode *new_block = new memnode;
-
-      new_block->block = new char[true_size];
-
-      // zero-fill the block (the previous code filled it with the
-      // character '0' rather than with zero bytes)
-      memset(new_block->block,0,true_size);
-
-      new_block->next = block_list;
-
-      block_list=new_block; // update block list
-
-      /* update free list: chain every item to its successor */
-
-      ptr = free_list = block_list->block;
-
-      for (int i=0;i<block_size-1;i++) {
-        *(char **)ptr = ptr + item_size;
-        ptr = ptr + item_size;
-      }
-
-      *(char **)ptr=NULL;
-
-      blocknum++;
-    }
-
-  // pop the head of the free list
-  ptr = free_list;
-
-  free_list=*(char **)ptr;
-
-  *(char **)ptr=NULL; // reset the released item
-
-  entries++;
-
-  return ptr;
-
-}
-
-
-// Returns an item to the pool by pushing it on the front of the free list.
-// Always returns 1.
-int mempool::free(char* addr){
-
-  // The address is NOT checked for membership in this pool.
-  // The check below was disabled in the original code:
-  /*
-    memnode *list=block_list;
-    while ((list != NULL) &&
-           ((addr < list->block) ||
-            (addr >= (list->block + true_size))))
-      list=list->next;
-
-    if ((list==NULL) || (((addr - list->block) % item_size)!=0))
-    {
-      //cerr << "mempool::free-> addr does not belong to this pool\n";
-      return 0;
-    }
-  */
-
-  // the freed item stores the old head and becomes the new head
-  char **link=(char **)addr;
-  *link=free_list;
-  free_list=addr;
-
-  --entries;
-
-  return 1;
-}
-
-
-// Releases every block of the pool together with its list node.
-mempool::~mempool()
-{
-  for (memnode *node=block_list; node!=NULL; ){
-    memnode *following=node->next;   // remember the successor before deleting
-    delete [] node->block;
-    delete node;
-    node=following;
-  }
-  block_list=NULL;
-}
-
-// Prints one line per block, one character per item slot:
-// '#' for a slot in use, '-' for a slot that is on the free list.
-void mempool::map (ostream& co){
-
-  co << "mempool memory map:\n";
-
-  // one character per slot, NUL-terminated so that it can be streamed
-  char* picture=new char[block_size+1];
-  picture[block_size]='\0';
-
-  // the free list is walked once for every block
-  for (memnode *blk=block_list; blk!=NULL; blk=blk->next){
-
-    // start by assuming that every slot is in use
-    memset(picture,'#',block_size);
-
-    char *blk_begin=blk->block;
-    char *blk_end=blk_begin + true_size;
-
-    for (char *item=free_list; item!=NULL; item=*(char **)item){
-      if (item<blk_begin || item>=blk_end)
-        continue;   // this free item lives in another block
-      picture[(item-blk_begin)/item_size]='-';
-    }
-
-    co << picture << "\n";
-  }
-
-  delete [] picture;
-}
-
-// Prints entry count, block count and block memory (in Kb) to cout.
-void mempool::stat(){
-
-  const int used_kb=(blocknum * true_size)/1024;
-
-  cout << "mempool class statistics\n";
-  cout << "entries " << entries;
-  cout << " blocks " << blocknum;
-  cout << " used memory " << used_kb << " Kb\n";
-}
-
-
-
-// Creates a string stack made of blocks of bs bytes and allocates the
-// first, zero-filled block.
-strstack::strstack(int bs){
-
-  size=bs;
-
-  // bookkeeping for a stack holding a single empty block
-  idx=0;
-  entries=0;
-  blocknum=1;
-  waste=0;
-  memory=size;
-
-  memnode *first=new memnode;
-  first->block=new char[size];
-  first->next=0;
-  memset(first->block,'\0',size);
-
-  list=first;
-}
-
-
-// Prints entry count, block count and allocated memory (in Kb) to cout.
-void strstack::stat(){
-
-  const int used_kb=memory/1024;
-
-  cout << "strstack class statistics\n";
-  cout << "entries " << entries;
-  cout << " blocks " << blocknum;
-  cout << " used memory " << used_kb << " Kb\n";
-}
-
-
-// Copies the NUL-terminated string s on top of the stack and returns a
-// pointer to the stored copy.
-// The program exits with an error message when the string (terminator
-// included) does not fit in a block. A new block is started when the
-// current one has no room left for it.
-char *strstack::push(char *s){
-
-  const int len=strlen(s);
-  const int needed=len+1;   // characters plus the terminating '\0'
-
-  if (needed >= size){
-    cerr << "strstack::push string is too long\n";
-    exit(1);
-  }
-
-  if ((idx+needed) >= size){
-    // The current block cannot take the string (one spare byte must remain
-    // after it): account for the unused tail and start a fresh block at
-    // the head of the list.
-    waste+=size-idx;
-    blocknum++;
-    memory+=size;
-
-    memnode* fresh=new memnode;
-    fresh->block=new char[size];
-    fresh->next=list;
-    list=fresh;
-
-    memset(list->block,'\0',size);
-
-    idx=0;
-  }
-
-  // append in the current block
-  char *slot=&list->block[idx];
-  strcpy(slot,s);
-
-  idx+=needed;
-  entries++;
-
-  return slot;
-}
-
-
-// Removes the string on top of the stack.
-// Returns 0 when there is no block left; otherwise returns a pointer to the
-// position where the removed string started.
-// NOTE(review): the bytes from that position to the end of the block are
-// zeroed before the pointer is returned, so the returned pointer refers to
-// cleared memory rather than to the popped text -- confirm this is intended.
-// NOTE(review): blocknum, memory and waste are not adjusted when a block is
-// released here.
-char *strstack::pop(){
-
- if (list==0) return 0;
-
- if (idx==0){
-
- // the current block holds no string:
- // free this block and go to next
-
- memnode *ptr=list->next;
-
- delete [] list->block;
- delete list;
-
- list=ptr;
-
- if (list==0)
- return 0;
- else
- idx=size-1; // resume scanning from the last byte of that block
- }
-
- //go back to first non \0
- // (idx is post-decremented, so it ends one position below the byte found)
- while (idx>0)
- if (list->block[idx--]!='\0')
- break;
-
- //go back to first \0
- // (again idx ends one position below the '\0' that was found)
- while (idx>0)
- if (list->block[idx--]=='\0')
- break;
-
- entries--;
-
- if (list->block[idx+1]=='\0')
- {
- // idx+1 is the separator preceding the top string, idx+2 its first char
- idx+=2;
- memset(&list->block[idx],'\0',size-idx);
- return &list->block[idx];
- }
- else{
- // no separator found: the top string starts at the beginning of the block
- idx=0;
- memset(&list->block[idx],'\0',size);
- return &list->block[0];
- }
-}
-
-
-// Returns a pointer to the string on top of the stack without removing it,
-// or 0 when there is no block to look at.
-// Works on local copies (tidx, tlist) of the stack position, so the stack
-// itself is left untouched.
-char *strstack::top(){
-
- int tidx=idx;
- memnode *tlist=list;
-
- if (tlist==0) return 0;
-
- if (idx==0){
-
- // the current block holds no string: look at the next block instead
- tlist=tlist->next;
-
- if (tlist==0) return 0;
-
- tidx=size-1; // start scanning from the last byte of that block
- }
-
- //go back to first non \0
- // (tidx is post-decremented, so it ends one position below the byte found)
- while (tidx>0)
- if (tlist->block[tidx--]!='\0')
- break;
-
- // block layout example: aaa\0bbb\0\0\0\0
-
- //go back to first \0
- // (again tidx ends one position below the '\0' that was found)
- while (tidx>0)
- if (tlist->block[tidx--]=='\0')
- break;
-
- if (tlist->block[tidx+1]=='\0')
- {
- // tidx+1 is the separator preceding the top string, tidx+2 its first char
- tidx+=2;
- return &tlist->block[tidx];
- }
- else{
- // no separator found: the top string starts at the beginning of the block
- tidx=0;
- return &tlist->block[0];
- }
-
-}
-
-
-// Releases every block of the stack together with its list node.
-strstack::~strstack(){
-  for (memnode *node=list; node!=NULL; ){
-    memnode *following=node->next;   // remember the successor before deleting
-    delete [] node->block;
-    delete node;
-    node=following;
-  }
-  list=NULL;
-}
-
-
-// Creates a storage handling items of up to maxsize bytes through memory
-// pools; blocksize is the size in bytes of each pool block.
-// Pools are created lazily: the table of maxsize+1 pool pointers (indexed
-// by item size) starts out empty.
-storage::storage(int maxsize,int blocksize)
-{
-  setsize=maxsize;
-  poolsize=blocksize; //in bytes
-
-  newcalls=0;
-  newmemory=0;
-
-  poolset=new mempool* [setsize+1];
-  int slot=0;
-  while (slot<=setsize){
-    poolset[slot]=NULL;
-    ++slot;
-  }
-}
-
-
-// Destroys every pool that was created and then the pool table itself.
-storage::~storage(){
-  int slot=0;
-  while (slot<=setsize){
-    mempool *pool=poolset[slot];
-    if (pool)
-      delete pool;
-    ++slot;
-  }
-  delete [] poolset;
-}
-
-
-// Allocates `size` bytes (size must be positive).
-// Sizes up to setsize are served by the memory pool dedicated to that exact
-// size, created on first use. Larger sizes are served by calloc and counted
-// in newmemory/newcalls. The program exits when calloc fails.
-char *storage::alloc(int size){
-
-  // size 0 would divide by zero below, a negative size would index the
-  // pool table out of bounds
-  assert(size>0);
-
-  if (size<=setsize){
-    if (!poolset[size]){
-      // A pool block must hold at least one item, also when a single item
-      // is larger than the nominal block size.
-      int items=poolsize/size;
-      if (items<1)
-        items=1;
-      poolset[size]=new mempool(size,items);
-    }
-    return poolset[size]->alloc();
-  }
-  else{
-
-    newmemory+=size+8;
-    newcalls++;
-    char* p=(char *)calloc(sizeof(char),size);
-    if (p==NULL){
-      cerr << "storage::alloc insufficient memory\n";
-      exit(1);
-    }
-    return p;
-  }
-}
-
-
-
-// Grows an allocation from oldsize to newsize bytes (newsize > oldsize is
-// asserted) and returns the new address.
-//  - old item in a pool: a new zeroed item is taken from the pool for
-//    newsize (or from calloc when newsize exceeds setsize), the old content
-//    is copied over and the old item is returned to its pool;
-//  - old item from the heap: std::realloc is used and the added tail is
-//    zeroed, so that every path hands back zeroed new space.
-// The program exits when memory cannot be obtained.
-char *storage::realloc(char *oldptr,int oldsize,int newsize){
-
-  char *newptr;
-
-  assert(newsize>oldsize);
-
-  if (oldsize<=setsize){
-    if (newsize<=setsize){
-      if (!poolset[newsize]){
-        // a pool block must hold at least one item
-        int items=poolsize/newsize;
-        if (items<1)
-          items=1;
-        poolset[newsize]=new mempool(newsize,items);
-      }
-      newptr=poolset[newsize]->alloc();
-      memset((char*)newptr,0,newsize);
-    }
-    else
-      newptr=(char *)calloc(sizeof(char),newsize);
-
-    // check before the copy below dereferences the new pointer
-    if (newptr==NULL){
-      cerr << "storage::realloc insufficient memory\n";
-      exit(1);
-    }
-
-    if (oldptr && oldsize){
-      memcpy(newptr,oldptr,oldsize);
-      // same guard as storage::free: only hand back to an existing pool
-      if (poolset[oldsize])
-        poolset[oldsize]->free(oldptr);
-    }
-  }
-  else{
-    newptr=(char *)std::realloc(oldptr,newsize);
-    // progress marks: 'r' = grown in place, 'a' = moved elsewhere
-    if (newptr==oldptr)
-      cerr << "r\b";
-    else
-      cerr << "a\b";
-
-    if (newptr==NULL){
-      cerr << "storage::realloc insufficient memory\n";
-      exit(1);
-    }
-
-    // std::realloc leaves the added bytes uninitialised
-    memset(newptr+oldsize,0,newsize-oldsize);
-  }
-
-  return newptr;
-
-}
-
-
-// Releases an item of `size` bytes previously obtained from this storage.
-// Items larger than setsize came from the heap and are handed to the C
-// library; smaller ones go back to the pool for their size, if that pool
-// exists. Always returns 1.
-// Note: with the default size (0) the item is treated as a pool item, so
-// heap items must be freed with their real size.
-int storage::free(char *addr,int size){
-
-  /*
-    while(size<=setsize){
-      if (poolset[size] && poolset[size]->free(addr))
-        break;
-      size++;
-    }
-  */
-
-  if (size>setsize){
-    // Must be qualified: inside this class an unqualified free(addr) resolves
-    // to storage::free(addr,0), which never released the heap block.
-    std::free(addr);
-  }
-  else if (size>=0 && poolset[size]){
-    poolset[size]->free(addr);
-  }
-  return 1;
-}
-
-// Prints heap allocation counters and a summary of the active pools
-// (number of pools, memory they use, memory they waste) to cout.
-void storage::stat(){
-
-  int active=0;
-  int waste=0;
-  int memory=sizeof(char *) * setsize;   // size of the pool table
-
-  for (int slot=0;slot<=setsize;slot++){
-    mempool *pool=poolset[slot];
-    if (!pool)
-      continue;
-    active++;
-    memory+=pool->used();
-    waste+=pool->wasted();
-  }
-
-  cout << "storage class statistics\n";
-
-  cout << "alloc entries " << newcalls;
-  cout << " used memory " << newmemory/1024 << "Kb\n";
-
-  cout << "mpools " << setsize;
-  cout << " active " << active;
-  cout << " used memory " << memory/1024 << "Kb";
-  cout << " wasted " << waste/1024 << "Kb\n";
-}
-
-/*
-main(){
-
- mempool* mp=new mempool(sizeof(int),80);
-
- int** ar= new (int*) [ 1000 ];
-
- for (int i=0;i<1000;i++){
- ar[i]= (int *)mp->alloc();
- }
-
- mp->map(cout);
-
- for (int i=0;i<500;i++){
- mp->free(ar[i]);
- }
-
- mp->map(cout);
-
-}
-
-*/
-
-
-
diff --git a/irstlm/src/mempool.h b/irstlm/src/mempool.h
deleted file mode 100644
index 20dc4d59f..000000000
--- a/irstlm/src/mempool.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-// An efficient memory manager
-// by M. Federico
-// Copyright Marcello Federico, ITC-irst, 1998
-
-#ifndef MF_MEMPOOL_H
-#define MF_MEMPOOL_H
-
-#ifndef NULL
-const int NULL=0;
-//#define NULL=0;
-#endif
-
-#include <iostream> // std::ostream
-
-//! Memory block
-/*! This can be used by:
-- mempool to store items of fixed size
-- strstack to store strings of variable size
-*/
-
-// Node of a singly linked list of raw memory blocks.
-// All members are private: only the friend classes mempool and strstack
-// create, link and release nodes and their blocks.
-class memnode{
- friend class mempool; //!< grant access
- friend class strstack; //!< grant access
- char *block; //!< block of memory
- memnode *next; //!< next block ptr
-};
-
-
-//! Memory pool
-
-/*! A memory pool is composed of:
- - a linked list of block_num memory blocks
- - each block might contain up to block_size items
- - each item is made of exactly item_size bytes
-*/
-
-// Fixed-size item allocator built on a linked list of equally sized blocks.
-// Free items are chained through free_list.
-// NOTE(review): the class owns raw memory and declares no copy constructor
-// or assignment operator, so copying a mempool looks unsafe -- confirm that
-// no caller copies it.
-class mempool{
- int block_size; //!< number of entries per block
- int item_size; //!< number of bytes per entry
- int true_size; //!< number of bytes per block
- memnode* block_list; //!< list of blocks
- char* free_list; //!< free entry list
- int entries; //!< number of stored entries
- int blocknum; //!< number of allocated blocks
- public:
-
- //! Creates a memory pool
- /*! is = item size in bytes, bs = number of items per block */
- mempool(int is, int bs);
-
- //! Destroys memory pool
- ~mempool();
-
- //! Prints a map of memory occupancy
- void map(std::ostream& co);
-
- //! Allocates a single memory entry
- char *alloc();
-
- //! Frees a single memory entry
- /*! addr is not checked for membership in this pool */
- int free(char* addr);
-
- //! Prints statistics about this mempool
- void stat();
-
- //! Returns effectively used memory (bytes)
- /*! includes 8 bytes required by each call of new */
-
- int used(){return blocknum * (true_size + 8);};
-
- //! Returns amount of wasted memory (bytes)
- /*! i.e. used() minus the bytes taken by the stored entries */
- int wasted(){return used()-(entries * item_size);};
-};
-
-//! A stack to store strings
-
-/*!
- The stack is composed of
- - a list of blocks memnode of fixed size
- - attribute blocknum tells the block on top
- - attribute idx tells position of the top string
-*/
-
-// Stack of NUL-terminated strings stored back to back in fixed-size blocks.
-class strstack{
- memnode* list; //!< list of memory blocks
- int size; //!< size of each block
- int idx; //!< index of last stored string
- int waste; //!< current waste of memory
- int memory; //!< current use of memory
- int entries; //!< current number of stored strings
- int blocknum; //!< current number of used blocks
-
- public:
-
- //! Creates a stack whose blocks are bs bytes each
- strstack(int bs=1000);
-
- //! Releases all blocks
- ~strstack();
-
- //! Stores a copy of s on top of the stack and returns its address
- char *push(char *s);
-
- //! Removes the top string
- char *pop();
-
- //! Returns the top string without removing it
- char *top();
-
- //! Prints statistics about this stack
- void stat();
-
- //! Returns the number of bytes allocated for blocks
- int used(){return memory;};
-
- //! Returns the number of bytes left unused at the end of full blocks
- int wasted(){return waste;};
-
-};
-
-
-//! Manages multiple memory pools
-
-/*!
- This class permits to manage memory pools
- with items up to a specified size.
- - items within the allowed range are stored in memory pools
- - items larger than the limit are allocated with new
-*/
-
-
-// Allocator front end: one mempool per item size up to setsize, plain heap
-// allocation for anything larger.
-class storage{
- mempool **poolset; //!< array of memory pools
- int setsize; //!< number of memory pools/maximum elem size
- int poolsize; //!< size of each block
- int newmemory; //!< stores amount of used memory
- int newcalls; //!< stores number of allocated blocks
- public:
-
- //! Creates storage
- /*! maxsize = largest item size served by pools, blocksize = bytes per pool block */
- storage(int maxsize,int blocksize);
-
- //! Destroys storage
- ~storage();
-
- //! Allocates memory
- char *alloc(int size);
-
- //! Realloc memory
- /*! newsize must be larger than oldsize */
- char *realloc(char *oldptr,int oldsize,int newsize);
-
- //! Frees memory of an entry
- /*! size selects the pool (or the heap when larger than the pool limit) */
- int free(char *addr,int size=0);
-
- //! Prints statistics about storage
- void stat();
-};
-
-
-#endif
-
-
-
-
-
-
-
-
-
-
diff --git a/irstlm/src/ngram.cpp b/irstlm/src/ngram.cpp
deleted file mode 100644
index 70a1a7c97..000000000
--- a/irstlm/src/ngram.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-#include <iomanip>
-#include <assert.h>
-#include "mempool.h"
-#include "htable.h"
-#include "dictionary.h"
-#include "ngram.h"
-#include "index.h"
-
-using namespace std;
-
-// Creates an empty n-gram bound to dictionary d with nominal size sz.
-// All counters, flags and tree pointers are reset and both the word codes
-// and the tree scan indexes are zeroed.
-ngram::ngram(dictionary* d,int sz){
-  dict=d;
-  size=sz;
-  succ=0;
-  freq=0;
-  info=0;
-  pinfo=0;
-  link=NULL;
-  lev=0;     // was left uninitialised
-  isym=-1;   // no interruption symbol read so far
-  memset(word,0,sizeof(int)*MAX_NGRAM);
-  memset(midx,0,sizeof(int)*MAX_NGRAM);
-}
-
-// Copies dictionary, size, frequency, word codes and scan indexes from ng.
-// The tree-related state (succ, info, pinfo, link, lev, isym) is reset
-// rather than copied.
-ngram::ngram(ngram& ng){
-  size=ng.size;
-  freq=ng.freq;
-  succ=0;
-  info=0;
-  pinfo=0;
-  link=NULL;
-  lev=0;     // was left uninitialised
-  isym=-1;
-  dict=ng.dict;
-  memcpy(word,ng.word,sizeof(int)*MAX_NGRAM);
-  // the scan indexes come from ng.midx (the previous code copied ng.word here)
-  memcpy(midx,ng.midx,sizeof(int)*MAX_NGRAM);
-
-}
-
-// Makes this n-gram represent the same word sequence as ng.
-// With a shared dictionary the codes, scan indexes, info flags and
-// interruption symbol are copied verbatim. With different dictionaries each
-// word is decoded with ng's dictionary and re-encoded with this one, and
-// the tree-related fields are reset.
-void ngram::trans (const ngram& ng){
-  size=ng.size;
-  freq=ng.freq;
-
-  if (dict != ng.dict){
-    info=0;
-    isym=-1;
-    memset(midx,0,sizeof(int)*MAX_NGRAM);
-
-    // translate from the most recent word backwards
-    int k=1;
-    while (k<=size){
-      word[MAX_NGRAM-k]=dict->encode(ng.dict->decode(*ng.wordp(k)));
-      ++k;
-    }
-    return;
-  }
-
-  info=ng.info;
-  isym=ng.isym;
-  memcpy(word,ng.word,sizeof(int)*MAX_NGRAM);
-  memcpy(midx,ng.midx,sizeof(int)*MAX_NGRAM);
-}
-
-
-// Reads one whitespace-delimited word from fi and appends it to ng.
-//  - on read failure the stream is returned and ng is left untouched;
-//  - a single-character word found among the dictionary's interruption
-//    symbols is not stored: its position is recorded in ng.isym and the
-//    n-gram is emptied (size 0);
-//  - any other word is pushed with ngram::pushw (which exits when the word
-//    is out of vocabulary) and ng.freq is set to 1.
-ifstream& operator>> ( ifstream& fi , ngram& ng){
-  char w[MAX_WORD];
-  memset(w,0,MAX_WORD);
-  w[0]='\0';
-
-  assert(ng.dict != NULL);
-
-  if (!(fi >> setw(MAX_WORD) >> w))
-    return fi;
-
-  if (strlen(w)==(MAX_WORD-1))
-    cerr << "ngram: a too long word was read ("
-         << w << ")\n";
-
-  if (ng.dict->intsymb() &&
-      (strlen(w)==1) && (index(ng.dict->intsymb(),w[0])!=NULL)){
-
-    // isym = offset of the symbol inside the interruption symbol string
-    ng.isym=(long)index(ng.dict->intsymb(),w[0]) -
-      (long)ng.dict->intsymb();
-    ng.size=0;
-    return fi;
-  }
-
-  // Shared with the istream overload. The previous code shifted the words
-  // with memcpy on overlapping ranges, which is undefined behaviour.
-  ng.pushw(w);
-
-  ng.freq=1;
-
-  return fi;
-
-}
-
-
-// Encodes word w with the dictionary and appends its code to the n-gram.
-// The program exits when the dictionary reports the word as unknown (-1).
-// Returns 1.
-int ngram::pushw(char* w){
-
-  assert(dict!=NULL);
-
-  const int code=dict->encode(w);
-
-  if (code != -1){
-    pushc(code);
-    return 1;
-  }
-
-  cerr << "ngram: " << w << " is OOV \n";
-  exit(1);
-
-  return 1;
-}
-
-// Appends word code c as the most recent word: all stored codes move one
-// slot towards index 0 (the oldest one is dropped), c goes in the last slot
-// and size grows up to MAX_NGRAM. Returns 1.
-int ngram::pushc(int c){
-
-  // left shift by one; copying in ascending order never reads a slot that
-  // has already been overwritten
-  for (int pos=0;pos<MAX_NGRAM-1;++pos)
-    word[pos]=word[pos+1];
-
-  word[MAX_NGRAM-1]=(int)c;
-
-  if (size<MAX_NGRAM)
-    ++size;
-
-  return 1;
-}
-
-
-// Reads one whitespace-delimited word from fi and appends it to ng.
-// A single-character word found among the dictionary's interruption symbols
-// is not stored: its position goes in ng.isym and the n-gram is emptied.
-// Any other word is pushed with ngram::pushw and ng.freq is set to 1.
-istream& operator>> ( istream& fi , ngram& ng){
-  char w[MAX_WORD];
-  memset(w,0,MAX_WORD);
-
-  assert(ng.dict != NULL);
-
-  if (!(fi >> setw(MAX_WORD) >> w))
-    return fi;   // nothing read
-
-  if (strlen(w)==(MAX_WORD-1))
-    cerr << "ngram: a too long word was read ("
-         << w << ")\n";
-
-  if (ng.dict->intsymb()){
-    if ((strlen(w)==1) && (index(ng.dict->intsymb(),w[0])!=NULL)){
-      // isym = offset of the symbol inside the interruption symbol string
-      ng.isym=(long)index(ng.dict->intsymb(),w[0])-(long)ng.dict->intsymb();
-      ng.size=0;
-      return fi;
-    }
-  }
-
-  ng.pushw(w);
-  ng.freq=1;
-
-  return fi;
-}
-
-// Writes the words of ng, oldest first, each followed by a blank, and then
-// the frequency.
-ofstream& operator<< (ofstream& fo,ngram& ng){
-
-  assert(ng.dict != NULL);
-
-  // the size most recent codes occupy the tail of ng.word
-  for (int pos=MAX_NGRAM-ng.size;pos<MAX_NGRAM;++pos){
-    fo << ng.dict->decode(ng.word[pos]);
-    fo << " ";
-  }
-
-  fo << ng.freq;
-
-  return fo;
-}
-
-// Writes the words of ng, oldest first, each followed by a blank, and then
-// the frequency.
-ostream& operator<< (ostream& fo,ngram& ng){
-
-  assert(ng.dict != NULL);
-
-  // the size most recent codes occupy the tail of ng.word
-  for (int pos=MAX_NGRAM-ng.size;pos<MAX_NGRAM;++pos){
-    fo << ng.dict->decode(ng.word[pos]);
-    fo << " ";
-  }
-
-  fo << ng.freq;
-
-  return fo;
-}
-
-/*
-main(int argc, char** argv){
- dictionary d(argv[1]);
- ifstream txt(argv[1]);
- ngram ng(&d);
-
- while (txt >> ng){
- cout << ng << "\n";
- }
-
- ngram ng2=ng;
- cerr << "copia l'ultimo =" << ng << "\n";
-}
-*/
-
diff --git a/irstlm/src/ngram.h b/irstlm/src/ngram.h
deleted file mode 100644
index 12a885be0..000000000
--- a/irstlm/src/ngram.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/******************************************************************************
- IrstLM: IRST Language Model Toolkit
- Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
-
-// n-gram tables
-// by M. Federico
-// Copyright Marcello Federico, ITC-irst, 1998
-
-#ifndef MF_NGRAM_H
-#define MF_NGRAM_H
-
-#include <fstream>
-#include "dictionary.h"
-
-#ifdef MYMAXNGRAM
-#define MAX_NGRAM MYMAXNGRAM
-#else
-#define MAX_NGRAM 20
-#endif
-
-class dictionary;
-
-//typedef int code;
-
-// An n-gram of dictionary word codes.
-// The codes are kept right-aligned in word[]: the most recent word is
-// word[MAX_NGRAM-1] and wordp(k) addresses the k-th word from the end.
-class ngram{
- int word[MAX_NGRAM]; //encoded ngram
- public:
- dictionary *dict; //dictionary
- char* link; // ngram-tree pointer
- int midx[MAX_NGRAM]; // ngram-tree scan pointer
- int lev; // ngram-tree level
- int size; // ngram size
- int freq; // ngram frequency
- int succ; // number of successors
-
- unsigned char info; // ngram-tree info flags
- unsigned char pinfo; // ngram-tree parent info flags
- int isym; // last interruption symbol
-
- ngram(dictionary* d,int sz=0);
- ngram(ngram& ng);
-
- // wordp(): pointer to the first (oldest) word of the n-gram.
- // wordp(k): pointer to the k-th word counting from the end, or 0 when the
- // n-gram holds fewer than k words.
- int *wordp()// n-gram pointer
- {return wordp(size);};
- int *wordp(int k) // n-gram pointer
- {return size>=k?&word[MAX_NGRAM-k]:0;};
- const int *wordp() const // n-gram pointer
- {return wordp(size);};
- const int *wordp(int k) const // n-gram pointer
- {return size>=k?&word[MAX_NGRAM-k]:0;};
-
- // Moves every code one slot towards the end of word[], which drops the
- // most recent word, and decrements size. Always returns 1.
- // NOTE(review): size is decremented even when it is already 0 -- confirm
- // callers never shift an empty n-gram.
- int shift(){
- for (int i=(MAX_NGRAM-1);i>0;i--){
- word[i]=word[i-1];
- }
- size--;
- return 1;
- }
-
-
- // Returns 1 when word s occurs among the first lev words of the n-gram
- // (starting from the oldest one), 0 otherwise or when the dictionary
- // returns -1 for s. lev must not exceed size (asserted).
- int containsWord(char* s,int lev){
-
- int c=dict->encode(s);
- if (c == -1) return 0;
-
- assert(lev <= size);
- for (int i=0;i<lev;i++){
- if (*wordp(size-i)== c) return 1;
- }
- return 0;
- }
-
-
- // Copies ng into this n-gram, re-encoding the words when the two n-grams
- // use different dictionaries.
- void trans(const ngram& ng);
-
- friend std::ifstream& operator>> (std::ifstream& fi,ngram& ng);
- friend std::ofstream& operator<< (std::ofstream& fi,ngram& ng);
- friend std::istream& operator>> (std::istream& fi,ngram& ng);
- friend std::ostream& operator<< (std::ostream& fi,ngram& ng);
-
- // Returns 0 when any of the words at positions sz..2 from the end equals
- // the dictionary's OOV code, 1 otherwise (the most recent word is not
- // checked).
- // NOTE(review): wordp(i) yields 0 for i>size and is dereferenced here --
- // presumably callers guarantee sz<=size; verify.
- inline int ckhisto(int sz){
-
- for (int i=sz;i>1;i--)
- if (*wordp(i)==dict->oovcode())
- return 0;
- return 1;
- }
-
- // Append a word code / a word as the most recent element of the n-gram.
- int pushc(int c);
- int pushw(char* w);
-
- //~ngram();
-
-
-
-};
-
-#endif
-
-
-